1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/qu8-dwconv-minmax-rndnu.yaml 11 // Generator: tools/generate-dwconv-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/common.h> 17 #include <xnnpack/isa-checks.h> 18 19 #include <xnnpack/dwconv.h> 20 #include "dwconv-microkernel-tester.h" 21 22 23 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,c_eq_8)24 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_eq_8) { 25 TEST_REQUIRES_ARM_NEON; 26 DWConvMicrokernelTester() 27 .cr(8) 28 .kr(9) 29 .channels(8) 30 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 31 } 32 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,c_div_8)33 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_div_8) { 34 TEST_REQUIRES_ARM_NEON; 35 for (uint32_t channels = 16; channels < 128; channels += 24) { 36 DWConvMicrokernelTester() 37 .cr(8) 38 .kr(9) 39 .channels(channels) 40 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 41 } 42 } 43 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,c_div_8_with_qmin)44 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_div_8_with_qmin) { 45 TEST_REQUIRES_ARM_NEON; 46 for (uint32_t channels = 16; channels < 128; channels += 24) { 47 DWConvMicrokernelTester() 48 .cr(8) 49 .kr(9) 50 .channels(channels) 51 .qmin(128) 52 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 53 } 54 } 55 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,c_div_8_with_qmax)56 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_div_8_with_qmax) { 57 
TEST_REQUIRES_ARM_NEON; 58 for (uint32_t channels = 16; channels < 128; channels += 24) { 59 DWConvMicrokernelTester() 60 .cr(8) 61 .kr(9) 62 .channels(channels) 63 .qmax(128) 64 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 65 } 66 } 67 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,c_lt_8)68 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_lt_8) { 69 TEST_REQUIRES_ARM_NEON; 70 for (uint32_t channels = 1; channels < 8; channels++) { 71 DWConvMicrokernelTester() 72 .cr(8) 73 .kr(9) 74 .channels(channels) 75 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 76 } 77 } 78 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,c_gt_8)79 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_gt_8) { 80 TEST_REQUIRES_ARM_NEON; 81 for (uint32_t channels = 9; channels < 16; channels++) { 82 DWConvMicrokernelTester() 83 .cr(8) 84 .kr(9) 85 .channels(channels) 86 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 87 } 88 } 89 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,c_gt_8_with_qmin)90 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_gt_8_with_qmin) { 91 TEST_REQUIRES_ARM_NEON; 92 for (uint32_t channels = 9; channels < 16; channels++) { 93 DWConvMicrokernelTester() 94 .cr(8) 95 .kr(9) 96 .channels(channels) 97 .qmin(128) 98 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 99 } 100 } 101 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,c_gt_8_with_qmax)102 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, c_gt_8_with_qmax) { 103 TEST_REQUIRES_ARM_NEON; 104 for (uint32_t channels = 9; channels < 16; channels++) { 105 DWConvMicrokernelTester() 106 .cr(8) 107 .kr(9) 108 .channels(channels) 109 .qmax(128) 110 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 111 } 112 } 113 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,multipixel)114 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel) { 115 TEST_REQUIRES_ARM_NEON; 116 for (size_t channels = 1; channels <= 40; channels += 7) { 117 DWConvMicrokernelTester() 118 .cr(8) 119 .kr(9) 120 .channels(channels) 121 .width(3) 122 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 123 } 124 } 125 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,multipixel_with_step)126 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel_with_step) { 127 TEST_REQUIRES_ARM_NEON; 128 for (size_t channels = 1; channels <= 40; channels += 7) { 129 for (size_t step = 2; step <= 9; step++) { 130 DWConvMicrokernelTester() 131 .cr(8) 132 .kr(9) 133 .channels(channels) 134 .width(3) 135 .step(step) 136 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 137 } 138 } 139 } 140 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,multipixel_with_output_stride)141 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel_with_output_stride) { 142 TEST_REQUIRES_ARM_NEON; 143 for (size_t channels = 1; channels <= 40; channels += 7) { 144 DWConvMicrokernelTester() 145 .cr(8) 146 .kr(9) 147 .channels(8) 148 .width(5) 149 .output_stride(43) 150 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 151 } 152 } 153 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,multipixel_with_qmin)154 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel_with_qmin) { 155 TEST_REQUIRES_ARM_NEON; 156 for (size_t channels = 1; channels <= 40; channels += 7) { 157 DWConvMicrokernelTester() 158 .cr(8) 159 .kr(9) 160 .channels(channels) 161 .width(3) 162 .qmin(128) 163 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 164 } 165 } 166 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,multipixel_with_qmax)167 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, multipixel_with_qmax) { 168 TEST_REQUIRES_ARM_NEON; 169 for (size_t channels = 1; channels <= 40; channels += 7) { 170 DWConvMicrokernelTester() 171 .cr(8) 172 .kr(9) 173 .channels(channels) 174 .width(3) 175 .qmax(128) 176 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 177 } 178 } 179 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,input_zero_point_only)180 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, input_zero_point_only) { 181 TEST_REQUIRES_ARM_NEON; 182 for (size_t channels = 1; channels <= 40; channels += 7) { 183 DWConvMicrokernelTester() 184 .cr(8) 185 .kr(9) 186 .channels(channels) 187 .width(3) 188 .input_zero_point(255) 189 .kernel_zero_point(0) 190 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 191 } 192 } 193 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,kernel_zero_point_only)194 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, kernel_zero_point_only) { 195 TEST_REQUIRES_ARM_NEON; 196 for (size_t channels = 1; channels <= 40; channels += 7) { 197 DWConvMicrokernelTester() 198 .cr(8) 199 .kr(9) 200 .channels(channels) 201 .width(3) 202 .input_zero_point(0) 203 .kernel_zero_point(255) 204 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 205 } 206 } 207 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,input_offset)208 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, input_offset) { 209 TEST_REQUIRES_ARM_NEON; 210 for (uint32_t channels = 16; channels < 128; channels += 24) { 211 DWConvMicrokernelTester() 212 .cr(8) 213 .kr(9) 214 .channels(channels) 215 .input_offset(176) 216 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 217 } 218 } 219 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8,zero)220 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8, zero) { 221 TEST_REQUIRES_ARM_NEON; 222 for (uint32_t mz = 0; mz < 9; mz++) { 223 for (uint32_t channels = 16; channels < 128; channels += 24) { 224 DWConvMicrokernelTester() 225 .cr(8) 226 .kr(9) 227 .channels(channels) 228 .input_offset(176) 229 .zero_index(mz) 230 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 231 } 232 } 233 } 234 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 235 236 237 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_eq_8)238 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_eq_8) { 239 TEST_REQUIRES_ARM_NEON; 240 DWConvMicrokernelTester() 241 .cr(8) 242 .kr(9) 243 .channels(8) 244 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 245 } 246 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_div_8)247 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8) { 248 TEST_REQUIRES_ARM_NEON; 249 for (uint32_t channels = 16; channels < 128; channels += 24) { 250 DWConvMicrokernelTester() 251 .cr(8) 252 .kr(9) 253 .channels(channels) 254 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 255 } 256 } 257 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_div_8_with_qmin)258 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8_with_qmin) { 259 TEST_REQUIRES_ARM_NEON; 260 for (uint32_t channels = 16; channels < 128; channels += 24) { 261 DWConvMicrokernelTester() 262 .cr(8) 263 .kr(9) 264 .channels(channels) 265 .qmin(128) 266 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 267 } 268 } 269 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_div_8_with_qmax)270 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8_with_qmax) { 271 TEST_REQUIRES_ARM_NEON; 272 for (uint32_t channels = 16; channels < 128; channels += 24) { 273 DWConvMicrokernelTester() 274 .cr(8) 275 .kr(9) 276 .channels(channels) 277 .qmax(128) 278 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 279 } 280 } 281 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_lt_8)282 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_lt_8) { 283 TEST_REQUIRES_ARM_NEON; 284 for (uint32_t channels = 1; channels < 8; channels++) { 285 DWConvMicrokernelTester() 286 .cr(8) 287 .kr(9) 288 .channels(channels) 289 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 290 } 291 } 292 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_gt_8)293 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8) { 294 TEST_REQUIRES_ARM_NEON; 295 for (uint32_t channels = 9; channels < 16; channels++) { 296 DWConvMicrokernelTester() 297 .cr(8) 298 .kr(9) 299 .channels(channels) 300 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 301 } 302 } 303 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_gt_8_with_qmin)304 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8_with_qmin) { 305 TEST_REQUIRES_ARM_NEON; 306 for (uint32_t channels = 9; channels < 16; channels++) { 307 DWConvMicrokernelTester() 308 .cr(8) 309 .kr(9) 310 .channels(channels) 311 .qmin(128) 312 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 313 } 314 } 315 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_gt_8_with_qmax)316 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8_with_qmax) { 317 
TEST_REQUIRES_ARM_NEON; 318 for (uint32_t channels = 9; channels < 16; channels++) { 319 DWConvMicrokernelTester() 320 .cr(8) 321 .kr(9) 322 .channels(channels) 323 .qmax(128) 324 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 325 } 326 } 327 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,multipixel)328 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel) { 329 TEST_REQUIRES_ARM_NEON; 330 for (size_t channels = 1; channels <= 40; channels += 7) { 331 DWConvMicrokernelTester() 332 .cr(8) 333 .kr(9) 334 .channels(channels) 335 .width(3) 336 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 337 } 338 } 339 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,multipixel_with_step)340 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_step) { 341 TEST_REQUIRES_ARM_NEON; 342 for (size_t channels = 1; channels <= 40; channels += 7) { 343 for (size_t step = 2; step <= 9; step++) { 344 DWConvMicrokernelTester() 345 .cr(8) 346 .kr(9) 347 .channels(channels) 348 .width(3) 349 .step(step) 350 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 351 } 352 } 353 } 354 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,multipixel_with_output_stride)355 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_output_stride) { 356 TEST_REQUIRES_ARM_NEON; 357 for (size_t channels = 1; channels <= 40; channels += 7) { 358 DWConvMicrokernelTester() 359 .cr(8) 360 .kr(9) 361 .channels(8) 362 .width(5) 363 .output_stride(43) 364 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 365 } 366 } 367 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,multipixel_with_qmin)368 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_qmin) { 369 
TEST_REQUIRES_ARM_NEON; 370 for (size_t channels = 1; channels <= 40; channels += 7) { 371 DWConvMicrokernelTester() 372 .cr(8) 373 .kr(9) 374 .channels(channels) 375 .width(3) 376 .qmin(128) 377 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 378 } 379 } 380 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,multipixel_with_qmax)381 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_qmax) { 382 TEST_REQUIRES_ARM_NEON; 383 for (size_t channels = 1; channels <= 40; channels += 7) { 384 DWConvMicrokernelTester() 385 .cr(8) 386 .kr(9) 387 .channels(channels) 388 .width(3) 389 .qmax(128) 390 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 391 } 392 } 393 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,input_zero_point_only)394 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, input_zero_point_only) { 395 TEST_REQUIRES_ARM_NEON; 396 for (size_t channels = 1; channels <= 40; channels += 7) { 397 DWConvMicrokernelTester() 398 .cr(8) 399 .kr(9) 400 .channels(channels) 401 .width(3) 402 .input_zero_point(255) 403 .kernel_zero_point(0) 404 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 405 } 406 } 407 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,kernel_zero_point_only)408 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, kernel_zero_point_only) { 409 TEST_REQUIRES_ARM_NEON; 410 for (size_t channels = 1; channels <= 40; channels += 7) { 411 DWConvMicrokernelTester() 412 .cr(8) 413 .kr(9) 414 .channels(channels) 415 .width(3) 416 .input_zero_point(0) 417 .kernel_zero_point(255) 418 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 419 } 420 } 421 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,input_offset)422 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, input_offset) { 423 TEST_REQUIRES_ARM_NEON; 424 for (uint32_t channels = 16; channels < 128; channels += 24) { 425 DWConvMicrokernelTester() 426 .cr(8) 427 .kr(9) 428 .channels(channels) 429 .input_offset(176) 430 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 431 } 432 } 433 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,zero)434 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, zero) { 435 TEST_REQUIRES_ARM_NEON; 436 for (uint32_t mz = 0; mz < 9; mz++) { 437 for (uint32_t channels = 16; channels < 128; channels += 24) { 438 DWConvMicrokernelTester() 439 .cr(8) 440 .kr(9) 441 .channels(channels) 442 .input_offset(176) 443 .zero_index(mz) 444 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 445 } 446 } 447 } 448 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 449 450 451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,c_eq_8)452 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_eq_8) { 453 TEST_REQUIRES_ARM_NEON; 454 DWConvMicrokernelTester() 455 .cr(8) 456 .kr(25) 457 .channels(8) 458 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 459 } 460 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,c_div_8)461 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_div_8) { 462 TEST_REQUIRES_ARM_NEON; 463 for (uint32_t channels = 16; channels < 128; channels += 24) { 464 DWConvMicrokernelTester() 465 .cr(8) 466 .kr(25) 467 .channels(channels) 468 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 469 } 470 } 471 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,c_div_8_with_qmin)472 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_div_8_with_qmin) { 473 TEST_REQUIRES_ARM_NEON; 474 for 
(uint32_t channels = 16; channels < 128; channels += 24) { 475 DWConvMicrokernelTester() 476 .cr(8) 477 .kr(25) 478 .channels(channels) 479 .qmin(128) 480 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 481 } 482 } 483 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,c_div_8_with_qmax)484 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_div_8_with_qmax) { 485 TEST_REQUIRES_ARM_NEON; 486 for (uint32_t channels = 16; channels < 128; channels += 24) { 487 DWConvMicrokernelTester() 488 .cr(8) 489 .kr(25) 490 .channels(channels) 491 .qmax(128) 492 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 493 } 494 } 495 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,c_lt_8)496 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_lt_8) { 497 TEST_REQUIRES_ARM_NEON; 498 for (uint32_t channels = 1; channels < 8; channels++) { 499 DWConvMicrokernelTester() 500 .cr(8) 501 .kr(25) 502 .channels(channels) 503 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 504 } 505 } 506 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,c_gt_8)507 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_gt_8) { 508 TEST_REQUIRES_ARM_NEON; 509 for (uint32_t channels = 9; channels < 16; channels++) { 510 DWConvMicrokernelTester() 511 .cr(8) 512 .kr(25) 513 .channels(channels) 514 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 515 } 516 } 517 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,c_gt_8_with_qmin)518 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_gt_8_with_qmin) { 519 TEST_REQUIRES_ARM_NEON; 520 for (uint32_t channels = 9; channels < 16; channels++) { 521 DWConvMicrokernelTester() 522 .cr(8) 523 .kr(25) 524 .channels(channels) 525 .qmin(128) 526 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 527 } 528 } 529 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,c_gt_8_with_qmax)530 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, c_gt_8_with_qmax) { 531 TEST_REQUIRES_ARM_NEON; 532 for (uint32_t channels = 9; channels < 16; channels++) { 533 DWConvMicrokernelTester() 534 .cr(8) 535 .kr(25) 536 .channels(channels) 537 .qmax(128) 538 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 539 } 540 } 541 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,multipixel)542 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel) { 543 TEST_REQUIRES_ARM_NEON; 544 for (size_t channels = 1; channels <= 40; channels += 7) { 545 DWConvMicrokernelTester() 546 .cr(8) 547 .kr(25) 548 .channels(channels) 549 .width(3) 550 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 551 } 552 } 553 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,multipixel_with_step)554 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel_with_step) { 555 TEST_REQUIRES_ARM_NEON; 556 for (size_t channels = 1; channels <= 40; channels += 7) { 557 for (size_t step = 2; step <= 25; step++) { 558 DWConvMicrokernelTester() 559 .cr(8) 560 .kr(25) 561 .channels(channels) 562 .width(3) 563 .step(step) 564 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 565 } 566 } 567 } 568 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,multipixel_with_output_stride)569 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel_with_output_stride) { 570 TEST_REQUIRES_ARM_NEON; 571 for (size_t channels = 1; channels <= 40; channels += 7) { 572 DWConvMicrokernelTester() 573 .cr(8) 574 .kr(25) 575 .channels(8) 576 .width(5) 577 .output_stride(43) 578 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 579 } 580 } 581 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,multipixel_with_qmin)582 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel_with_qmin) { 583 TEST_REQUIRES_ARM_NEON; 584 for (size_t channels = 1; channels <= 40; channels += 7) { 585 DWConvMicrokernelTester() 586 .cr(8) 587 .kr(25) 588 .channels(channels) 589 .width(3) 590 .qmin(128) 591 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 592 } 593 } 594 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,multipixel_with_qmax)595 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, multipixel_with_qmax) { 596 TEST_REQUIRES_ARM_NEON; 597 for (size_t channels = 1; channels <= 40; channels += 7) { 598 DWConvMicrokernelTester() 599 .cr(8) 600 .kr(25) 601 .channels(channels) 602 .width(3) 603 .qmax(128) 604 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 605 } 606 } 607 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,input_zero_point_only)608 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, input_zero_point_only) { 609 TEST_REQUIRES_ARM_NEON; 610 for (size_t channels = 1; channels <= 40; channels += 7) { 611 DWConvMicrokernelTester() 612 .cr(8) 613 .kr(25) 614 .channels(channels) 615 .width(3) 616 .input_zero_point(255) 617 .kernel_zero_point(0) 618 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 619 } 620 } 621 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,kernel_zero_point_only)622 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, kernel_zero_point_only) { 623 TEST_REQUIRES_ARM_NEON; 624 for (size_t channels = 1; channels <= 40; channels += 7) { 625 DWConvMicrokernelTester() 626 .cr(8) 627 .kr(25) 628 .channels(channels) 629 .width(3) 
630 .input_zero_point(0) 631 .kernel_zero_point(255) 632 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 633 } 634 } 635 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,input_offset)636 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, input_offset) { 637 TEST_REQUIRES_ARM_NEON; 638 for (uint32_t channels = 16; channels < 128; channels += 24) { 639 DWConvMicrokernelTester() 640 .cr(8) 641 .kr(25) 642 .channels(channels) 643 .input_offset(176) 644 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 645 } 646 } 647 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8,zero)648 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8, zero) { 649 TEST_REQUIRES_ARM_NEON; 650 for (uint32_t mz = 0; mz < 25; mz++) { 651 for (uint32_t channels = 16; channels < 128; channels += 24) { 652 DWConvMicrokernelTester() 653 .cr(8) 654 .kr(25) 655 .channels(channels) 656 .input_offset(176) 657 .zero_index(mz) 658 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 659 } 660 } 661 } 662 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 663 664 665 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_eq_8)666 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_eq_8) { 667 TEST_REQUIRES_ARM_NEON; 668 DWConvMicrokernelTester() 669 .cr(8) 670 .kr(25) 671 .channels(8) 672 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 673 } 674 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_div_8)675 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8) { 676 TEST_REQUIRES_ARM_NEON; 677 for (uint32_t channels = 16; channels < 128; channels += 24) { 678 DWConvMicrokernelTester() 679 .cr(8) 680 .kr(25) 681 .channels(channels) 682 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 683 } 684 } 685 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_div_8_with_qmin)686 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8_with_qmin) { 687 TEST_REQUIRES_ARM_NEON; 688 for (uint32_t channels = 16; channels < 128; channels += 24) { 689 DWConvMicrokernelTester() 690 .cr(8) 691 .kr(25) 692 .channels(channels) 693 .qmin(128) 694 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 695 } 696 } 697 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_div_8_with_qmax)698 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8_with_qmax) { 699 TEST_REQUIRES_ARM_NEON; 700 for (uint32_t channels = 16; channels < 128; channels += 24) { 701 DWConvMicrokernelTester() 702 .cr(8) 703 .kr(25) 704 .channels(channels) 705 .qmax(128) 706 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 707 } 708 } 709 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_lt_8)710 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_lt_8) { 711 TEST_REQUIRES_ARM_NEON; 712 for (uint32_t channels = 1; channels < 8; channels++) { 713 DWConvMicrokernelTester() 714 .cr(8) 715 .kr(25) 716 .channels(channels) 717 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 718 } 719 } 720 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_gt_8)721 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8) { 722 TEST_REQUIRES_ARM_NEON; 723 for (uint32_t channels = 9; channels < 16; channels++) { 724 DWConvMicrokernelTester() 725 .cr(8) 726 .kr(25) 727 .channels(channels) 728 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 729 } 730 } 731 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
  // Channel counts 9..15 with qmin(128); cr(8), kr(25).
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 9; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Channel counts 9..15 with qmax(128).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 9; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// width(3) with channel counts 1, 8, ..., 36.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// width(3) with step values 2..25 for each channel count.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    for (size_t step = 2; step <= 25; step++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,multipixel_with_output_stride)783 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_output_stride) { 784 TEST_REQUIRES_ARM_NEON; 785 for (size_t channels = 1; channels <= 40; channels += 7) { 786 DWConvMicrokernelTester() 787 .cr(8) 788 .kr(25) 789 .channels(8) 790 .width(5) 791 .output_stride(43) 792 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 793 } 794 } 795 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,multipixel_with_qmin)796 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_qmin) { 797 TEST_REQUIRES_ARM_NEON; 798 for (size_t channels = 1; channels <= 40; channels += 7) { 799 DWConvMicrokernelTester() 800 .cr(8) 801 .kr(25) 802 .channels(channels) 803 .width(3) 804 .qmin(128) 805 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 806 } 807 } 808 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,multipixel_with_qmax)809 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_qmax) { 810 TEST_REQUIRES_ARM_NEON; 811 for (size_t channels = 1; channels <= 40; channels += 7) { 812 DWConvMicrokernelTester() 813 .cr(8) 814 .kr(25) 815 .channels(channels) 816 .width(3) 817 .qmax(128) 818 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 819 } 820 } 821 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,input_zero_point_only)822 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, input_zero_point_only) { 823 TEST_REQUIRES_ARM_NEON; 824 for (size_t channels = 1; channels <= 40; channels += 7) { 825 DWConvMicrokernelTester() 826 .cr(8) 827 .kr(25) 828 .channels(channels) 829 .width(3) 830 .input_zero_point(255) 831 .kernel_zero_point(0) 832 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 833 } 834 } 835 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,kernel_zero_point_only)836 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, kernel_zero_point_only) { 837 TEST_REQUIRES_ARM_NEON; 838 for (size_t channels = 1; channels <= 40; channels += 7) { 839 DWConvMicrokernelTester() 840 .cr(8) 841 .kr(25) 842 .channels(channels) 843 .width(3) 844 .input_zero_point(0) 845 .kernel_zero_point(255) 846 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 847 } 848 } 849 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,input_offset)850 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, input_offset) { 851 TEST_REQUIRES_ARM_NEON; 852 for (uint32_t channels = 16; channels < 128; channels += 24) { 853 DWConvMicrokernelTester() 854 .cr(8) 855 .kr(25) 856 .channels(channels) 857 .input_offset(176) 858 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 859 } 860 } 861 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,zero)862 TEST(QU8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, zero) { 863 TEST_REQUIRES_ARM_NEON; 864 for (uint32_t mz = 0; mz < 25; mz++) { 865 for (uint32_t channels = 16; channels < 128; channels += 24) { 866 DWConvMicrokernelTester() 867 .cr(8) 868 .kr(25) 869 .channels(channels) 870 .input_offset(176) 871 .zero_index(mz) 872 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 873 } 874 } 875 } 876 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 877 878 879 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8,c_eq_16)880 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_eq_16) { 881 TEST_REQUIRES_ARM_NEON; 882 DWConvMicrokernelTester() 883 .cr(16) 884 .kr(9) 885 .channels(16) 886 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 887 } 888 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8,c_div_16)889 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_div_16) { 890 TEST_REQUIRES_ARM_NEON; 891 for (uint32_t channels = 32; channels < 256; channels += 48) { 892 DWConvMicrokernelTester() 893 .cr(16) 894 .kr(9) 895 .channels(channels) 896 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 897 } 898 } 899 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8,c_div_16_with_qmin)900 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_div_16_with_qmin) { 901 TEST_REQUIRES_ARM_NEON; 902 for (uint32_t channels = 32; channels < 256; channels += 48) { 903 DWConvMicrokernelTester() 904 .cr(16) 905 .kr(9) 906 .channels(channels) 907 .qmin(128) 908 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 909 } 910 } 911 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8,c_div_16_with_qmax)912 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_div_16_with_qmax) { 913 TEST_REQUIRES_ARM_NEON; 914 for (uint32_t channels = 32; channels < 256; channels += 48) { 915 DWConvMicrokernelTester() 916 .cr(16) 917 .kr(9) 918 .channels(channels) 919 .qmax(128) 920 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 921 } 922 } 923 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8,c_lt_16)924 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_lt_16) { 925 TEST_REQUIRES_ARM_NEON; 926 for (uint32_t channels = 1; channels < 16; channels++) { 927 DWConvMicrokernelTester() 928 .cr(16) 929 .kr(9) 930 .channels(channels) 931 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 932 } 933 } 934 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8,c_gt_16)935 
// Every channel count from 17 to 31.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Channel counts 17..31 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Channel counts 17..31 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3, channels 1, 16, ..., 76.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with every step from 2 to 9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    for (size_t step = 2; step <= 9; step++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Width 5 with an output stride of 83.
// Fix: the tester now receives the loop's channel count. The previous code
// passed the constant 16, so all six iterations ran the same configuration.
// The stride of 83 is larger than every channel count in the sweep (1, 16, ..., 76).
// NOTE: this file is generated by tools/generate-dwconv-test.py; mirror the change there.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3, channels 1, 16, ..., 76, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3, channels 1, 16, ..., 76, with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(3)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with input_zero_point 255 and kernel_zero_point 0.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with input_zero_point 0 and kernel_zero_point 255.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// input_offset(304) with channel counts 32, 80, 128, 176, 224 (multiples of 16).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 32; channels < 256; channels += 48) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .input_offset(304)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Sweeps zero_index over 0..8 (every value below kr) with input_offset(304).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mz = 0; mz < 9; mz++) {
    for (uint32_t channels = 32; channels < 256; channels += 48) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .input_offset(304)
        .zero_index(mz)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}
1090 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1091 1092 1093 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_eq_16)1094 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_eq_16) { 1095 TEST_REQUIRES_ARM_NEON; 1096 DWConvMicrokernelTester() 1097 .cr(16) 1098 .kr(9) 1099 .channels(16) 1100 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1101 } 1102 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_div_16)1103 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16) { 1104 TEST_REQUIRES_ARM_NEON; 1105 for (uint32_t channels = 32; channels < 256; channels += 48) { 1106 DWConvMicrokernelTester() 1107 .cr(16) 1108 .kr(9) 1109 .channels(channels) 1110 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1111 } 1112 } 1113 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_div_16_with_qmin)1114 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16_with_qmin) { 1115 TEST_REQUIRES_ARM_NEON; 1116 for (uint32_t channels = 32; channels < 256; channels += 48) { 1117 DWConvMicrokernelTester() 1118 .cr(16) 1119 .kr(9) 1120 .channels(channels) 1121 .qmin(128) 1122 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1123 } 1124 } 1125 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_div_16_with_qmax)1126 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16_with_qmax) { 1127 TEST_REQUIRES_ARM_NEON; 1128 for (uint32_t channels = 32; channels < 256; channels += 48) { 1129 DWConvMicrokernelTester() 1130 .cr(16) 1131 .kr(9) 1132 .channels(channels) 1133 .qmax(128) 1134 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1135 } 1136 } 1137 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_lt_16)1138 
// Every channel count below 16 (1..15).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 1; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every channel count from 17 to 31.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Channel counts 17..31 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Channel counts 17..31 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3, channels 1, 16, ..., 76.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with every step from 2 to 9.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    for (size_t step = 2; step <= 9; step++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(9)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Width 5 with an output stride of 83.
// Fix: the tester now receives the loop's channel count. The previous code
// passed the constant 16, so all six iterations ran the same configuration.
// The stride of 83 is larger than every channel count in the sweep (1, 16, ..., 76).
// NOTE: this file is generated by tools/generate-dwconv-test.py; mirror the change there.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3, channels 1, 16, ..., 76, with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(9)
      .channels(channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_qmax) { 1238 TEST_REQUIRES_ARM_NEON; 1239 for (size_t channels = 1; channels <= 80; channels += 15) { 1240 DWConvMicrokernelTester() 1241 .cr(16) 1242 .kr(9) 1243 .channels(channels) 1244 .width(3) 1245 .qmax(128) 1246 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1247 } 1248 } 1249 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,input_zero_point_only)1250 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, input_zero_point_only) { 1251 TEST_REQUIRES_ARM_NEON; 1252 for (size_t channels = 1; channels <= 80; channels += 15) { 1253 DWConvMicrokernelTester() 1254 .cr(16) 1255 .kr(9) 1256 .channels(channels) 1257 .width(3) 1258 .input_zero_point(255) 1259 .kernel_zero_point(0) 1260 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1261 } 1262 } 1263 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,kernel_zero_point_only)1264 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, kernel_zero_point_only) { 1265 TEST_REQUIRES_ARM_NEON; 1266 for (size_t channels = 1; channels <= 80; channels += 15) { 1267 DWConvMicrokernelTester() 1268 .cr(16) 1269 .kr(9) 1270 .channels(channels) 1271 .width(3) 1272 .input_zero_point(0) 1273 .kernel_zero_point(255) 1274 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1275 } 1276 } 1277 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,input_offset)1278 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, input_offset) { 1279 TEST_REQUIRES_ARM_NEON; 1280 for (uint32_t channels = 32; channels < 256; channels += 48) { 1281 DWConvMicrokernelTester() 1282 .cr(16) 1283 .kr(9) 1284 .channels(channels) 1285 .input_offset(304) 1286 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, 
xnn_qu8_requantize_rndnu); 1287 } 1288 } 1289 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,zero)1290 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, zero) { 1291 TEST_REQUIRES_ARM_NEON; 1292 for (uint32_t mz = 0; mz < 9; mz++) { 1293 for (uint32_t channels = 32; channels < 256; channels += 48) { 1294 DWConvMicrokernelTester() 1295 .cr(16) 1296 .kr(9) 1297 .channels(channels) 1298 .input_offset(304) 1299 .zero_index(mz) 1300 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1301 } 1302 } 1303 } 1304 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1305 1306 1307 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,c_eq_16)1308 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_eq_16) { 1309 TEST_REQUIRES_ARM_NEON; 1310 DWConvMicrokernelTester() 1311 .cr(16) 1312 .kr(25) 1313 .channels(16) 1314 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1315 } 1316 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,c_div_16)1317 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_div_16) { 1318 TEST_REQUIRES_ARM_NEON; 1319 for (uint32_t channels = 32; channels < 256; channels += 48) { 1320 DWConvMicrokernelTester() 1321 .cr(16) 1322 .kr(25) 1323 .channels(channels) 1324 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1325 } 1326 } 1327 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,c_div_16_with_qmin)1328 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_div_16_with_qmin) { 1329 TEST_REQUIRES_ARM_NEON; 1330 for (uint32_t channels = 32; channels < 256; channels += 48) { 1331 DWConvMicrokernelTester() 1332 .cr(16) 1333 .kr(25) 1334 .channels(channels) 1335 .qmin(128) 1336 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1337 } 
1338 } 1339 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,c_div_16_with_qmax)1340 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_div_16_with_qmax) { 1341 TEST_REQUIRES_ARM_NEON; 1342 for (uint32_t channels = 32; channels < 256; channels += 48) { 1343 DWConvMicrokernelTester() 1344 .cr(16) 1345 .kr(25) 1346 .channels(channels) 1347 .qmax(128) 1348 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1349 } 1350 } 1351 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,c_lt_16)1352 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_lt_16) { 1353 TEST_REQUIRES_ARM_NEON; 1354 for (uint32_t channels = 1; channels < 16; channels++) { 1355 DWConvMicrokernelTester() 1356 .cr(16) 1357 .kr(25) 1358 .channels(channels) 1359 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1360 } 1361 } 1362 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,c_gt_16)1363 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_gt_16) { 1364 TEST_REQUIRES_ARM_NEON; 1365 for (uint32_t channels = 17; channels < 32; channels++) { 1366 DWConvMicrokernelTester() 1367 .cr(16) 1368 .kr(25) 1369 .channels(channels) 1370 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1371 } 1372 } 1373 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,c_gt_16_with_qmin)1374 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_gt_16_with_qmin) { 1375 TEST_REQUIRES_ARM_NEON; 1376 for (uint32_t channels = 17; channels < 32; channels++) { 1377 DWConvMicrokernelTester() 1378 .cr(16) 1379 .kr(25) 1380 .channels(channels) 1381 .qmin(128) 1382 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1383 } 1384 } 1385 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,c_gt_16_with_qmax)1386 
// Channel counts 17..31 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, c_gt_16_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 17; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3, channels 1, 16, ..., 76.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with every step from 2 to 25.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    for (size_t step = 2; step <= 25; step++) {
      DWConvMicrokernelTester()
        .cr(16)
        .kr(25)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Width 5 with an output stride of 83.
// Fix: the tester now receives the loop's channel count. The previous code
// passed the constant 16, so all six iterations ran the same configuration.
// The stride of 83 is larger than every channel count in the sweep (1, 16, ..., 76).
// NOTE: this file is generated by tools/generate-dwconv-test.py; mirror the change there.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 80; channels += 15) {
    DWConvMicrokernelTester()
      .cr(16)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(83)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,multipixel_with_qmin)1438 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel_with_qmin) { 1439 TEST_REQUIRES_ARM_NEON; 1440 for (size_t channels = 1; channels <= 80; channels += 15) { 1441 DWConvMicrokernelTester() 1442 .cr(16) 1443 .kr(25) 1444 .channels(channels) 1445 .width(3) 1446 .qmin(128) 1447 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1448 } 1449 } 1450 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,multipixel_with_qmax)1451 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, multipixel_with_qmax) { 1452 TEST_REQUIRES_ARM_NEON; 1453 for (size_t channels = 1; channels <= 80; channels += 15) { 1454 DWConvMicrokernelTester() 1455 .cr(16) 1456 .kr(25) 1457 .channels(channels) 1458 .width(3) 1459 .qmax(128) 1460 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1461 } 1462 } 1463 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,input_zero_point_only)1464 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, input_zero_point_only) { 1465 TEST_REQUIRES_ARM_NEON; 1466 for (size_t channels = 1; channels <= 80; channels += 15) { 1467 DWConvMicrokernelTester() 1468 .cr(16) 1469 .kr(25) 1470 .channels(channels) 1471 .width(3) 1472 .input_zero_point(255) 1473 .kernel_zero_point(0) 1474 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1475 } 1476 } 1477 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,kernel_zero_point_only)1478 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, kernel_zero_point_only) { 1479 TEST_REQUIRES_ARM_NEON; 1480 for (size_t channels = 1; channels <= 80; channels += 15) { 1481 DWConvMicrokernelTester() 1482 .cr(16) 1483 .kr(25) 1484 .channels(channels) 1485 .width(3) 1486 .input_zero_point(0) 1487 .kernel_zero_point(255) 1488 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1489 } 1490 } 1491 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,input_offset)1492 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, input_offset) { 1493 TEST_REQUIRES_ARM_NEON; 1494 for (uint32_t channels = 32; channels < 256; channels += 48) { 1495 DWConvMicrokernelTester() 1496 .cr(16) 1497 .kr(25) 1498 .channels(channels) 1499 .input_offset(304) 1500 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1501 } 1502 } 1503 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8,zero)1504 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8, zero) { 1505 TEST_REQUIRES_ARM_NEON; 1506 for (uint32_t mz = 0; mz < 25; mz++) { 1507 for (uint32_t channels = 32; channels < 256; channels += 48) { 1508 DWConvMicrokernelTester() 1509 .cr(16) 1510 .kr(25) 1511 .channels(channels) 1512 .input_offset(304) 1513 .zero_index(mz) 1514 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1515 } 1516 } 1517 } 1518 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1519 1520 1521 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_eq_16)1522 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_eq_16) { 1523 TEST_REQUIRES_ARM_NEON; 1524 DWConvMicrokernelTester() 1525 .cr(16) 1526 .kr(25) 1527 .channels(16) 1528 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1529 } 1530 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_div_16)1531 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16) { 1532 TEST_REQUIRES_ARM_NEON; 1533 for (uint32_t channels = 32; channels < 256; channels += 48) { 1534 DWConvMicrokernelTester() 1535 .cr(16) 1536 .kr(25) 1537 .channels(channels) 1538 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1539 } 1540 } 1541 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_div_16_with_qmin)1542 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16_with_qmin) { 1543 TEST_REQUIRES_ARM_NEON; 1544 for (uint32_t channels = 32; channels < 256; channels += 48) { 1545 DWConvMicrokernelTester() 1546 .cr(16) 1547 .kr(25) 1548 .channels(channels) 1549 .qmin(128) 1550 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1551 } 1552 } 1553 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_div_16_with_qmax)1554 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16_with_qmax) { 1555 TEST_REQUIRES_ARM_NEON; 1556 for (uint32_t channels = 32; channels < 256; channels += 48) { 1557 DWConvMicrokernelTester() 1558 .cr(16) 1559 .kr(25) 1560 .channels(channels) 1561 .qmax(128) 1562 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1563 } 1564 } 1565 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_lt_16)1566 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_lt_16) { 1567 TEST_REQUIRES_ARM_NEON; 1568 for (uint32_t channels = 1; channels < 16; channels++) { 1569 DWConvMicrokernelTester() 1570 .cr(16) 1571 .kr(25) 1572 .channels(channels) 1573 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1574 } 1575 } 1576 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_gt_16)1577 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16) { 1578 TEST_REQUIRES_ARM_NEON; 1579 for (uint32_t channels = 17; channels < 32; channels++) { 1580 DWConvMicrokernelTester() 1581 .cr(16) 1582 .kr(25) 1583 .channels(channels) 1584 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1585 } 1586 } 1587 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_gt_16_with_qmin)1588 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16_with_qmin) { 1589 TEST_REQUIRES_ARM_NEON; 1590 for (uint32_t channels = 17; channels < 32; channels++) { 1591 DWConvMicrokernelTester() 1592 .cr(16) 1593 .kr(25) 1594 .channels(channels) 1595 .qmin(128) 1596 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1597 } 1598 } 1599 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_gt_16_with_qmax)1600 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16_with_qmax) { 1601 TEST_REQUIRES_ARM_NEON; 1602 for (uint32_t channels = 17; channels < 32; channels++) { 1603 DWConvMicrokernelTester() 1604 .cr(16) 1605 .kr(25) 1606 .channels(channels) 1607 .qmax(128) 1608 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1609 } 1610 } 1611 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel)1612 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel) { 1613 TEST_REQUIRES_ARM_NEON; 1614 for (size_t channels = 1; channels <= 80; channels += 15) { 1615 DWConvMicrokernelTester() 1616 .cr(16) 1617 .kr(25) 1618 .channels(channels) 1619 .width(3) 1620 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1621 } 1622 } 1623 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel_with_step)1624 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_step) { 1625 TEST_REQUIRES_ARM_NEON; 1626 for (size_t channels = 1; channels <= 80; channels += 15) { 1627 for (size_t step = 2; step <= 25; step++) { 1628 DWConvMicrokernelTester() 1629 .cr(16) 1630 .kr(25) 1631 .channels(channels) 1632 .width(3) 1633 .step(step) 1634 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1635 } 1636 } 1637 } 1638 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel_with_output_stride)1639 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_output_stride) { 1640 TEST_REQUIRES_ARM_NEON; 1641 for (size_t channels = 1; channels <= 80; channels += 15) { 1642 DWConvMicrokernelTester() 1643 .cr(16) 1644 .kr(25) 1645 .channels(16) 1646 .width(5) 1647 .output_stride(83) 1648 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1649 } 1650 } 1651 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel_with_qmin)1652 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_qmin) { 1653 TEST_REQUIRES_ARM_NEON; 1654 for (size_t channels = 1; channels <= 80; channels += 15) { 1655 DWConvMicrokernelTester() 1656 .cr(16) 1657 .kr(25) 1658 .channels(channels) 1659 .width(3) 1660 .qmin(128) 1661 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1662 } 1663 } 1664 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel_with_qmax)1665 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_qmax) { 1666 TEST_REQUIRES_ARM_NEON; 1667 for (size_t channels = 1; channels <= 80; channels += 15) { 1668 DWConvMicrokernelTester() 1669 .cr(16) 1670 .kr(25) 1671 .channels(channels) 1672 .width(3) 1673 .qmax(128) 1674 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1675 } 1676 } 1677 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,input_zero_point_only)1678 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, input_zero_point_only) { 1679 TEST_REQUIRES_ARM_NEON; 1680 for (size_t channels = 1; channels <= 80; channels += 15) { 1681 DWConvMicrokernelTester() 
1682 .cr(16) 1683 .kr(25) 1684 .channels(channels) 1685 .width(3) 1686 .input_zero_point(255) 1687 .kernel_zero_point(0) 1688 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1689 } 1690 } 1691 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,kernel_zero_point_only)1692 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, kernel_zero_point_only) { 1693 TEST_REQUIRES_ARM_NEON; 1694 for (size_t channels = 1; channels <= 80; channels += 15) { 1695 DWConvMicrokernelTester() 1696 .cr(16) 1697 .kr(25) 1698 .channels(channels) 1699 .width(3) 1700 .input_zero_point(0) 1701 .kernel_zero_point(255) 1702 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1703 } 1704 } 1705 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,input_offset)1706 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, input_offset) { 1707 TEST_REQUIRES_ARM_NEON; 1708 for (uint32_t channels = 32; channels < 256; channels += 48) { 1709 DWConvMicrokernelTester() 1710 .cr(16) 1711 .kr(25) 1712 .channels(channels) 1713 .input_offset(304) 1714 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1715 } 1716 } 1717 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,zero)1718 TEST(QU8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, zero) { 1719 TEST_REQUIRES_ARM_NEON; 1720 for (uint32_t mz = 0; mz < 25; mz++) { 1721 for (uint32_t channels = 32; channels < 256; channels += 48) { 1722 DWConvMicrokernelTester() 1723 .cr(16) 1724 .kr(25) 1725 .channels(channels) 1726 .input_offset(304) 1727 .zero_index(mz) 1728 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1729 } 1730 } 1731 } 1732 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1733 1734 1735 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,c_eq_24)1736 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_eq_24) { 1737 TEST_REQUIRES_ARM_NEON; 1738 DWConvMicrokernelTester() 1739 .cr(24) 1740 .kr(9) 1741 .channels(24) 1742 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1743 } 1744 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,c_div_24)1745 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_div_24) { 1746 TEST_REQUIRES_ARM_NEON; 1747 for (uint32_t channels = 48; channels < 384; channels += 72) { 1748 DWConvMicrokernelTester() 1749 .cr(24) 1750 .kr(9) 1751 .channels(channels) 1752 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1753 } 1754 } 1755 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,c_div_24_with_qmin)1756 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_div_24_with_qmin) { 1757 TEST_REQUIRES_ARM_NEON; 1758 for (uint32_t channels = 48; channels < 384; channels += 72) { 1759 DWConvMicrokernelTester() 1760 .cr(24) 1761 .kr(9) 1762 .channels(channels) 1763 .qmin(128) 1764 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1765 } 1766 } 1767 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,c_div_24_with_qmax)1768 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_div_24_with_qmax) { 1769 TEST_REQUIRES_ARM_NEON; 1770 for (uint32_t channels = 48; channels < 384; channels += 72) { 1771 DWConvMicrokernelTester() 1772 .cr(24) 1773 .kr(9) 1774 .channels(channels) 1775 .qmax(128) 1776 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1777 } 1778 } 1779 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,c_lt_24)1780 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_lt_24) { 1781 TEST_REQUIRES_ARM_NEON; 1782 for (uint32_t channels = 1; channels 
< 24; channels++) { 1783 DWConvMicrokernelTester() 1784 .cr(24) 1785 .kr(9) 1786 .channels(channels) 1787 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1788 } 1789 } 1790 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,c_gt_24)1791 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_gt_24) { 1792 TEST_REQUIRES_ARM_NEON; 1793 for (uint32_t channels = 25; channels < 48; channels++) { 1794 DWConvMicrokernelTester() 1795 .cr(24) 1796 .kr(9) 1797 .channels(channels) 1798 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1799 } 1800 } 1801 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,c_gt_24_with_qmin)1802 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_gt_24_with_qmin) { 1803 TEST_REQUIRES_ARM_NEON; 1804 for (uint32_t channels = 25; channels < 48; channels++) { 1805 DWConvMicrokernelTester() 1806 .cr(24) 1807 .kr(9) 1808 .channels(channels) 1809 .qmin(128) 1810 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1811 } 1812 } 1813 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,c_gt_24_with_qmax)1814 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, c_gt_24_with_qmax) { 1815 TEST_REQUIRES_ARM_NEON; 1816 for (uint32_t channels = 25; channels < 48; channels++) { 1817 DWConvMicrokernelTester() 1818 .cr(24) 1819 .kr(9) 1820 .channels(channels) 1821 .qmax(128) 1822 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1823 } 1824 } 1825 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,multipixel)1826 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel) { 1827 TEST_REQUIRES_ARM_NEON; 1828 for (size_t channels = 1; channels <= 120; channels += 23) { 1829 DWConvMicrokernelTester() 1830 .cr(24) 1831 .kr(9) 1832 .channels(channels) 1833 .width(3) 1834 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1835 } 1836 } 1837 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,multipixel_with_step)1838 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel_with_step) { 1839 TEST_REQUIRES_ARM_NEON; 1840 for (size_t channels = 1; channels <= 120; channels += 23) { 1841 for (size_t step = 2; step <= 9; step++) { 1842 DWConvMicrokernelTester() 1843 .cr(24) 1844 .kr(9) 1845 .channels(channels) 1846 .width(3) 1847 .step(step) 1848 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1849 } 1850 } 1851 } 1852 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,multipixel_with_output_stride)1853 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel_with_output_stride) { 1854 TEST_REQUIRES_ARM_NEON; 1855 for (size_t channels = 1; channels <= 120; channels += 23) { 1856 DWConvMicrokernelTester() 1857 .cr(24) 1858 .kr(9) 1859 .channels(24) 1860 .width(5) 1861 .output_stride(127) 1862 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1863 } 1864 } 1865 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,multipixel_with_qmin)1866 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel_with_qmin) { 1867 TEST_REQUIRES_ARM_NEON; 1868 for (size_t channels = 1; channels <= 120; channels += 23) { 1869 DWConvMicrokernelTester() 1870 .cr(24) 1871 .kr(9) 1872 .channels(channels) 1873 .width(3) 1874 .qmin(128) 1875 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1876 } 1877 } 1878 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,multipixel_with_qmax)1879 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, multipixel_with_qmax) { 1880 TEST_REQUIRES_ARM_NEON; 1881 for (size_t channels = 1; channels <= 120; channels += 23) { 1882 
DWConvMicrokernelTester() 1883 .cr(24) 1884 .kr(9) 1885 .channels(channels) 1886 .width(3) 1887 .qmax(128) 1888 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1889 } 1890 } 1891 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,input_zero_point_only)1892 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, input_zero_point_only) { 1893 TEST_REQUIRES_ARM_NEON; 1894 for (size_t channels = 1; channels <= 120; channels += 23) { 1895 DWConvMicrokernelTester() 1896 .cr(24) 1897 .kr(9) 1898 .channels(channels) 1899 .width(3) 1900 .input_zero_point(255) 1901 .kernel_zero_point(0) 1902 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1903 } 1904 } 1905 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,kernel_zero_point_only)1906 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, kernel_zero_point_only) { 1907 TEST_REQUIRES_ARM_NEON; 1908 for (size_t channels = 1; channels <= 120; channels += 23) { 1909 DWConvMicrokernelTester() 1910 .cr(24) 1911 .kr(9) 1912 .channels(channels) 1913 .width(3) 1914 .input_zero_point(0) 1915 .kernel_zero_point(255) 1916 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1917 } 1918 } 1919 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,input_offset)1920 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, input_offset) { 1921 TEST_REQUIRES_ARM_NEON; 1922 for (uint32_t channels = 48; channels < 384; channels += 72) { 1923 DWConvMicrokernelTester() 1924 .cr(24) 1925 .kr(9) 1926 .channels(channels) 1927 .input_offset(464) 1928 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1929 } 1930 } 1931 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8,zero)1932 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL8, zero) { 1933 TEST_REQUIRES_ARM_NEON; 1934 
for (uint32_t mz = 0; mz < 9; mz++) { 1935 for (uint32_t channels = 48; channels < 384; channels += 72) { 1936 DWConvMicrokernelTester() 1937 .cr(24) 1938 .kr(9) 1939 .channels(channels) 1940 .input_offset(464) 1941 .zero_index(mz) 1942 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1943 } 1944 } 1945 } 1946 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1947 1948 1949 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_eq_24)1950 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_eq_24) { 1951 TEST_REQUIRES_ARM_NEON; 1952 DWConvMicrokernelTester() 1953 .cr(24) 1954 .kr(9) 1955 .channels(24) 1956 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1957 } 1958 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_div_24)1959 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24) { 1960 TEST_REQUIRES_ARM_NEON; 1961 for (uint32_t channels = 48; channels < 384; channels += 72) { 1962 DWConvMicrokernelTester() 1963 .cr(24) 1964 .kr(9) 1965 .channels(channels) 1966 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1967 } 1968 } 1969 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_div_24_with_qmin)1970 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24_with_qmin) { 1971 TEST_REQUIRES_ARM_NEON; 1972 for (uint32_t channels = 48; channels < 384; channels += 72) { 1973 DWConvMicrokernelTester() 1974 .cr(24) 1975 .kr(9) 1976 .channels(channels) 1977 .qmin(128) 1978 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1979 } 1980 } 1981 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_div_24_with_qmax)1982 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24_with_qmax) { 1983 TEST_REQUIRES_ARM_NEON; 1984 for 
(uint32_t channels = 48; channels < 384; channels += 72) { 1985 DWConvMicrokernelTester() 1986 .cr(24) 1987 .kr(9) 1988 .channels(channels) 1989 .qmax(128) 1990 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 1991 } 1992 } 1993 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_lt_24)1994 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_lt_24) { 1995 TEST_REQUIRES_ARM_NEON; 1996 for (uint32_t channels = 1; channels < 24; channels++) { 1997 DWConvMicrokernelTester() 1998 .cr(24) 1999 .kr(9) 2000 .channels(channels) 2001 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2002 } 2003 } 2004 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_gt_24)2005 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24) { 2006 TEST_REQUIRES_ARM_NEON; 2007 for (uint32_t channels = 25; channels < 48; channels++) { 2008 DWConvMicrokernelTester() 2009 .cr(24) 2010 .kr(9) 2011 .channels(channels) 2012 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2013 } 2014 } 2015 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_gt_24_with_qmin)2016 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24_with_qmin) { 2017 TEST_REQUIRES_ARM_NEON; 2018 for (uint32_t channels = 25; channels < 48; channels++) { 2019 DWConvMicrokernelTester() 2020 .cr(24) 2021 .kr(9) 2022 .channels(channels) 2023 .qmin(128) 2024 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2025 } 2026 } 2027 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_gt_24_with_qmax)2028 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24_with_qmax) { 2029 TEST_REQUIRES_ARM_NEON; 2030 for (uint32_t channels = 25; channels < 48; channels++) { 2031 DWConvMicrokernelTester() 2032 .cr(24) 2033 .kr(9) 2034 
.channels(channels) 2035 .qmax(128) 2036 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2037 } 2038 } 2039 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel)2040 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel) { 2041 TEST_REQUIRES_ARM_NEON; 2042 for (size_t channels = 1; channels <= 120; channels += 23) { 2043 DWConvMicrokernelTester() 2044 .cr(24) 2045 .kr(9) 2046 .channels(channels) 2047 .width(3) 2048 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2049 } 2050 } 2051 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel_with_step)2052 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_step) { 2053 TEST_REQUIRES_ARM_NEON; 2054 for (size_t channels = 1; channels <= 120; channels += 23) { 2055 for (size_t step = 2; step <= 9; step++) { 2056 DWConvMicrokernelTester() 2057 .cr(24) 2058 .kr(9) 2059 .channels(channels) 2060 .width(3) 2061 .step(step) 2062 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2063 } 2064 } 2065 } 2066 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel_with_output_stride)2067 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_output_stride) { 2068 TEST_REQUIRES_ARM_NEON; 2069 for (size_t channels = 1; channels <= 120; channels += 23) { 2070 DWConvMicrokernelTester() 2071 .cr(24) 2072 .kr(9) 2073 .channels(24) 2074 .width(5) 2075 .output_stride(127) 2076 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2077 } 2078 } 2079 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel_with_qmin)2080 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_qmin) { 2081 TEST_REQUIRES_ARM_NEON; 2082 for (size_t channels = 1; channels <= 120; channels 
+= 23) { 2083 DWConvMicrokernelTester() 2084 .cr(24) 2085 .kr(9) 2086 .channels(channels) 2087 .width(3) 2088 .qmin(128) 2089 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2090 } 2091 } 2092 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel_with_qmax)2093 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_qmax) { 2094 TEST_REQUIRES_ARM_NEON; 2095 for (size_t channels = 1; channels <= 120; channels += 23) { 2096 DWConvMicrokernelTester() 2097 .cr(24) 2098 .kr(9) 2099 .channels(channels) 2100 .width(3) 2101 .qmax(128) 2102 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2103 } 2104 } 2105 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,input_zero_point_only)2106 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, input_zero_point_only) { 2107 TEST_REQUIRES_ARM_NEON; 2108 for (size_t channels = 1; channels <= 120; channels += 23) { 2109 DWConvMicrokernelTester() 2110 .cr(24) 2111 .kr(9) 2112 .channels(channels) 2113 .width(3) 2114 .input_zero_point(255) 2115 .kernel_zero_point(0) 2116 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2117 } 2118 } 2119 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,kernel_zero_point_only)2120 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, kernel_zero_point_only) { 2121 TEST_REQUIRES_ARM_NEON; 2122 for (size_t channels = 1; channels <= 120; channels += 23) { 2123 DWConvMicrokernelTester() 2124 .cr(24) 2125 .kr(9) 2126 .channels(channels) 2127 .width(3) 2128 .input_zero_point(0) 2129 .kernel_zero_point(255) 2130 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2131 } 2132 } 2133 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,input_offset)2134 
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, input_offset) { 2135 TEST_REQUIRES_ARM_NEON; 2136 for (uint32_t channels = 48; channels < 384; channels += 72) { 2137 DWConvMicrokernelTester() 2138 .cr(24) 2139 .kr(9) 2140 .channels(channels) 2141 .input_offset(464) 2142 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2143 } 2144 } 2145 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,zero)2146 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, zero) { 2147 TEST_REQUIRES_ARM_NEON; 2148 for (uint32_t mz = 0; mz < 9; mz++) { 2149 for (uint32_t channels = 48; channels < 384; channels += 72) { 2150 DWConvMicrokernelTester() 2151 .cr(24) 2152 .kr(9) 2153 .channels(channels) 2154 .input_offset(464) 2155 .zero_index(mz) 2156 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2157 } 2158 } 2159 } 2160 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2161 2162 2163 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8,c_eq_24)2164 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_eq_24) { 2165 TEST_REQUIRES_ARM_NEON; 2166 DWConvMicrokernelTester() 2167 .cr(24) 2168 .kr(25) 2169 .channels(24) 2170 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2171 } 2172 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8,c_div_24)2173 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_div_24) { 2174 TEST_REQUIRES_ARM_NEON; 2175 for (uint32_t channels = 48; channels < 384; channels += 72) { 2176 DWConvMicrokernelTester() 2177 .cr(24) 2178 .kr(25) 2179 .channels(channels) 2180 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2181 } 2182 } 2183 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8,c_div_24_with_qmin)2184 
// Multiples of 24 channels (48, 120, 192, 264, 336) with qmin set to 128,
// for the up24x25 NEON MUL8 kernel (cr = 24, kr = 25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_div_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 48; channels < 384; channels += 72) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Multiples of 24 channels with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_div_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 48; channels < 384; channels += 72) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every channel count below cr: 1..23.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 1; channels < 24; channels++) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every channel count strictly between cr and 2 * cr: 25..47.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 25; channels < 48; channels++) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 25..47 channels with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 25; channels < 48; channels++) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 25..47 channels with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 25; channels < 48; channels++) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with channel counts 1, 24, 47, 70, 93, 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3, sweeping step over 2..25 (up to kr) for each channel count.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    for (size_t step = 2; step <= 25; step++) {
      DWConvMicrokernelTester()
        .cr(24)
        .kr(25)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// Width 5 with output_stride 127. The loop variable is forwarded to
// .channels() so that each iteration covers a different channel count; a
// hard-coded .channels(24) would repeat one configuration six times. The
// largest channel count reached (116) stays below the output stride.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py per the
// file header); the same change presumably belongs in the generator template.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(127)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(3)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Input zero point at 255 while the kernel zero point is 0.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8,kernel_zero_point_only)2334 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, kernel_zero_point_only) { 2335 TEST_REQUIRES_ARM_NEON; 2336 for (size_t channels = 1; channels <= 120; channels += 23) { 2337 DWConvMicrokernelTester() 2338 .cr(24) 2339 .kr(25) 2340 .channels(channels) 2341 .width(3) 2342 .input_zero_point(0) 2343 .kernel_zero_point(255) 2344 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2345 } 2346 } 2347 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8,input_offset)2348 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, input_offset) { 2349 TEST_REQUIRES_ARM_NEON; 2350 for (uint32_t channels = 48; channels < 384; channels += 72) { 2351 DWConvMicrokernelTester() 2352 .cr(24) 2353 .kr(25) 2354 .channels(channels) 2355 .input_offset(464) 2356 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2357 } 2358 } 2359 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8,zero)2360 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL8, zero) { 2361 TEST_REQUIRES_ARM_NEON; 2362 for (uint32_t mz = 0; mz < 25; mz++) { 2363 for (uint32_t channels = 48; channels < 384; channels += 72) { 2364 DWConvMicrokernelTester() 2365 .cr(24) 2366 .kr(25) 2367 .channels(channels) 2368 .input_offset(464) 2369 .zero_index(mz) 2370 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2371 } 2372 } 2373 } 2374 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2375 2376 2377 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_eq_24)2378 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_eq_24) { 2379 TEST_REQUIRES_ARM_NEON; 2380 DWConvMicrokernelTester() 2381 .cr(24) 2382 .kr(25) 2383 .channels(24) 2384 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2385 } 2386 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_div_24)2387 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24) { 2388 TEST_REQUIRES_ARM_NEON; 2389 for (uint32_t channels = 48; channels < 384; channels += 72) { 2390 DWConvMicrokernelTester() 2391 .cr(24) 2392 .kr(25) 2393 .channels(channels) 2394 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2395 } 2396 } 2397 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_div_24_with_qmin)2398 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24_with_qmin) { 2399 TEST_REQUIRES_ARM_NEON; 2400 for (uint32_t channels = 48; channels < 384; channels += 72) { 2401 DWConvMicrokernelTester() 2402 .cr(24) 2403 .kr(25) 2404 .channels(channels) 2405 .qmin(128) 2406 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2407 } 2408 } 2409 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_div_24_with_qmax)2410 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24_with_qmax) { 2411 TEST_REQUIRES_ARM_NEON; 2412 for (uint32_t channels = 48; channels < 384; channels += 72) { 2413 DWConvMicrokernelTester() 2414 .cr(24) 2415 .kr(25) 2416 .channels(channels) 2417 .qmax(128) 2418 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2419 } 2420 } 2421 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_lt_24)2422 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_lt_24) { 2423 TEST_REQUIRES_ARM_NEON; 2424 for (uint32_t channels = 1; channels < 24; channels++) { 2425 DWConvMicrokernelTester() 2426 .cr(24) 2427 .kr(25) 2428 .channels(channels) 2429 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2430 } 2431 } 2432 
// Every channel count strictly between cr and 2 * cr (25..47), for the
// up24x25 NEON MUL16 kernel (cr = 24, kr = 25).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 25; channels < 48; channels++) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 25..47 channels with qmin set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 25; channels < 48; channels++) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// 25..47 channels with qmax set to 128.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 25; channels < 48; channels++) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Width 3 with channel counts 1, 24, 47, 70, 93, 116.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 120; channels += 23) {
    DWConvMicrokernelTester()
      .cr(24)
      .kr(25)
      .channels(channels)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_step) { 2481 TEST_REQUIRES_ARM_NEON; 2482 for (size_t channels = 1; channels <= 120; channels += 23) { 2483 for (size_t step = 2; step <= 25; step++) { 2484 DWConvMicrokernelTester() 2485 .cr(24) 2486 .kr(25) 2487 .channels(channels) 2488 .width(3) 2489 .step(step) 2490 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2491 } 2492 } 2493 } 2494 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,multipixel_with_output_stride)2495 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_output_stride) { 2496 TEST_REQUIRES_ARM_NEON; 2497 for (size_t channels = 1; channels <= 120; channels += 23) { 2498 DWConvMicrokernelTester() 2499 .cr(24) 2500 .kr(25) 2501 .channels(24) 2502 .width(5) 2503 .output_stride(127) 2504 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2505 } 2506 } 2507 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,multipixel_with_qmin)2508 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_qmin) { 2509 TEST_REQUIRES_ARM_NEON; 2510 for (size_t channels = 1; channels <= 120; channels += 23) { 2511 DWConvMicrokernelTester() 2512 .cr(24) 2513 .kr(25) 2514 .channels(channels) 2515 .width(3) 2516 .qmin(128) 2517 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2518 } 2519 } 2520 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,multipixel_with_qmax)2521 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_qmax) { 2522 TEST_REQUIRES_ARM_NEON; 2523 for (size_t channels = 1; channels <= 120; channels += 23) { 2524 DWConvMicrokernelTester() 2525 .cr(24) 2526 .kr(25) 2527 .channels(channels) 2528 .width(3) 2529 .qmax(128) 2530 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2531 } 2532 } 2533 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,input_zero_point_only)2534 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, input_zero_point_only) { 2535 TEST_REQUIRES_ARM_NEON; 2536 for (size_t channels = 1; channels <= 120; channels += 23) { 2537 DWConvMicrokernelTester() 2538 .cr(24) 2539 .kr(25) 2540 .channels(channels) 2541 .width(3) 2542 .input_zero_point(255) 2543 .kernel_zero_point(0) 2544 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2545 } 2546 } 2547 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,kernel_zero_point_only)2548 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, kernel_zero_point_only) { 2549 TEST_REQUIRES_ARM_NEON; 2550 for (size_t channels = 1; channels <= 120; channels += 23) { 2551 DWConvMicrokernelTester() 2552 .cr(24) 2553 .kr(25) 2554 .channels(channels) 2555 .width(3) 2556 .input_zero_point(0) 2557 .kernel_zero_point(255) 2558 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2559 } 2560 } 2561 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,input_offset)2562 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, input_offset) { 2563 TEST_REQUIRES_ARM_NEON; 2564 for (uint32_t channels = 48; channels < 384; channels += 72) { 2565 DWConvMicrokernelTester() 2566 .cr(24) 2567 .kr(25) 2568 .channels(channels) 2569 .input_offset(464) 2570 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2571 } 2572 } 2573 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,zero)2574 TEST(QU8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, zero) { 2575 TEST_REQUIRES_ARM_NEON; 2576 for (uint32_t mz = 0; mz < 25; mz++) { 2577 for (uint32_t channels = 48; channels < 384; channels += 72) { 2578 DWConvMicrokernelTester() 2579 .cr(24) 
2580 .kr(25) 2581 .channels(channels) 2582 .input_offset(464) 2583 .zero_index(mz) 2584 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2585 } 2586 } 2587 } 2588 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2589 2590 2591 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,c_eq_32)2592 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_eq_32) { 2593 TEST_REQUIRES_ARM_NEON; 2594 DWConvMicrokernelTester() 2595 .cr(32) 2596 .kr(9) 2597 .channels(32) 2598 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2599 } 2600 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,c_div_32)2601 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_div_32) { 2602 TEST_REQUIRES_ARM_NEON; 2603 for (uint32_t channels = 64; channels < 512; channels += 96) { 2604 DWConvMicrokernelTester() 2605 .cr(32) 2606 .kr(9) 2607 .channels(channels) 2608 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2609 } 2610 } 2611 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,c_div_32_with_qmin)2612 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_div_32_with_qmin) { 2613 TEST_REQUIRES_ARM_NEON; 2614 for (uint32_t channels = 64; channels < 512; channels += 96) { 2615 DWConvMicrokernelTester() 2616 .cr(32) 2617 .kr(9) 2618 .channels(channels) 2619 .qmin(128) 2620 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2621 } 2622 } 2623 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,c_div_32_with_qmax)2624 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_div_32_with_qmax) { 2625 TEST_REQUIRES_ARM_NEON; 2626 for (uint32_t channels = 64; channels < 512; channels += 96) { 2627 DWConvMicrokernelTester() 2628 .cr(32) 2629 .kr(9) 2630 .channels(channels) 2631 .qmax(128) 2632 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2633 } 2634 } 2635 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,c_lt_32)2636 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_lt_32) { 2637 TEST_REQUIRES_ARM_NEON; 2638 for (uint32_t channels = 1; channels < 32; channels++) { 2639 DWConvMicrokernelTester() 2640 .cr(32) 2641 .kr(9) 2642 .channels(channels) 2643 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2644 } 2645 } 2646 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,c_gt_32)2647 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_gt_32) { 2648 TEST_REQUIRES_ARM_NEON; 2649 for (uint32_t channels = 33; channels < 64; channels++) { 2650 DWConvMicrokernelTester() 2651 .cr(32) 2652 .kr(9) 2653 .channels(channels) 2654 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2655 } 2656 } 2657 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,c_gt_32_with_qmin)2658 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_gt_32_with_qmin) { 2659 TEST_REQUIRES_ARM_NEON; 2660 for (uint32_t channels = 33; channels < 64; channels++) { 2661 DWConvMicrokernelTester() 2662 .cr(32) 2663 .kr(9) 2664 .channels(channels) 2665 .qmin(128) 2666 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2667 } 2668 } 2669 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,c_gt_32_with_qmax)2670 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, c_gt_32_with_qmax) { 2671 TEST_REQUIRES_ARM_NEON; 2672 for (uint32_t channels = 33; channels < 64; channels++) { 2673 DWConvMicrokernelTester() 2674 .cr(32) 2675 .kr(9) 2676 .channels(channels) 2677 .qmax(128) 2678 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2679 
} 2680 } 2681 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,multipixel)2682 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel) { 2683 TEST_REQUIRES_ARM_NEON; 2684 for (size_t channels = 1; channels <= 160; channels += 31) { 2685 DWConvMicrokernelTester() 2686 .cr(32) 2687 .kr(9) 2688 .channels(channels) 2689 .width(3) 2690 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2691 } 2692 } 2693 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,multipixel_with_step)2694 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel_with_step) { 2695 TEST_REQUIRES_ARM_NEON; 2696 for (size_t channels = 1; channels <= 160; channels += 31) { 2697 for (size_t step = 2; step <= 9; step++) { 2698 DWConvMicrokernelTester() 2699 .cr(32) 2700 .kr(9) 2701 .channels(channels) 2702 .width(3) 2703 .step(step) 2704 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2705 } 2706 } 2707 } 2708 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,multipixel_with_output_stride)2709 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel_with_output_stride) { 2710 TEST_REQUIRES_ARM_NEON; 2711 for (size_t channels = 1; channels <= 160; channels += 31) { 2712 DWConvMicrokernelTester() 2713 .cr(32) 2714 .kr(9) 2715 .channels(32) 2716 .width(5) 2717 .output_stride(163) 2718 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2719 } 2720 } 2721 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,multipixel_with_qmin)2722 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel_with_qmin) { 2723 TEST_REQUIRES_ARM_NEON; 2724 for (size_t channels = 1; channels <= 160; channels += 31) { 2725 DWConvMicrokernelTester() 2726 .cr(32) 2727 .kr(9) 2728 .channels(channels) 2729 .width(3) 2730 .qmin(128) 2731 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2732 } 2733 } 2734 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,multipixel_with_qmax)2735 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, multipixel_with_qmax) { 2736 TEST_REQUIRES_ARM_NEON; 2737 for (size_t channels = 1; channels <= 160; channels += 31) { 2738 DWConvMicrokernelTester() 2739 .cr(32) 2740 .kr(9) 2741 .channels(channels) 2742 .width(3) 2743 .qmax(128) 2744 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2745 } 2746 } 2747 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,input_zero_point_only)2748 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, input_zero_point_only) { 2749 TEST_REQUIRES_ARM_NEON; 2750 for (size_t channels = 1; channels <= 160; channels += 31) { 2751 DWConvMicrokernelTester() 2752 .cr(32) 2753 .kr(9) 2754 .channels(channels) 2755 .width(3) 2756 .input_zero_point(255) 2757 .kernel_zero_point(0) 2758 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2759 } 2760 } 2761 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,kernel_zero_point_only)2762 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, kernel_zero_point_only) { 2763 TEST_REQUIRES_ARM_NEON; 2764 for (size_t channels = 1; channels <= 160; channels += 31) { 2765 DWConvMicrokernelTester() 2766 .cr(32) 2767 .kr(9) 2768 .channels(channels) 2769 .width(3) 2770 .input_zero_point(0) 2771 .kernel_zero_point(255) 2772 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2773 } 2774 } 2775 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,input_offset)2776 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, input_offset) { 2777 TEST_REQUIRES_ARM_NEON; 2778 for (uint32_t channels = 64; channels < 512; channels += 96) { 2779 DWConvMicrokernelTester() 2780 .cr(32) 2781 .kr(9) 2782 
.channels(channels) 2783 .input_offset(592) 2784 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2785 } 2786 } 2787 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8,zero)2788 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL8, zero) { 2789 TEST_REQUIRES_ARM_NEON; 2790 for (uint32_t mz = 0; mz < 9; mz++) { 2791 for (uint32_t channels = 64; channels < 512; channels += 96) { 2792 DWConvMicrokernelTester() 2793 .cr(32) 2794 .kr(9) 2795 .channels(channels) 2796 .input_offset(592) 2797 .zero_index(mz) 2798 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2799 } 2800 } 2801 } 2802 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2803 2804 2805 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_eq_32)2806 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_eq_32) { 2807 TEST_REQUIRES_ARM_NEON; 2808 DWConvMicrokernelTester() 2809 .cr(32) 2810 .kr(9) 2811 .channels(32) 2812 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2813 } 2814 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_div_32)2815 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32) { 2816 TEST_REQUIRES_ARM_NEON; 2817 for (uint32_t channels = 64; channels < 512; channels += 96) { 2818 DWConvMicrokernelTester() 2819 .cr(32) 2820 .kr(9) 2821 .channels(channels) 2822 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2823 } 2824 } 2825 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_div_32_with_qmin)2826 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32_with_qmin) { 2827 TEST_REQUIRES_ARM_NEON; 2828 for (uint32_t channels = 64; channels < 512; channels += 96) { 2829 DWConvMicrokernelTester() 2830 .cr(32) 2831 .kr(9) 2832 .channels(channels) 2833 
.qmin(128) 2834 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2835 } 2836 } 2837 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_div_32_with_qmax)2838 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32_with_qmax) { 2839 TEST_REQUIRES_ARM_NEON; 2840 for (uint32_t channels = 64; channels < 512; channels += 96) { 2841 DWConvMicrokernelTester() 2842 .cr(32) 2843 .kr(9) 2844 .channels(channels) 2845 .qmax(128) 2846 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2847 } 2848 } 2849 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_lt_32)2850 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_lt_32) { 2851 TEST_REQUIRES_ARM_NEON; 2852 for (uint32_t channels = 1; channels < 32; channels++) { 2853 DWConvMicrokernelTester() 2854 .cr(32) 2855 .kr(9) 2856 .channels(channels) 2857 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2858 } 2859 } 2860 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_gt_32)2861 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32) { 2862 TEST_REQUIRES_ARM_NEON; 2863 for (uint32_t channels = 33; channels < 64; channels++) { 2864 DWConvMicrokernelTester() 2865 .cr(32) 2866 .kr(9) 2867 .channels(channels) 2868 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2869 } 2870 } 2871 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_gt_32_with_qmin)2872 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32_with_qmin) { 2873 TEST_REQUIRES_ARM_NEON; 2874 for (uint32_t channels = 33; channels < 64; channels++) { 2875 DWConvMicrokernelTester() 2876 .cr(32) 2877 .kr(9) 2878 .channels(channels) 2879 .qmin(128) 2880 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, 
xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2881 } 2882 } 2883 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_gt_32_with_qmax)2884 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32_with_qmax) { 2885 TEST_REQUIRES_ARM_NEON; 2886 for (uint32_t channels = 33; channels < 64; channels++) { 2887 DWConvMicrokernelTester() 2888 .cr(32) 2889 .kr(9) 2890 .channels(channels) 2891 .qmax(128) 2892 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2893 } 2894 } 2895 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel)2896 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel) { 2897 TEST_REQUIRES_ARM_NEON; 2898 for (size_t channels = 1; channels <= 160; channels += 31) { 2899 DWConvMicrokernelTester() 2900 .cr(32) 2901 .kr(9) 2902 .channels(channels) 2903 .width(3) 2904 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2905 } 2906 } 2907 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel_with_step)2908 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_step) { 2909 TEST_REQUIRES_ARM_NEON; 2910 for (size_t channels = 1; channels <= 160; channels += 31) { 2911 for (size_t step = 2; step <= 9; step++) { 2912 DWConvMicrokernelTester() 2913 .cr(32) 2914 .kr(9) 2915 .channels(channels) 2916 .width(3) 2917 .step(step) 2918 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2919 } 2920 } 2921 } 2922 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel_with_output_stride)2923 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_output_stride) { 2924 TEST_REQUIRES_ARM_NEON; 2925 for (size_t channels = 1; channels <= 160; channels += 31) { 2926 DWConvMicrokernelTester() 2927 .cr(32) 2928 .kr(9) 2929 .channels(32) 2930 .width(5) 2931 .output_stride(163) 2932 
.Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2933 } 2934 } 2935 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel_with_qmin)2936 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_qmin) { 2937 TEST_REQUIRES_ARM_NEON; 2938 for (size_t channels = 1; channels <= 160; channels += 31) { 2939 DWConvMicrokernelTester() 2940 .cr(32) 2941 .kr(9) 2942 .channels(channels) 2943 .width(3) 2944 .qmin(128) 2945 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2946 } 2947 } 2948 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel_with_qmax)2949 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_qmax) { 2950 TEST_REQUIRES_ARM_NEON; 2951 for (size_t channels = 1; channels <= 160; channels += 31) { 2952 DWConvMicrokernelTester() 2953 .cr(32) 2954 .kr(9) 2955 .channels(channels) 2956 .width(3) 2957 .qmax(128) 2958 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2959 } 2960 } 2961 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,input_zero_point_only)2962 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, input_zero_point_only) { 2963 TEST_REQUIRES_ARM_NEON; 2964 for (size_t channels = 1; channels <= 160; channels += 31) { 2965 DWConvMicrokernelTester() 2966 .cr(32) 2967 .kr(9) 2968 .channels(channels) 2969 .width(3) 2970 .input_zero_point(255) 2971 .kernel_zero_point(0) 2972 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2973 } 2974 } 2975 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,kernel_zero_point_only)2976 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, kernel_zero_point_only) { 2977 TEST_REQUIRES_ARM_NEON; 2978 for (size_t channels = 1; channels <= 160; channels += 31) { 2979 
DWConvMicrokernelTester() 2980 .cr(32) 2981 .kr(9) 2982 .channels(channels) 2983 .width(3) 2984 .input_zero_point(0) 2985 .kernel_zero_point(255) 2986 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2987 } 2988 } 2989 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,input_offset)2990 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, input_offset) { 2991 TEST_REQUIRES_ARM_NEON; 2992 for (uint32_t channels = 64; channels < 512; channels += 96) { 2993 DWConvMicrokernelTester() 2994 .cr(32) 2995 .kr(9) 2996 .channels(channels) 2997 .input_offset(592) 2998 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 2999 } 3000 } 3001 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,zero)3002 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, zero) { 3003 TEST_REQUIRES_ARM_NEON; 3004 for (uint32_t mz = 0; mz < 9; mz++) { 3005 for (uint32_t channels = 64; channels < 512; channels += 96) { 3006 DWConvMicrokernelTester() 3007 .cr(32) 3008 .kr(9) 3009 .channels(channels) 3010 .input_offset(592) 3011 .zero_index(mz) 3012 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3013 } 3014 } 3015 } 3016 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3017 3018 3019 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,c_eq_32)3020 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_eq_32) { 3021 TEST_REQUIRES_ARM_NEON; 3022 DWConvMicrokernelTester() 3023 .cr(32) 3024 .kr(25) 3025 .channels(32) 3026 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3027 } 3028 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,c_div_32)3029 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_div_32) { 3030 TEST_REQUIRES_ARM_NEON; 3031 for (uint32_t channels = 64; channels 
< 512; channels += 96) { 3032 DWConvMicrokernelTester() 3033 .cr(32) 3034 .kr(25) 3035 .channels(channels) 3036 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3037 } 3038 } 3039 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,c_div_32_with_qmin)3040 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_div_32_with_qmin) { 3041 TEST_REQUIRES_ARM_NEON; 3042 for (uint32_t channels = 64; channels < 512; channels += 96) { 3043 DWConvMicrokernelTester() 3044 .cr(32) 3045 .kr(25) 3046 .channels(channels) 3047 .qmin(128) 3048 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3049 } 3050 } 3051 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,c_div_32_with_qmax)3052 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_div_32_with_qmax) { 3053 TEST_REQUIRES_ARM_NEON; 3054 for (uint32_t channels = 64; channels < 512; channels += 96) { 3055 DWConvMicrokernelTester() 3056 .cr(32) 3057 .kr(25) 3058 .channels(channels) 3059 .qmax(128) 3060 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3061 } 3062 } 3063 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,c_lt_32)3064 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_lt_32) { 3065 TEST_REQUIRES_ARM_NEON; 3066 for (uint32_t channels = 1; channels < 32; channels++) { 3067 DWConvMicrokernelTester() 3068 .cr(32) 3069 .kr(25) 3070 .channels(channels) 3071 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3072 } 3073 } 3074 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,c_gt_32)3075 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_gt_32) { 3076 TEST_REQUIRES_ARM_NEON; 3077 for (uint32_t channels = 33; channels < 64; channels++) { 3078 DWConvMicrokernelTester() 3079 .cr(32) 3080 .kr(25) 3081 .channels(channels) 
3082 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3083 } 3084 } 3085 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,c_gt_32_with_qmin)3086 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_gt_32_with_qmin) { 3087 TEST_REQUIRES_ARM_NEON; 3088 for (uint32_t channels = 33; channels < 64; channels++) { 3089 DWConvMicrokernelTester() 3090 .cr(32) 3091 .kr(25) 3092 .channels(channels) 3093 .qmin(128) 3094 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3095 } 3096 } 3097 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,c_gt_32_with_qmax)3098 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, c_gt_32_with_qmax) { 3099 TEST_REQUIRES_ARM_NEON; 3100 for (uint32_t channels = 33; channels < 64; channels++) { 3101 DWConvMicrokernelTester() 3102 .cr(32) 3103 .kr(25) 3104 .channels(channels) 3105 .qmax(128) 3106 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3107 } 3108 } 3109 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,multipixel)3110 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel) { 3111 TEST_REQUIRES_ARM_NEON; 3112 for (size_t channels = 1; channels <= 160; channels += 31) { 3113 DWConvMicrokernelTester() 3114 .cr(32) 3115 .kr(25) 3116 .channels(channels) 3117 .width(3) 3118 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3119 } 3120 } 3121 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,multipixel_with_step)3122 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel_with_step) { 3123 TEST_REQUIRES_ARM_NEON; 3124 for (size_t channels = 1; channels <= 160; channels += 31) { 3125 for (size_t step = 2; step <= 25; step++) { 3126 DWConvMicrokernelTester() 3127 .cr(32) 3128 .kr(25) 3129 .channels(channels) 3130 .width(3) 
3131 .step(step) 3132 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3133 } 3134 } 3135 } 3136 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,multipixel_with_output_stride)3137 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel_with_output_stride) { 3138 TEST_REQUIRES_ARM_NEON; 3139 for (size_t channels = 1; channels <= 160; channels += 31) { 3140 DWConvMicrokernelTester() 3141 .cr(32) 3142 .kr(25) 3143 .channels(32) 3144 .width(5) 3145 .output_stride(163) 3146 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3147 } 3148 } 3149 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,multipixel_with_qmin)3150 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel_with_qmin) { 3151 TEST_REQUIRES_ARM_NEON; 3152 for (size_t channels = 1; channels <= 160; channels += 31) { 3153 DWConvMicrokernelTester() 3154 .cr(32) 3155 .kr(25) 3156 .channels(channels) 3157 .width(3) 3158 .qmin(128) 3159 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3160 } 3161 } 3162 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,multipixel_with_qmax)3163 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, multipixel_with_qmax) { 3164 TEST_REQUIRES_ARM_NEON; 3165 for (size_t channels = 1; channels <= 160; channels += 31) { 3166 DWConvMicrokernelTester() 3167 .cr(32) 3168 .kr(25) 3169 .channels(channels) 3170 .width(3) 3171 .qmax(128) 3172 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3173 } 3174 } 3175 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,input_zero_point_only)3176 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, input_zero_point_only) { 3177 TEST_REQUIRES_ARM_NEON; 3178 for (size_t channels = 1; channels <= 160; channels += 31) { 3179 
DWConvMicrokernelTester() 3180 .cr(32) 3181 .kr(25) 3182 .channels(channels) 3183 .width(3) 3184 .input_zero_point(255) 3185 .kernel_zero_point(0) 3186 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3187 } 3188 } 3189 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,kernel_zero_point_only)3190 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, kernel_zero_point_only) { 3191 TEST_REQUIRES_ARM_NEON; 3192 for (size_t channels = 1; channels <= 160; channels += 31) { 3193 DWConvMicrokernelTester() 3194 .cr(32) 3195 .kr(25) 3196 .channels(channels) 3197 .width(3) 3198 .input_zero_point(0) 3199 .kernel_zero_point(255) 3200 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3201 } 3202 } 3203 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,input_offset)3204 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, input_offset) { 3205 TEST_REQUIRES_ARM_NEON; 3206 for (uint32_t channels = 64; channels < 512; channels += 96) { 3207 DWConvMicrokernelTester() 3208 .cr(32) 3209 .kr(25) 3210 .channels(channels) 3211 .input_offset(592) 3212 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3213 } 3214 } 3215 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8,zero)3216 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL8, zero) { 3217 TEST_REQUIRES_ARM_NEON; 3218 for (uint32_t mz = 0; mz < 25; mz++) { 3219 for (uint32_t channels = 64; channels < 512; channels += 96) { 3220 DWConvMicrokernelTester() 3221 .cr(32) 3222 .kr(25) 3223 .channels(channels) 3224 .input_offset(592) 3225 .zero_index(mz) 3226 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul8, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3227 } 3228 } 3229 } 3230 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3231 3232 3233 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 
// Tests for xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16.
// Every case configures the tester with cr(32) and kr(25), and passes the
// NEON rndnu params initializer together with xnn_qu8_requantize_rndnu.

// Channel count equal to the cr(32) setting.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_eq_32) {
  TEST_REQUIRES_ARM_NEON;
  DWConvMicrokernelTester()
    .cr(32)
    .kr(25)
    .channels(32)
    .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
}

// Channel counts that are multiples of 32: 64, 160, 256, 352, 448.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 64; channels < 512; channels += 96) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same channel sweep as c_div_32, with qmin(128).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 64; channels < 512; channels += 96) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same channel sweep as c_div_32, with qmax(128).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 64; channels < 512; channels += 96) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every channel count below 32 (1..31).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_lt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 1; channels < 32; channels++) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Every channel count strictly between 32 and 64 (33..63).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 33; channels < 64; channels++) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same channel sweep as c_gt_32, with qmin(128).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 33; channels < 64; channels++) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same channel sweep as c_gt_32, with qmax(128).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 33; channels < 64; channels++) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// width(3) with channel counts 1, 32, 63, 94, 125, 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(3)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same sweep as multipixel, additionally varying step() from 2 through 25.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    for (size_t step = 2; step <= 25; step++) {
      DWConvMicrokernelTester()
        .cr(32)
        .kr(25)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
    }
  }
}

// width(5) with output_stride(163), swept over channel counts
// 1, 32, 63, 94, 125, 156.
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      // Pass the loop variable. This call previously hard-coded 32, so the
      // loop variable was unused and all six iterations were identical.
      // The stride of 163 exceeds the largest channel count visited (156).
      // NOTE(review): assumes the tester only needs output_stride >= channels
      // -- confirm against dwconv-microkernel-tester.h.
      // NOTE(review): this file is marked auto-generated; mirror the change
      // in tools/generate-dwconv-test.py so it survives regeneration.
      .channels(channels)
      .width(5)
      .output_stride(163)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same sweep as multipixel, with qmin(128).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same sweep as multipixel, with qmax(128).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(3)
      .qmax(128)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same sweep as multipixel, with input_zero_point(255) and
// kernel_zero_point(0).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, input_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(3)
      .input_zero_point(255)
      .kernel_zero_point(0)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same sweep as multipixel, with input_zero_point(0) and
// kernel_zero_point(255).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, kernel_zero_point_only) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 160; channels += 31) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .width(3)
      .input_zero_point(0)
      .kernel_zero_point(255)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

// Same channel sweep as c_div_32, with input_offset(592).
TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 64; channels < 512; channels += 96) {
    DWConvMicrokernelTester()
      .cr(32)
      .kr(25)
      .channels(channels)
      .input_offset(592)
      .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
  }
}

TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,zero)3430 TEST(QU8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, zero) { 3431 TEST_REQUIRES_ARM_NEON; 3432 for (uint32_t mz = 0; mz < 25; mz++) { 3433 for (uint32_t channels = 64; channels < 512; channels += 96) { 3434 DWConvMicrokernelTester() 3435 .cr(32) 3436 .kr(25) 3437 .channels(channels) 3438 .input_offset(592) 3439 .zero_index(mz) 3440 .Test(xnn_qu8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu); 3441 } 3442 } 3443 } 3444 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3445