1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/qs8-igemm-minmax-rndnu.yaml 11 // Generator: tools/generate-gemm-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/allocator.h> 17 #include <xnnpack/common.h> 18 #include <xnnpack/isa-checks.h> 19 #include <xnnpack/microparams-init.h> 20 21 #include <xnnpack/gemm.h> 22 #include <xnnpack/igemm.h> 23 #include <xnnpack/ppmm.h> 24 #include "gemm-microkernel-tester.h" 25 26 27 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8)28 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8) { 29 TEST_REQUIRES_ARM_NEON; 30 GemmMicrokernelTester() 31 .mr(1) 32 .nr(8) 33 .kr(1) 34 .sr(1) 35 .m(1) 36 .n(8) 37 .k(8) 38 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 39 } 40 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cn)41 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cn) { 42 TEST_REQUIRES_ARM_NEON; 43 GemmMicrokernelTester() 44 .mr(1) 45 .nr(8) 46 .kr(1) 47 .sr(1) 48 .m(1) 49 .n(8) 50 .k(8) 51 .cn_stride(11) 52 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 53 } 54 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile)55 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile) { 56 TEST_REQUIRES_ARM_NEON; 57 for (uint32_t n = 1; n <= 8; n++) { 58 for (uint32_t m = 1; m <= 1; m++) { 59 GemmMicrokernelTester() 60 .mr(1) 61 .nr(8) 62 .kr(1) 63 .sr(1) 64 
.m(m) 65 .n(n) 66 .k(8) 67 .iterations(1) 68 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 69 } 70 } 71 } 72 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile_m)73 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_m) { 74 TEST_REQUIRES_ARM_NEON; 75 for (uint32_t m = 1; m <= 1; m++) { 76 GemmMicrokernelTester() 77 .mr(1) 78 .nr(8) 79 .kr(1) 80 .sr(1) 81 .m(m) 82 .n(8) 83 .k(8) 84 .iterations(1) 85 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 86 } 87 } 88 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_eq_8_subtile_n)89 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_eq_8_subtile_n) { 90 TEST_REQUIRES_ARM_NEON; 91 for (uint32_t n = 1; n <= 8; n++) { 92 GemmMicrokernelTester() 93 .mr(1) 94 .nr(8) 95 .kr(1) 96 .sr(1) 97 .m(1) 98 .n(n) 99 .k(8) 100 .iterations(1) 101 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 102 } 103 } 104 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_lt_8)105 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8) { 106 TEST_REQUIRES_ARM_NEON; 107 for (size_t k = 1; k < 8; k++) { 108 GemmMicrokernelTester() 109 .mr(1) 110 .nr(8) 111 .kr(1) 112 .sr(1) 113 .m(1) 114 .n(8) 115 .k(k) 116 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 117 } 118 } 119 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_lt_8_subtile)120 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_lt_8_subtile) { 121 TEST_REQUIRES_ARM_NEON; 122 for (size_t k = 1; k < 8; k++) { 123 for (uint32_t 
n = 1; n <= 8; n++) { 124 for (uint32_t m = 1; m <= 1; m++) { 125 GemmMicrokernelTester() 126 .mr(1) 127 .nr(8) 128 .kr(1) 129 .sr(1) 130 .m(m) 131 .n(n) 132 .k(k) 133 .iterations(1) 134 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 135 } 136 } 137 } 138 } 139 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_gt_8)140 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8) { 141 TEST_REQUIRES_ARM_NEON; 142 for (size_t k = 9; k < 16; k++) { 143 GemmMicrokernelTester() 144 .mr(1) 145 .nr(8) 146 .kr(1) 147 .sr(1) 148 .m(1) 149 .n(8) 150 .k(k) 151 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 152 } 153 } 154 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_gt_8_subtile)155 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_gt_8_subtile) { 156 TEST_REQUIRES_ARM_NEON; 157 for (size_t k = 9; k < 16; k++) { 158 for (uint32_t n = 1; n <= 8; n++) { 159 for (uint32_t m = 1; m <= 1; m++) { 160 GemmMicrokernelTester() 161 .mr(1) 162 .nr(8) 163 .kr(1) 164 .sr(1) 165 .m(m) 166 .n(n) 167 .k(k) 168 .iterations(1) 169 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 170 } 171 } 172 } 173 } 174 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_div_8)175 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8) { 176 TEST_REQUIRES_ARM_NEON; 177 for (size_t k = 16; k <= 80; k += 8) { 178 GemmMicrokernelTester() 179 .mr(1) 180 .nr(8) 181 .kr(1) 182 .sr(1) 183 .m(1) 184 .n(8) 185 .k(k) 186 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 187 } 188 } 189 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,k_div_8_subtile)190 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, k_div_8_subtile) { 191 TEST_REQUIRES_ARM_NEON; 192 for (size_t k = 16; k <= 80; k += 8) { 193 for (uint32_t n = 1; n <= 8; n++) { 194 for (uint32_t m = 1; m <= 1; m++) { 195 GemmMicrokernelTester() 196 .mr(1) 197 .nr(8) 198 .kr(1) 199 .sr(1) 200 .m(m) 201 .n(n) 202 .k(k) 203 .iterations(1) 204 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 205 } 206 } 207 } 208 } 209 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_gt_8)210 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8) { 211 TEST_REQUIRES_ARM_NEON; 212 for (uint32_t n = 9; n < 16; n++) { 213 for (size_t k = 1; k <= 40; k += 9) { 214 GemmMicrokernelTester() 215 .mr(1) 216 .nr(8) 217 .kr(1) 218 .sr(1) 219 .m(1) 220 .n(n) 221 .k(k) 222 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 223 } 224 } 225 } 226 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_gt_8_strided_cn)227 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_strided_cn) { 228 TEST_REQUIRES_ARM_NEON; 229 for (uint32_t n = 9; n < 16; n++) { 230 for (size_t k = 1; k <= 40; k += 9) { 231 GemmMicrokernelTester() 232 .mr(1) 233 .nr(8) 234 .kr(1) 235 .sr(1) 236 .m(1) 237 .n(n) 238 .k(k) 239 .cn_stride(11) 240 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 241 } 242 } 243 } 244 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_gt_8_subtile)245 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_subtile) { 246 TEST_REQUIRES_ARM_NEON; 247 for (uint32_t n = 9; n < 16; n++) { 248 for 
(size_t k = 1; k <= 40; k += 9) { 249 for (uint32_t m = 1; m <= 1; m++) { 250 GemmMicrokernelTester() 251 .mr(1) 252 .nr(8) 253 .kr(1) 254 .sr(1) 255 .m(m) 256 .n(n) 257 .k(k) 258 .iterations(1) 259 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 260 } 261 } 262 } 263 } 264 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8)265 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8) { 266 TEST_REQUIRES_ARM_NEON; 267 for (uint32_t n = 16; n <= 24; n += 8) { 268 for (size_t k = 1; k <= 40; k += 9) { 269 GemmMicrokernelTester() 270 .mr(1) 271 .nr(8) 272 .kr(1) 273 .sr(1) 274 .m(1) 275 .n(n) 276 .k(k) 277 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 278 } 279 } 280 } 281 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8_strided_cn)282 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_strided_cn) { 283 TEST_REQUIRES_ARM_NEON; 284 for (uint32_t n = 16; n <= 24; n += 8) { 285 for (size_t k = 1; k <= 40; k += 9) { 286 GemmMicrokernelTester() 287 .mr(1) 288 .nr(8) 289 .kr(1) 290 .sr(1) 291 .m(1) 292 .n(n) 293 .k(k) 294 .cn_stride(11) 295 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 296 } 297 } 298 } 299 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8_subtile)300 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_subtile) { 301 TEST_REQUIRES_ARM_NEON; 302 for (uint32_t n = 16; n <= 24; n += 8) { 303 for (size_t k = 1; k <= 40; k += 9) { 304 for (uint32_t m = 1; m <= 1; m++) { 305 GemmMicrokernelTester() 306 .mr(1) 307 .nr(8) 308 .kr(1) 309 .sr(1) 310 .m(m) 311 .n(n) 312 .k(k) 313 .iterations(1) 314 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 315 } 316 } 317 } 318 } 319 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,small_kernel)320 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, small_kernel) { 321 TEST_REQUIRES_ARM_NEON; 322 for (size_t k = 1; k <= 40; k += 9) { 323 GemmMicrokernelTester() 324 .mr(1) 325 .nr(8) 326 .kr(1) 327 .sr(1) 328 .m(1) 329 .n(8) 330 .k(k) 331 .ks(3) 332 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 333 } 334 } 335 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,small_kernel_subtile)336 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, small_kernel_subtile) { 337 TEST_REQUIRES_ARM_NEON; 338 for (size_t k = 1; k <= 40; k += 9) { 339 for (uint32_t n = 1; n <= 8; n++) { 340 for (uint32_t m = 1; m <= 1; m++) { 341 GemmMicrokernelTester() 342 .mr(1) 343 .nr(8) 344 .kr(1) 345 .sr(1) 346 .m(m) 347 .n(n) 348 .k(k) 349 .ks(3) 350 .iterations(1) 351 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 352 } 353 } 354 } 355 } 356 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_gt_8_small_kernel)357 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_gt_8_small_kernel) { 358 TEST_REQUIRES_ARM_NEON; 359 for (uint32_t n = 9; n < 16; n++) { 360 for (size_t k = 1; k <= 40; k += 9) { 361 GemmMicrokernelTester() 362 .mr(1) 363 .nr(8) 364 .kr(1) 365 .sr(1) 366 .m(1) 367 .n(n) 368 .k(k) 369 .ks(3) 370 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 371 } 372 } 373 } 374 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,n_div_8_small_kernel)375 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, n_div_8_small_kernel) { 376 TEST_REQUIRES_ARM_NEON; 377 for (uint32_t n = 16; n <= 24; n += 8) { 378 for (size_t k = 1; k <= 40; k += 9) { 379 GemmMicrokernelTester() 380 .mr(1) 381 .nr(8) 382 .kr(1) 383 .sr(1) 384 .m(1) 385 .n(n) 386 .k(k) 387 .ks(3) 388 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 389 } 390 } 391 } 392 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cm_subtile)393 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm_subtile) { 394 TEST_REQUIRES_ARM_NEON; 395 for (size_t k = 1; k <= 40; k += 9) { 396 for (uint32_t n = 1; n <= 8; n++) { 397 for (uint32_t m = 1; m <= 1; m++) { 398 GemmMicrokernelTester() 399 .mr(1) 400 .nr(8) 401 .kr(1) 402 .sr(1) 403 .m(m) 404 .n(n) 405 .k(k) 406 .cm_stride(11) 407 .iterations(1) 408 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 409 } 410 } 411 } 412 } 413 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,a_offset)414 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, a_offset) { 415 TEST_REQUIRES_ARM_NEON; 416 for (size_t k = 1; k <= 40; k += 9) { 417 GemmMicrokernelTester() 418 .mr(1) 419 .nr(8) 420 .kr(1) 421 .sr(1) 422 .m(1) 423 .n(8) 424 .k(k) 425 .ks(3) 426 .a_offset(43) 427 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 428 } 429 } 430 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,zero)431 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, zero) { 432 TEST_REQUIRES_ARM_NEON; 433 for (size_t k = 1; k <= 40; k += 9) { 434 for 
(uint32_t mz = 0; mz < 1; mz++) { 435 GemmMicrokernelTester() 436 .mr(1) 437 .nr(8) 438 .kr(1) 439 .sr(1) 440 .m(1) 441 .n(8) 442 .k(k) 443 .ks(3) 444 .a_offset(43) 445 .zero_index(mz) 446 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 447 } 448 } 449 } 450 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,qmin)451 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmin) { 452 TEST_REQUIRES_ARM_NEON; 453 GemmMicrokernelTester() 454 .mr(1) 455 .nr(8) 456 .kr(1) 457 .sr(1) 458 .m(1) 459 .n(8) 460 .k(8) 461 .qmin(128) 462 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 463 } 464 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,qmax)465 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, qmax) { 466 TEST_REQUIRES_ARM_NEON; 467 GemmMicrokernelTester() 468 .mr(1) 469 .nr(8) 470 .kr(1) 471 .sr(1) 472 .m(1) 473 .n(8) 474 .k(8) 475 .qmax(128) 476 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 477 } 478 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7,strided_cm)479 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__AARCH32_NEON_MLAL_LANE_CORTEX_A7, strided_cm) { 480 TEST_REQUIRES_ARM_NEON; 481 GemmMicrokernelTester() 482 .mr(1) 483 .nr(8) 484 .kr(1) 485 .sr(1) 486 .m(1) 487 .n(8) 488 .k(8) 489 .cm_stride(11) 490 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__aarch32_neon_mlal_lane_cortex_a7, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 491 } 492 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY 493 494 495 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_eq_16)496 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16) { 497 
TEST_REQUIRES_ARM_NEON; 498 GemmMicrokernelTester() 499 .mr(2) 500 .nr(8) 501 .kr(16) 502 .sr(1) 503 .m(2) 504 .n(8) 505 .k(16) 506 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 507 } 508 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,strided_cn)509 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cn) { 510 TEST_REQUIRES_ARM_NEON; 511 GemmMicrokernelTester() 512 .mr(2) 513 .nr(8) 514 .kr(16) 515 .sr(1) 516 .m(2) 517 .n(8) 518 .k(16) 519 .cn_stride(11) 520 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 521 } 522 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_eq_16_subtile)523 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile) { 524 TEST_REQUIRES_ARM_NEON; 525 for (uint32_t n = 1; n <= 8; n++) { 526 for (uint32_t m = 1; m <= 2; m++) { 527 GemmMicrokernelTester() 528 .mr(2) 529 .nr(8) 530 .kr(16) 531 .sr(1) 532 .m(m) 533 .n(n) 534 .k(16) 535 .iterations(1) 536 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 537 } 538 } 539 } 540 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_eq_16_subtile_m)541 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_m) { 542 TEST_REQUIRES_ARM_NEON; 543 for (uint32_t m = 1; m <= 2; m++) { 544 GemmMicrokernelTester() 545 .mr(2) 546 .nr(8) 547 .kr(16) 548 .sr(1) 549 .m(m) 550 .n(8) 551 .k(16) 552 .iterations(1) 553 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 554 } 555 } 556 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_eq_16_subtile_n)557 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_eq_16_subtile_n) { 558 TEST_REQUIRES_ARM_NEON; 559 for (uint32_t n = 1; n <= 8; n++) { 560 
GemmMicrokernelTester() 561 .mr(2) 562 .nr(8) 563 .kr(16) 564 .sr(1) 565 .m(2) 566 .n(n) 567 .k(16) 568 .iterations(1) 569 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 570 } 571 } 572 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_lt_16)573 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_lt_16) { 574 TEST_REQUIRES_ARM_NEON; 575 for (size_t k = 1; k < 16; k++) { 576 GemmMicrokernelTester() 577 .mr(2) 578 .nr(8) 579 .kr(16) 580 .sr(1) 581 .m(2) 582 .n(8) 583 .k(k) 584 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 585 } 586 } 587 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_lt_16_subtile)588 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_lt_16_subtile) { 589 TEST_REQUIRES_ARM_NEON; 590 for (size_t k = 1; k < 16; k++) { 591 for (uint32_t n = 1; n <= 8; n++) { 592 for (uint32_t m = 1; m <= 2; m++) { 593 GemmMicrokernelTester() 594 .mr(2) 595 .nr(8) 596 .kr(16) 597 .sr(1) 598 .m(m) 599 .n(n) 600 .k(k) 601 .iterations(1) 602 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 603 } 604 } 605 } 606 } 607 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_gt_16)608 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_gt_16) { 609 TEST_REQUIRES_ARM_NEON; 610 for (size_t k = 17; k < 32; k++) { 611 GemmMicrokernelTester() 612 .mr(2) 613 .nr(8) 614 .kr(16) 615 .sr(1) 616 .m(2) 617 .n(8) 618 .k(k) 619 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 620 } 621 } 622 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_gt_16_subtile)623 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_gt_16_subtile) { 624 TEST_REQUIRES_ARM_NEON; 625 for (size_t k = 17; k < 32; k++) 
{ 626 for (uint32_t n = 1; n <= 8; n++) { 627 for (uint32_t m = 1; m <= 2; m++) { 628 GemmMicrokernelTester() 629 .mr(2) 630 .nr(8) 631 .kr(16) 632 .sr(1) 633 .m(m) 634 .n(n) 635 .k(k) 636 .iterations(1) 637 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 638 } 639 } 640 } 641 } 642 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_div_16)643 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_div_16) { 644 TEST_REQUIRES_ARM_NEON; 645 for (size_t k = 32; k <= 160; k += 16) { 646 GemmMicrokernelTester() 647 .mr(2) 648 .nr(8) 649 .kr(16) 650 .sr(1) 651 .m(2) 652 .n(8) 653 .k(k) 654 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 655 } 656 } 657 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,k_div_16_subtile)658 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, k_div_16_subtile) { 659 TEST_REQUIRES_ARM_NEON; 660 for (size_t k = 32; k <= 160; k += 16) { 661 for (uint32_t n = 1; n <= 8; n++) { 662 for (uint32_t m = 1; m <= 2; m++) { 663 GemmMicrokernelTester() 664 .mr(2) 665 .nr(8) 666 .kr(16) 667 .sr(1) 668 .m(m) 669 .n(n) 670 .k(k) 671 .iterations(1) 672 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 673 } 674 } 675 } 676 } 677 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,n_gt_8)678 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8) { 679 TEST_REQUIRES_ARM_NEON; 680 for (uint32_t n = 9; n < 16; n++) { 681 for (size_t k = 1; k <= 80; k += 17) { 682 GemmMicrokernelTester() 683 .mr(2) 684 .nr(8) 685 .kr(16) 686 .sr(1) 687 .m(2) 688 .n(n) 689 .k(k) 690 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 691 } 692 } 693 } 694 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,n_gt_8_strided_cn)695 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_strided_cn) { 696 TEST_REQUIRES_ARM_NEON; 697 for (uint32_t n = 9; n < 16; n++) { 698 for (size_t k = 1; k <= 80; k += 17) { 699 GemmMicrokernelTester() 700 .mr(2) 701 .nr(8) 702 .kr(16) 703 .sr(1) 704 .m(2) 705 .n(n) 706 .k(k) 707 .cn_stride(11) 708 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 709 } 710 } 711 } 712 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,n_gt_8_subtile)713 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_subtile) { 714 TEST_REQUIRES_ARM_NEON; 715 for (uint32_t n = 9; n < 16; n++) { 716 for (size_t k = 1; k <= 80; k += 17) { 717 for (uint32_t m = 1; m <= 2; m++) { 718 GemmMicrokernelTester() 719 .mr(2) 720 .nr(8) 721 .kr(16) 722 .sr(1) 723 .m(m) 724 .n(n) 725 .k(k) 726 .iterations(1) 727 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 728 } 729 } 730 } 731 } 732 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,n_div_8)733 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8) { 734 TEST_REQUIRES_ARM_NEON; 735 for (uint32_t n = 16; n <= 24; n += 8) { 736 for (size_t k = 1; k <= 80; k += 17) { 737 GemmMicrokernelTester() 738 .mr(2) 739 .nr(8) 740 .kr(16) 741 .sr(1) 742 .m(2) 743 .n(n) 744 .k(k) 745 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 746 } 747 } 748 } 749 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,n_div_8_strided_cn)750 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_strided_cn) { 751 TEST_REQUIRES_ARM_NEON; 752 for (uint32_t n = 16; n <= 24; n += 8) { 753 for (size_t k = 1; k <= 80; k += 17) { 754 GemmMicrokernelTester() 755 .mr(2) 756 .nr(8) 757 .kr(16) 758 .sr(1) 
759 .m(2) 760 .n(n) 761 .k(k) 762 .cn_stride(11) 763 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 764 } 765 } 766 } 767 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,n_div_8_subtile)768 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_subtile) { 769 TEST_REQUIRES_ARM_NEON; 770 for (uint32_t n = 16; n <= 24; n += 8) { 771 for (size_t k = 1; k <= 80; k += 17) { 772 for (uint32_t m = 1; m <= 2; m++) { 773 GemmMicrokernelTester() 774 .mr(2) 775 .nr(8) 776 .kr(16) 777 .sr(1) 778 .m(m) 779 .n(n) 780 .k(k) 781 .iterations(1) 782 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 783 } 784 } 785 } 786 } 787 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,small_kernel)788 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, small_kernel) { 789 TEST_REQUIRES_ARM_NEON; 790 for (size_t k = 1; k <= 80; k += 17) { 791 GemmMicrokernelTester() 792 .mr(2) 793 .nr(8) 794 .kr(16) 795 .sr(1) 796 .m(2) 797 .n(8) 798 .k(k) 799 .ks(3) 800 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 801 } 802 } 803 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,small_kernel_subtile)804 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, small_kernel_subtile) { 805 TEST_REQUIRES_ARM_NEON; 806 for (size_t k = 1; k <= 80; k += 17) { 807 for (uint32_t n = 1; n <= 8; n++) { 808 for (uint32_t m = 1; m <= 2; m++) { 809 GemmMicrokernelTester() 810 .mr(2) 811 .nr(8) 812 .kr(16) 813 .sr(1) 814 .m(m) 815 .n(n) 816 .k(k) 817 .ks(3) 818 .iterations(1) 819 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 820 } 821 } 822 } 823 } 824 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,n_gt_8_small_kernel)825 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_gt_8_small_kernel) { 826 TEST_REQUIRES_ARM_NEON; 827 for (uint32_t n = 9; n < 16; n++) { 828 for (size_t k = 1; k <= 80; k += 17) { 829 GemmMicrokernelTester() 830 .mr(2) 831 .nr(8) 832 .kr(16) 833 .sr(1) 834 .m(2) 835 .n(n) 836 .k(k) 837 .ks(3) 838 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 839 } 840 } 841 } 842 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,n_div_8_small_kernel)843 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, n_div_8_small_kernel) { 844 TEST_REQUIRES_ARM_NEON; 845 for (uint32_t n = 16; n <= 24; n += 8) { 846 for (size_t k = 1; k <= 80; k += 17) { 847 GemmMicrokernelTester() 848 .mr(2) 849 .nr(8) 850 .kr(16) 851 .sr(1) 852 .m(2) 853 .n(n) 854 .k(k) 855 .ks(3) 856 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 857 } 858 } 859 } 860 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,strided_cm_subtile)861 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cm_subtile) { 862 TEST_REQUIRES_ARM_NEON; 863 for (size_t k = 1; k <= 80; k += 17) { 864 for (uint32_t n = 1; n <= 8; n++) { 865 for (uint32_t m = 1; m <= 2; m++) { 866 GemmMicrokernelTester() 867 .mr(2) 868 .nr(8) 869 .kr(16) 870 .sr(1) 871 .m(m) 872 .n(n) 873 .k(k) 874 .cm_stride(11) 875 .iterations(1) 876 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 877 } 878 } 879 } 880 } 881 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,a_offset)882 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, a_offset) { 883 TEST_REQUIRES_ARM_NEON; 884 for (size_t k = 1; k <= 80; k += 17) { 885 GemmMicrokernelTester() 886 .mr(2) 887 .nr(8) 888 .kr(16) 889 .sr(1) 890 .m(2) 891 .n(8) 892 .k(k) 893 .ks(3) 894 .a_offset(163) 895 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 896 } 897 } 898 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,zero)899 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, zero) { 900 TEST_REQUIRES_ARM_NEON; 901 for (size_t k = 1; k <= 80; k += 17) { 902 for (uint32_t mz = 0; mz < 2; mz++) { 903 GemmMicrokernelTester() 904 .mr(2) 905 .nr(8) 906 .kr(16) 907 .sr(1) 908 .m(2) 909 .n(8) 910 .k(k) 911 .ks(3) 912 .a_offset(163) 913 .zero_index(mz) 914 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 915 } 916 } 917 } 918 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,qmin)919 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, qmin) { 920 TEST_REQUIRES_ARM_NEON; 921 GemmMicrokernelTester() 922 .mr(2) 923 .nr(8) 924 .kr(16) 925 .sr(1) 926 .m(2) 927 .n(8) 928 .k(16) 929 .qmin(128) 930 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 931 } 932 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,qmax)933 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, qmax) { 934 TEST_REQUIRES_ARM_NEON; 935 GemmMicrokernelTester() 936 .mr(2) 937 .nr(8) 938 .kr(16) 939 .sr(1) 940 .m(2) 941 .n(8) 942 .k(16) 943 .qmax(128) 944 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 945 } 946 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL,strided_cm)947 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__AARCH64_NEON_MLAL, strided_cm) { 948 TEST_REQUIRES_ARM_NEON; 949 GemmMicrokernelTester() 950 .mr(2) 951 .nr(8) 952 .kr(16) 953 .sr(1) 954 .m(2) 955 .n(8) 956 .k(16) 957 .cm_stride(11) 958 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 959 
} 960 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 961 962 963 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_eq_8)964 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) { 965 TEST_REQUIRES_ARM_NEON; 966 GemmMicrokernelTester() 967 .mr(4) 968 .nr(16) 969 .kr(1) 970 .sr(1) 971 .m(4) 972 .n(16) 973 .k(8) 974 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 975 } 976 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,strided_cn)977 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) { 978 TEST_REQUIRES_ARM_NEON; 979 GemmMicrokernelTester() 980 .mr(4) 981 .nr(16) 982 .kr(1) 983 .sr(1) 984 .m(4) 985 .n(16) 986 .k(8) 987 .cn_stride(19) 988 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 989 } 990 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile)991 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) { 992 TEST_REQUIRES_ARM_NEON; 993 for (uint32_t n = 1; n <= 16; n++) { 994 for (uint32_t m = 1; m <= 4; m++) { 995 GemmMicrokernelTester() 996 .mr(4) 997 .nr(16) 998 .kr(1) 999 .sr(1) 1000 .m(m) 1001 .n(n) 1002 .k(8) 1003 .iterations(1) 1004 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1005 } 1006 } 1007 } 1008 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_m)1009 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) { 1010 TEST_REQUIRES_ARM_NEON; 1011 for (uint32_t m = 1; m <= 4; m++) { 1012 GemmMicrokernelTester() 1013 .mr(4) 1014 .nr(16) 1015 .kr(1) 1016 .sr(1) 1017 .m(m) 1018 .n(16) 1019 .k(8) 1020 
.iterations(1) 1021 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1022 } 1023 } 1024 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_eq_8_subtile_n)1025 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) { 1026 TEST_REQUIRES_ARM_NEON; 1027 for (uint32_t n = 1; n <= 16; n++) { 1028 GemmMicrokernelTester() 1029 .mr(4) 1030 .nr(16) 1031 .kr(1) 1032 .sr(1) 1033 .m(4) 1034 .n(n) 1035 .k(8) 1036 .iterations(1) 1037 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1038 } 1039 } 1040 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_lt_8)1041 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) { 1042 TEST_REQUIRES_ARM_NEON; 1043 for (size_t k = 1; k < 8; k++) { 1044 GemmMicrokernelTester() 1045 .mr(4) 1046 .nr(16) 1047 .kr(1) 1048 .sr(1) 1049 .m(4) 1050 .n(16) 1051 .k(k) 1052 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1053 } 1054 } 1055 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_lt_8_subtile)1056 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) { 1057 TEST_REQUIRES_ARM_NEON; 1058 for (size_t k = 1; k < 8; k++) { 1059 for (uint32_t n = 1; n <= 16; n++) { 1060 for (uint32_t m = 1; m <= 4; m++) { 1061 GemmMicrokernelTester() 1062 .mr(4) 1063 .nr(16) 1064 .kr(1) 1065 .sr(1) 1066 .m(m) 1067 .n(n) 1068 .k(k) 1069 .iterations(1) 1070 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1071 } 1072 } 1073 } 1074 } 1075 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,k_gt_8)1076 
// 4x16 AArch64 NEON MLAL-lane (PRFM, LD64) QS8 IGEMM kernel: sweeps over k and n.
// Every case configures the tester with mr=4, nr=16, kr=1, sr=1; the "subtile"
// cases additionally sweep m (and n where shown) and pass .iterations(1).
// NOTE(review): this file is generated (see the file header) -- mirror any lasting
// change in the generator rather than hand-editing.

// k in [9, 15] on the full 4x16 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k in [9, 15], for every m in [1, 4] and n in [1, 16].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 9; k_size < 16; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 16, 24, ..., 80 on the full 4x16 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(16).kr(1).sr(1)
        .m(4).n(16).k(k_size)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80, for every m in [1, 4] and n in [1, 16].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 16; k_size <= 80; k_size += 8) {
    for (uint32_t n_size = 1; n_size <= 16; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [17, 31] with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [17, 31], k = 1, 10, ..., 37, for every m in [1, 4].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 17; n_size < 32; ++n_size) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(16).kr(1).sr(1)
          .m(4).n(n_size).k(k_size)
          .cn_stride(19)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1, 10, ..., 37, for every m in [1, 4].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 32; n_size <= 48; n_size += 16) {
    for (size_t k_size = 1; k_size <= 40; k_size += 9) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
            .mr(4).nr(16).kr(1).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,small_kernel)1256 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) { 1257 TEST_REQUIRES_ARM_NEON; 1258 for (size_t k = 1; k <= 40; k += 9) { 1259 GemmMicrokernelTester() 1260 .mr(4) 1261 .nr(16) 1262 .kr(1) 1263 .sr(1) 1264 .m(4) 1265 .n(16) 1266 .k(k) 1267 .ks(3) 1268 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1269 } 1270 } 1271 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,small_kernel_subtile)1272 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) { 1273 TEST_REQUIRES_ARM_NEON; 1274 for (size_t k = 1; k <= 40; k += 9) { 1275 for (uint32_t n = 1; n <= 16; n++) { 1276 for (uint32_t m = 1; m <= 4; m++) { 1277 GemmMicrokernelTester() 1278 .mr(4) 1279 .nr(16) 1280 .kr(1) 1281 .sr(1) 1282 .m(m) 1283 .n(n) 1284 .k(k) 1285 .ks(3) 1286 .iterations(1) 1287 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1288 } 1289 } 1290 } 1291 } 1292 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,n_gt_16_small_kernel)1293 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_small_kernel) { 1294 TEST_REQUIRES_ARM_NEON; 1295 for (uint32_t n = 17; n < 32; n++) { 1296 for (size_t k = 1; k <= 40; k += 9) { 1297 GemmMicrokernelTester() 1298 .mr(4) 1299 .nr(16) 1300 .kr(1) 1301 .sr(1) 1302 .m(4) 1303 .n(n) 1304 .k(k) 1305 .ks(3) 1306 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1307 } 1308 } 1309 } 1310 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,n_div_16_small_kernel)1311 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, 
n_div_16_small_kernel) { 1312 TEST_REQUIRES_ARM_NEON; 1313 for (uint32_t n = 32; n <= 48; n += 16) { 1314 for (size_t k = 1; k <= 40; k += 9) { 1315 GemmMicrokernelTester() 1316 .mr(4) 1317 .nr(16) 1318 .kr(1) 1319 .sr(1) 1320 .m(4) 1321 .n(n) 1322 .k(k) 1323 .ks(3) 1324 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1325 } 1326 } 1327 } 1328 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,strided_cm_subtile)1329 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) { 1330 TEST_REQUIRES_ARM_NEON; 1331 for (size_t k = 1; k <= 40; k += 9) { 1332 for (uint32_t n = 1; n <= 16; n++) { 1333 for (uint32_t m = 1; m <= 4; m++) { 1334 GemmMicrokernelTester() 1335 .mr(4) 1336 .nr(16) 1337 .kr(1) 1338 .sr(1) 1339 .m(m) 1340 .n(n) 1341 .k(k) 1342 .cm_stride(19) 1343 .iterations(1) 1344 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1345 } 1346 } 1347 } 1348 } 1349 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,a_offset)1350 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) { 1351 TEST_REQUIRES_ARM_NEON; 1352 for (size_t k = 1; k <= 40; k += 9) { 1353 GemmMicrokernelTester() 1354 .mr(4) 1355 .nr(16) 1356 .kr(1) 1357 .sr(1) 1358 .m(4) 1359 .n(16) 1360 .k(k) 1361 .ks(3) 1362 .a_offset(163) 1363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1364 } 1365 } 1366 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,zero)1367 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) { 1368 TEST_REQUIRES_ARM_NEON; 1369 for (size_t k = 1; k <= 40; k += 9) { 1370 for (uint32_t mz = 0; mz < 4; mz++) { 1371 GemmMicrokernelTester() 1372 .mr(4) 1373 
.nr(16) 1374 .kr(1) 1375 .sr(1) 1376 .m(4) 1377 .n(16) 1378 .k(k) 1379 .ks(3) 1380 .a_offset(163) 1381 .zero_index(mz) 1382 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1383 } 1384 } 1385 } 1386 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,qmin)1387 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) { 1388 TEST_REQUIRES_ARM_NEON; 1389 GemmMicrokernelTester() 1390 .mr(4) 1391 .nr(16) 1392 .kr(1) 1393 .sr(1) 1394 .m(4) 1395 .n(16) 1396 .k(8) 1397 .qmin(128) 1398 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1399 } 1400 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,qmax)1401 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) { 1402 TEST_REQUIRES_ARM_NEON; 1403 GemmMicrokernelTester() 1404 .mr(4) 1405 .nr(16) 1406 .kr(1) 1407 .sr(1) 1408 .m(4) 1409 .n(16) 1410 .k(8) 1411 .qmax(128) 1412 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1413 } 1414 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64,strided_cm)1415 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) { 1416 TEST_REQUIRES_ARM_NEON; 1417 GemmMicrokernelTester() 1418 .mr(4) 1419 .nr(16) 1420 .kr(1) 1421 .sr(1) 1422 .m(4) 1423 .n(16) 1424 .k(8) 1425 .cm_stride(19) 1426 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1427 } 1428 #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY 1429 1430 1431 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_eq_16)1432 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16) { 1433 
TEST_REQUIRES_ARM_NEON; 1434 GemmMicrokernelTester() 1435 .mr(1) 1436 .nr(8) 1437 .kr(2) 1438 .sr(1) 1439 .m(1) 1440 .n(8) 1441 .k(16) 1442 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1443 } 1444 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,strided_cn)1445 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cn) { 1446 TEST_REQUIRES_ARM_NEON; 1447 GemmMicrokernelTester() 1448 .mr(1) 1449 .nr(8) 1450 .kr(2) 1451 .sr(1) 1452 .m(1) 1453 .n(8) 1454 .k(16) 1455 .cn_stride(11) 1456 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1457 } 1458 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_eq_16_subtile)1459 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile) { 1460 TEST_REQUIRES_ARM_NEON; 1461 for (uint32_t n = 1; n <= 8; n++) { 1462 for (uint32_t m = 1; m <= 1; m++) { 1463 GemmMicrokernelTester() 1464 .mr(1) 1465 .nr(8) 1466 .kr(2) 1467 .sr(1) 1468 .m(m) 1469 .n(n) 1470 .k(16) 1471 .iterations(1) 1472 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1473 } 1474 } 1475 } 1476 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_eq_16_subtile_m)1477 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) { 1478 TEST_REQUIRES_ARM_NEON; 1479 for (uint32_t m = 1; m <= 1; m++) { 1480 GemmMicrokernelTester() 1481 .mr(1) 1482 .nr(8) 1483 .kr(2) 1484 .sr(1) 1485 .m(m) 1486 .n(8) 1487 .k(16) 1488 .iterations(1) 1489 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1490 } 1491 } 1492 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_eq_16_subtile_n)1493 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) { 1494 TEST_REQUIRES_ARM_NEON; 1495 for (uint32_t n = 1; n <= 8; n++) { 
1496 GemmMicrokernelTester() 1497 .mr(1) 1498 .nr(8) 1499 .kr(2) 1500 .sr(1) 1501 .m(1) 1502 .n(n) 1503 .k(16) 1504 .iterations(1) 1505 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1506 } 1507 } 1508 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_lt_16)1509 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_lt_16) { 1510 TEST_REQUIRES_ARM_NEON; 1511 for (size_t k = 1; k < 16; k++) { 1512 GemmMicrokernelTester() 1513 .mr(1) 1514 .nr(8) 1515 .kr(2) 1516 .sr(1) 1517 .m(1) 1518 .n(8) 1519 .k(k) 1520 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1521 } 1522 } 1523 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_lt_16_subtile)1524 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_lt_16_subtile) { 1525 TEST_REQUIRES_ARM_NEON; 1526 for (size_t k = 1; k < 16; k++) { 1527 for (uint32_t n = 1; n <= 8; n++) { 1528 for (uint32_t m = 1; m <= 1; m++) { 1529 GemmMicrokernelTester() 1530 .mr(1) 1531 .nr(8) 1532 .kr(2) 1533 .sr(1) 1534 .m(m) 1535 .n(n) 1536 .k(k) 1537 .iterations(1) 1538 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1539 } 1540 } 1541 } 1542 } 1543 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_gt_16)1544 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_gt_16) { 1545 TEST_REQUIRES_ARM_NEON; 1546 for (size_t k = 17; k < 32; k++) { 1547 GemmMicrokernelTester() 1548 .mr(1) 1549 .nr(8) 1550 .kr(2) 1551 .sr(1) 1552 .m(1) 1553 .n(8) 1554 .k(k) 1555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1556 } 1557 } 1558 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_gt_16_subtile)1559 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_gt_16_subtile) { 1560 TEST_REQUIRES_ARM_NEON; 1561 for (size_t k = 
17; k < 32; k++) { 1562 for (uint32_t n = 1; n <= 8; n++) { 1563 for (uint32_t m = 1; m <= 1; m++) { 1564 GemmMicrokernelTester() 1565 .mr(1) 1566 .nr(8) 1567 .kr(2) 1568 .sr(1) 1569 .m(m) 1570 .n(n) 1571 .k(k) 1572 .iterations(1) 1573 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1574 } 1575 } 1576 } 1577 } 1578 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_div_16)1579 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_div_16) { 1580 TEST_REQUIRES_ARM_NEON; 1581 for (size_t k = 32; k <= 160; k += 16) { 1582 GemmMicrokernelTester() 1583 .mr(1) 1584 .nr(8) 1585 .kr(2) 1586 .sr(1) 1587 .m(1) 1588 .n(8) 1589 .k(k) 1590 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1591 } 1592 } 1593 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,k_div_16_subtile)1594 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, k_div_16_subtile) { 1595 TEST_REQUIRES_ARM_NEON; 1596 for (size_t k = 32; k <= 160; k += 16) { 1597 for (uint32_t n = 1; n <= 8; n++) { 1598 for (uint32_t m = 1; m <= 1; m++) { 1599 GemmMicrokernelTester() 1600 .mr(1) 1601 .nr(8) 1602 .kr(2) 1603 .sr(1) 1604 .m(m) 1605 .n(n) 1606 .k(k) 1607 .iterations(1) 1608 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1609 } 1610 } 1611 } 1612 } 1613 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,n_gt_8)1614 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8) { 1615 TEST_REQUIRES_ARM_NEON; 1616 for (uint32_t n = 9; n < 16; n++) { 1617 for (size_t k = 1; k <= 80; k += 17) { 1618 GemmMicrokernelTester() 1619 .mr(1) 1620 .nr(8) 1621 .kr(2) 1622 .sr(1) 1623 .m(1) 1624 .n(n) 1625 .k(k) 1626 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1627 } 1628 } 1629 } 1630 
// 1x8c2 NEON MLAL LD4R QS8 IGEMM kernel: n sweeps and ks = 3 cases.
// Every case configures the tester with mr=1, nr=8, kr=2, sr=1; the "subtile"
// cases pass .iterations(1), and their m loop covers only m = 1.
// NOTE(review): this file is generated (see the file header) -- mirror any lasting
// change in the generator rather than hand-editing.

// n in [9, 15], k = 1, 18, ..., 69, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [9, 15], k = 1, 18, ..., 69, m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 with k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
          .mr(1).nr(8).kr(2).sr(1)
          .m(1).n(n_size).k(k_size)
          .cn_stride(11)
          .Test(
              xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 18, ..., 69, m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 with k = 1, 18, ..., 69 on the full 1x8 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
        .mr(1).nr(8).kr(2).sr(1)
        .m(1).n(8).k(k_size)
        .ks(3)
        .Test(
            xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// ks = 3, k = 1, 18, ..., 69, for every n in [1, 8], m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 1; ++m_size) {
        GemmMicrokernelTester()
            .mr(1).nr(8).kr(2).sr(1)
            .m(m_size).n(n_size).k(k_size)
            .ks(3)
            .iterations(1)
            .Test(
                xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,n_gt_8_small_kernel)1761 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_gt_8_small_kernel) { 1762 TEST_REQUIRES_ARM_NEON; 1763 for (uint32_t n = 9; n < 16; n++) { 1764 for (size_t k = 1; k <= 80; k += 17) { 1765 GemmMicrokernelTester() 1766 .mr(1) 1767 .nr(8) 1768 .kr(2) 1769 .sr(1) 1770 .m(1) 1771 .n(n) 1772 .k(k) 1773 .ks(3) 1774 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1775 } 1776 } 1777 } 1778 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,n_div_8_small_kernel)1779 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, n_div_8_small_kernel) { 1780 TEST_REQUIRES_ARM_NEON; 1781 for (uint32_t n = 16; n <= 24; n += 8) { 1782 for (size_t k = 1; k <= 80; k += 17) { 1783 GemmMicrokernelTester() 1784 .mr(1) 1785 .nr(8) 1786 .kr(2) 1787 .sr(1) 1788 .m(1) 1789 .n(n) 1790 .k(k) 1791 .ks(3) 1792 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1793 } 1794 } 1795 } 1796 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,strided_cm_subtile)1797 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cm_subtile) { 1798 TEST_REQUIRES_ARM_NEON; 1799 for (size_t k = 1; k <= 80; k += 17) { 1800 for (uint32_t n = 1; n <= 8; n++) { 1801 for (uint32_t m = 1; m <= 1; m++) { 1802 GemmMicrokernelTester() 1803 .mr(1) 1804 .nr(8) 1805 .kr(2) 1806 .sr(1) 1807 .m(m) 1808 .n(n) 1809 .k(k) 1810 .cm_stride(11) 1811 .iterations(1) 1812 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1813 } 1814 } 1815 } 1816 } 1817 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,a_offset)1818 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, a_offset) { 1819 TEST_REQUIRES_ARM_NEON; 1820 for (size_t k = 1; k <= 80; k += 17) { 1821 GemmMicrokernelTester() 1822 .mr(1) 1823 .nr(8) 1824 .kr(2) 
1825 .sr(1) 1826 .m(1) 1827 .n(8) 1828 .k(k) 1829 .ks(3) 1830 .a_offset(83) 1831 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1832 } 1833 } 1834 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,zero)1835 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, zero) { 1836 TEST_REQUIRES_ARM_NEON; 1837 for (size_t k = 1; k <= 80; k += 17) { 1838 for (uint32_t mz = 0; mz < 1; mz++) { 1839 GemmMicrokernelTester() 1840 .mr(1) 1841 .nr(8) 1842 .kr(2) 1843 .sr(1) 1844 .m(1) 1845 .n(8) 1846 .k(k) 1847 .ks(3) 1848 .a_offset(83) 1849 .zero_index(mz) 1850 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1851 } 1852 } 1853 } 1854 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,qmin)1855 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, qmin) { 1856 TEST_REQUIRES_ARM_NEON; 1857 GemmMicrokernelTester() 1858 .mr(1) 1859 .nr(8) 1860 .kr(2) 1861 .sr(1) 1862 .m(1) 1863 .n(8) 1864 .k(16) 1865 .qmin(128) 1866 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1867 } 1868 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,qmax)1869 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, qmax) { 1870 TEST_REQUIRES_ARM_NEON; 1871 GemmMicrokernelTester() 1872 .mr(1) 1873 .nr(8) 1874 .kr(2) 1875 .sr(1) 1876 .m(1) 1877 .n(8) 1878 .k(16) 1879 .qmax(128) 1880 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1881 } 1882 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R,strided_cm)1883 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD4R, strided_cm) { 1884 TEST_REQUIRES_ARM_NEON; 1885 GemmMicrokernelTester() 1886 .mr(1) 1887 .nr(8) 1888 .kr(2) 1889 .sr(1) 1890 .m(1) 1891 .n(8) 1892 .k(16) 1893 .cm_stride(11) 1894 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1895 } 1896 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1897 1898 1899 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8)1900 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8) { 1901 TEST_REQUIRES_ARM_NEON; 1902 GemmMicrokernelTester() 1903 .mr(1) 1904 .nr(8) 1905 .kr(2) 1906 .sr(1) 1907 .m(1) 1908 .n(8) 1909 .k(8) 1910 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1911 } 1912 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,strided_cn)1913 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cn) { 1914 TEST_REQUIRES_ARM_NEON; 1915 GemmMicrokernelTester() 1916 .mr(1) 1917 .nr(8) 1918 .kr(2) 1919 .sr(1) 1920 .m(1) 1921 .n(8) 1922 .k(8) 1923 .cn_stride(11) 1924 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1925 } 1926 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8_subtile)1927 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile) { 1928 TEST_REQUIRES_ARM_NEON; 1929 for (uint32_t n = 1; n <= 8; n++) { 1930 for (uint32_t m = 1; m <= 1; m++) { 1931 GemmMicrokernelTester() 1932 .mr(1) 1933 .nr(8) 1934 .kr(2) 1935 .sr(1) 1936 .m(m) 1937 .n(n) 1938 .k(8) 1939 .iterations(1) 1940 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1941 } 1942 } 1943 } 1944 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8_subtile_m)1945 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile_m) { 1946 TEST_REQUIRES_ARM_NEON; 1947 for (uint32_t m = 1; m <= 1; m++) { 1948 GemmMicrokernelTester() 1949 .mr(1) 1950 .nr(8) 1951 .kr(2) 1952 .sr(1) 1953 .m(m) 1954 .n(8) 1955 .k(8) 1956 .iterations(1) 1957 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1958 } 1959 } 1960 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_eq_8_subtile_n)1961 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_eq_8_subtile_n) { 1962 TEST_REQUIRES_ARM_NEON; 1963 for (uint32_t n = 1; n <= 8; n++) { 1964 GemmMicrokernelTester() 1965 .mr(1) 1966 .nr(8) 1967 .kr(2) 1968 .sr(1) 1969 .m(1) 1970 .n(n) 1971 .k(8) 1972 .iterations(1) 1973 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1974 } 1975 } 1976 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_lt_8)1977 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8) { 1978 TEST_REQUIRES_ARM_NEON; 1979 for (size_t k = 1; k < 8; k++) { 1980 GemmMicrokernelTester() 1981 .mr(1) 1982 .nr(8) 1983 .kr(2) 1984 .sr(1) 1985 .m(1) 1986 .n(8) 1987 .k(k) 1988 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1989 } 1990 } 1991 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_lt_8_subtile)1992 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_lt_8_subtile) { 1993 TEST_REQUIRES_ARM_NEON; 1994 for (size_t k = 1; k < 8; k++) { 1995 for (uint32_t n = 1; n <= 8; n++) { 1996 for (uint32_t m = 1; m <= 1; m++) { 1997 GemmMicrokernelTester() 1998 .mr(1) 1999 .nr(8) 2000 .kr(2) 2001 .sr(1) 2002 .m(m) 2003 .n(n) 2004 .k(k) 2005 .iterations(1) 2006 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2007 } 2008 } 2009 } 2010 } 2011 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_gt_8)2012 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8) { 2013 TEST_REQUIRES_ARM_NEON; 2014 for (size_t k = 9; k < 16; k++) { 2015 GemmMicrokernelTester() 2016 .mr(1) 2017 .nr(8) 2018 .kr(2) 2019 .sr(1) 2020 .m(1) 2021 .n(8) 
2022 .k(k) 2023 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2024 } 2025 } 2026 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_gt_8_subtile)2027 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_gt_8_subtile) { 2028 TEST_REQUIRES_ARM_NEON; 2029 for (size_t k = 9; k < 16; k++) { 2030 for (uint32_t n = 1; n <= 8; n++) { 2031 for (uint32_t m = 1; m <= 1; m++) { 2032 GemmMicrokernelTester() 2033 .mr(1) 2034 .nr(8) 2035 .kr(2) 2036 .sr(1) 2037 .m(m) 2038 .n(n) 2039 .k(k) 2040 .iterations(1) 2041 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2042 } 2043 } 2044 } 2045 } 2046 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_div_8)2047 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8) { 2048 TEST_REQUIRES_ARM_NEON; 2049 for (size_t k = 16; k <= 80; k += 8) { 2050 GemmMicrokernelTester() 2051 .mr(1) 2052 .nr(8) 2053 .kr(2) 2054 .sr(1) 2055 .m(1) 2056 .n(8) 2057 .k(k) 2058 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2059 } 2060 } 2061 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,k_div_8_subtile)2062 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, k_div_8_subtile) { 2063 TEST_REQUIRES_ARM_NEON; 2064 for (size_t k = 16; k <= 80; k += 8) { 2065 for (uint32_t n = 1; n <= 8; n++) { 2066 for (uint32_t m = 1; m <= 1; m++) { 2067 GemmMicrokernelTester() 2068 .mr(1) 2069 .nr(8) 2070 .kr(2) 2071 .sr(1) 2072 .m(m) 2073 .n(n) 2074 .k(k) 2075 .iterations(1) 2076 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2077 } 2078 } 2079 } 2080 } 2081 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_gt_8)2082 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8) { 2083 TEST_REQUIRES_ARM_NEON; 2084 for (uint32_t 
n = 9; n < 16; n++) { 2085 for (size_t k = 1; k <= 40; k += 9) { 2086 GemmMicrokernelTester() 2087 .mr(1) 2088 .nr(8) 2089 .kr(2) 2090 .sr(1) 2091 .m(1) 2092 .n(n) 2093 .k(k) 2094 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2095 } 2096 } 2097 } 2098 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_gt_8_strided_cn)2099 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_strided_cn) { 2100 TEST_REQUIRES_ARM_NEON; 2101 for (uint32_t n = 9; n < 16; n++) { 2102 for (size_t k = 1; k <= 40; k += 9) { 2103 GemmMicrokernelTester() 2104 .mr(1) 2105 .nr(8) 2106 .kr(2) 2107 .sr(1) 2108 .m(1) 2109 .n(n) 2110 .k(k) 2111 .cn_stride(11) 2112 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2113 } 2114 } 2115 } 2116 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_gt_8_subtile)2117 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_subtile) { 2118 TEST_REQUIRES_ARM_NEON; 2119 for (uint32_t n = 9; n < 16; n++) { 2120 for (size_t k = 1; k <= 40; k += 9) { 2121 for (uint32_t m = 1; m <= 1; m++) { 2122 GemmMicrokernelTester() 2123 .mr(1) 2124 .nr(8) 2125 .kr(2) 2126 .sr(1) 2127 .m(m) 2128 .n(n) 2129 .k(k) 2130 .iterations(1) 2131 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2132 } 2133 } 2134 } 2135 } 2136 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_div_8)2137 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8) { 2138 TEST_REQUIRES_ARM_NEON; 2139 for (uint32_t n = 16; n <= 24; n += 8) { 2140 for (size_t k = 1; k <= 40; k += 9) { 2141 GemmMicrokernelTester() 2142 .mr(1) 2143 .nr(8) 2144 .kr(2) 2145 .sr(1) 2146 .m(1) 2147 .n(n) 2148 .k(k) 2149 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2150 } 2151 } 
2152 } 2153 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_div_8_strided_cn)2154 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_strided_cn) { 2155 TEST_REQUIRES_ARM_NEON; 2156 for (uint32_t n = 16; n <= 24; n += 8) { 2157 for (size_t k = 1; k <= 40; k += 9) { 2158 GemmMicrokernelTester() 2159 .mr(1) 2160 .nr(8) 2161 .kr(2) 2162 .sr(1) 2163 .m(1) 2164 .n(n) 2165 .k(k) 2166 .cn_stride(11) 2167 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2168 } 2169 } 2170 } 2171 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_div_8_subtile)2172 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_subtile) { 2173 TEST_REQUIRES_ARM_NEON; 2174 for (uint32_t n = 16; n <= 24; n += 8) { 2175 for (size_t k = 1; k <= 40; k += 9) { 2176 for (uint32_t m = 1; m <= 1; m++) { 2177 GemmMicrokernelTester() 2178 .mr(1) 2179 .nr(8) 2180 .kr(2) 2181 .sr(1) 2182 .m(m) 2183 .n(n) 2184 .k(k) 2185 .iterations(1) 2186 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2187 } 2188 } 2189 } 2190 } 2191 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,small_kernel)2192 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, small_kernel) { 2193 TEST_REQUIRES_ARM_NEON; 2194 for (size_t k = 1; k <= 40; k += 9) { 2195 GemmMicrokernelTester() 2196 .mr(1) 2197 .nr(8) 2198 .kr(2) 2199 .sr(1) 2200 .m(1) 2201 .n(8) 2202 .k(k) 2203 .ks(3) 2204 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2205 } 2206 } 2207 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,small_kernel_subtile)2208 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, small_kernel_subtile) { 2209 TEST_REQUIRES_ARM_NEON; 2210 for (size_t k = 1; k <= 40; k += 9) { 2211 for (uint32_t n = 1; n <= 8; n++) { 2212 for (uint32_t m = 1; m <= 1; m++) { 2213 GemmMicrokernelTester() 
2214 .mr(1) 2215 .nr(8) 2216 .kr(2) 2217 .sr(1) 2218 .m(m) 2219 .n(n) 2220 .k(k) 2221 .ks(3) 2222 .iterations(1) 2223 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2224 } 2225 } 2226 } 2227 } 2228 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_gt_8_small_kernel)2229 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_gt_8_small_kernel) { 2230 TEST_REQUIRES_ARM_NEON; 2231 for (uint32_t n = 9; n < 16; n++) { 2232 for (size_t k = 1; k <= 40; k += 9) { 2233 GemmMicrokernelTester() 2234 .mr(1) 2235 .nr(8) 2236 .kr(2) 2237 .sr(1) 2238 .m(1) 2239 .n(n) 2240 .k(k) 2241 .ks(3) 2242 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2243 } 2244 } 2245 } 2246 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,n_div_8_small_kernel)2247 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, n_div_8_small_kernel) { 2248 TEST_REQUIRES_ARM_NEON; 2249 for (uint32_t n = 16; n <= 24; n += 8) { 2250 for (size_t k = 1; k <= 40; k += 9) { 2251 GemmMicrokernelTester() 2252 .mr(1) 2253 .nr(8) 2254 .kr(2) 2255 .sr(1) 2256 .m(1) 2257 .n(n) 2258 .k(k) 2259 .ks(3) 2260 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2261 } 2262 } 2263 } 2264 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,strided_cm_subtile)2265 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cm_subtile) { 2266 TEST_REQUIRES_ARM_NEON; 2267 for (size_t k = 1; k <= 40; k += 9) { 2268 for (uint32_t n = 1; n <= 8; n++) { 2269 for (uint32_t m = 1; m <= 1; m++) { 2270 GemmMicrokernelTester() 2271 .mr(1) 2272 .nr(8) 2273 .kr(2) 2274 .sr(1) 2275 .m(m) 2276 .n(n) 2277 .k(k) 2278 .cm_stride(11) 2279 .iterations(1) 2280 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2281 } 2282 } 
2283 } 2284 } 2285 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,a_offset)2286 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, a_offset) { 2287 TEST_REQUIRES_ARM_NEON; 2288 for (size_t k = 1; k <= 40; k += 9) { 2289 GemmMicrokernelTester() 2290 .mr(1) 2291 .nr(8) 2292 .kr(2) 2293 .sr(1) 2294 .m(1) 2295 .n(8) 2296 .k(k) 2297 .ks(3) 2298 .a_offset(43) 2299 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2300 } 2301 } 2302 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,zero)2303 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, zero) { 2304 TEST_REQUIRES_ARM_NEON; 2305 for (size_t k = 1; k <= 40; k += 9) { 2306 for (uint32_t mz = 0; mz < 1; mz++) { 2307 GemmMicrokernelTester() 2308 .mr(1) 2309 .nr(8) 2310 .kr(2) 2311 .sr(1) 2312 .m(1) 2313 .n(8) 2314 .k(k) 2315 .ks(3) 2316 .a_offset(43) 2317 .zero_index(mz) 2318 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2319 } 2320 } 2321 } 2322 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,qmin)2323 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, qmin) { 2324 TEST_REQUIRES_ARM_NEON; 2325 GemmMicrokernelTester() 2326 .mr(1) 2327 .nr(8) 2328 .kr(2) 2329 .sr(1) 2330 .m(1) 2331 .n(8) 2332 .k(8) 2333 .qmin(128) 2334 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2335 } 2336 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,qmax)2337 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, qmax) { 2338 TEST_REQUIRES_ARM_NEON; 2339 GemmMicrokernelTester() 2340 .mr(1) 2341 .nr(8) 2342 .kr(2) 2343 .sr(1) 2344 .m(1) 2345 .n(8) 2346 .k(8) 2347 .qmax(128) 2348 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2349 } 2350 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R,strided_cm)2351 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD4R, strided_cm) { 2352 TEST_REQUIRES_ARM_NEON; 2353 GemmMicrokernelTester() 2354 .mr(1) 2355 .nr(8) 2356 .kr(2) 2357 .sr(1) 2358 .m(1) 2359 .n(8) 2360 .k(8) 2361 .cm_stride(11) 2362 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2363 } 2364 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2365 2366 2367 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_eq_16)2368 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16) { 2369 TEST_REQUIRES_ARM_NEON; 2370 GemmMicrokernelTester() 2371 .mr(1) 2372 .nr(8) 2373 .kr(8) 2374 .sr(1) 2375 .m(1) 2376 .n(8) 2377 .k(16) 2378 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2379 } 2380 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,strided_cn)2381 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cn) { 2382 TEST_REQUIRES_ARM_NEON; 2383 GemmMicrokernelTester() 2384 .mr(1) 2385 .nr(8) 2386 .kr(8) 2387 .sr(1) 2388 .m(1) 2389 .n(8) 2390 .k(16) 2391 .cn_stride(11) 2392 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2393 } 2394 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_eq_16_subtile)2395 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile) { 2396 TEST_REQUIRES_ARM_NEON; 2397 for (uint32_t n = 1; n <= 8; n++) { 2398 for (uint32_t m = 1; m <= 1; m++) { 2399 GemmMicrokernelTester() 2400 .mr(1) 2401 .nr(8) 2402 .kr(8) 2403 .sr(1) 2404 .m(m) 2405 .n(n) 2406 .k(16) 2407 .iterations(1) 2408 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2409 } 2410 } 2411 } 2412 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_eq_16_subtile_m)2413 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile_m) { 2414 TEST_REQUIRES_ARM_NEON; 2415 
for (uint32_t m = 1; m <= 1; m++) { 2416 GemmMicrokernelTester() 2417 .mr(1) 2418 .nr(8) 2419 .kr(8) 2420 .sr(1) 2421 .m(m) 2422 .n(8) 2423 .k(16) 2424 .iterations(1) 2425 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2426 } 2427 } 2428 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_eq_16_subtile_n)2429 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile_n) { 2430 TEST_REQUIRES_ARM_NEON; 2431 for (uint32_t n = 1; n <= 8; n++) { 2432 GemmMicrokernelTester() 2433 .mr(1) 2434 .nr(8) 2435 .kr(8) 2436 .sr(1) 2437 .m(1) 2438 .n(n) 2439 .k(16) 2440 .iterations(1) 2441 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2442 } 2443 } 2444 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_lt_16)2445 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_lt_16) { 2446 TEST_REQUIRES_ARM_NEON; 2447 for (size_t k = 1; k < 16; k++) { 2448 GemmMicrokernelTester() 2449 .mr(1) 2450 .nr(8) 2451 .kr(8) 2452 .sr(1) 2453 .m(1) 2454 .n(8) 2455 .k(k) 2456 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2457 } 2458 } 2459 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_lt_16_subtile)2460 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_lt_16_subtile) { 2461 TEST_REQUIRES_ARM_NEON; 2462 for (size_t k = 1; k < 16; k++) { 2463 for (uint32_t n = 1; n <= 8; n++) { 2464 for (uint32_t m = 1; m <= 1; m++) { 2465 GemmMicrokernelTester() 2466 .mr(1) 2467 .nr(8) 2468 .kr(8) 2469 .sr(1) 2470 .m(m) 2471 .n(n) 2472 .k(k) 2473 .iterations(1) 2474 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2475 } 2476 } 2477 } 2478 } 2479 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_gt_16)2480 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_gt_16) { 2481 TEST_REQUIRES_ARM_NEON; 2482 for (size_t k 
= 17; k < 32; k++) { 2483 GemmMicrokernelTester() 2484 .mr(1) 2485 .nr(8) 2486 .kr(8) 2487 .sr(1) 2488 .m(1) 2489 .n(8) 2490 .k(k) 2491 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2492 } 2493 } 2494 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_gt_16_subtile)2495 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_gt_16_subtile) { 2496 TEST_REQUIRES_ARM_NEON; 2497 for (size_t k = 17; k < 32; k++) { 2498 for (uint32_t n = 1; n <= 8; n++) { 2499 for (uint32_t m = 1; m <= 1; m++) { 2500 GemmMicrokernelTester() 2501 .mr(1) 2502 .nr(8) 2503 .kr(8) 2504 .sr(1) 2505 .m(m) 2506 .n(n) 2507 .k(k) 2508 .iterations(1) 2509 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2510 } 2511 } 2512 } 2513 } 2514 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_div_16)2515 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_div_16) { 2516 TEST_REQUIRES_ARM_NEON; 2517 for (size_t k = 32; k <= 160; k += 16) { 2518 GemmMicrokernelTester() 2519 .mr(1) 2520 .nr(8) 2521 .kr(8) 2522 .sr(1) 2523 .m(1) 2524 .n(8) 2525 .k(k) 2526 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2527 } 2528 } 2529 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,k_div_16_subtile)2530 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_div_16_subtile) { 2531 TEST_REQUIRES_ARM_NEON; 2532 for (size_t k = 32; k <= 160; k += 16) { 2533 for (uint32_t n = 1; n <= 8; n++) { 2534 for (uint32_t m = 1; m <= 1; m++) { 2535 GemmMicrokernelTester() 2536 .mr(1) 2537 .nr(8) 2538 .kr(8) 2539 .sr(1) 2540 .m(m) 2541 .n(n) 2542 .k(k) 2543 .iterations(1) 2544 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2545 } 2546 } 2547 } 2548 } 2549 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,n_gt_8)2550 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8) { 2551 TEST_REQUIRES_ARM_NEON; 2552 for (uint32_t n = 9; n < 16; n++) { 2553 for (size_t k = 1; k <= 80; k += 17) { 2554 GemmMicrokernelTester() 2555 .mr(1) 2556 .nr(8) 2557 .kr(8) 2558 .sr(1) 2559 .m(1) 2560 .n(n) 2561 .k(k) 2562 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2563 } 2564 } 2565 } 2566 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,n_gt_8_strided_cn)2567 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_strided_cn) { 2568 TEST_REQUIRES_ARM_NEON; 2569 for (uint32_t n = 9; n < 16; n++) { 2570 for (size_t k = 1; k <= 80; k += 17) { 2571 GemmMicrokernelTester() 2572 .mr(1) 2573 .nr(8) 2574 .kr(8) 2575 .sr(1) 2576 .m(1) 2577 .n(n) 2578 .k(k) 2579 .cn_stride(11) 2580 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2581 } 2582 } 2583 } 2584 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,n_gt_8_subtile)2585 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_subtile) { 2586 TEST_REQUIRES_ARM_NEON; 2587 for (uint32_t n = 9; n < 16; n++) { 2588 for (size_t k = 1; k <= 80; k += 17) { 2589 for (uint32_t m = 1; m <= 1; m++) { 2590 GemmMicrokernelTester() 2591 .mr(1) 2592 .nr(8) 2593 .kr(8) 2594 .sr(1) 2595 .m(m) 2596 .n(n) 2597 .k(k) 2598 .iterations(1) 2599 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2600 } 2601 } 2602 } 2603 } 2604 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,n_div_8)2605 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8) { 2606 TEST_REQUIRES_ARM_NEON; 2607 for (uint32_t n = 16; n <= 24; n += 8) { 2608 for (size_t k = 1; k <= 80; k += 17) { 2609 GemmMicrokernelTester() 2610 .mr(1) 2611 .nr(8) 2612 .kr(8) 2613 .sr(1) 2614 .m(1) 2615 .n(n) 2616 .k(k) 2617 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2618 } 2619 } 2620 } 2621 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,n_div_8_strided_cn)2622 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_strided_cn) { 2623 TEST_REQUIRES_ARM_NEON; 2624 for (uint32_t n = 16; n <= 24; n += 8) { 2625 for (size_t k = 1; k <= 80; k += 17) { 2626 GemmMicrokernelTester() 2627 .mr(1) 2628 .nr(8) 2629 .kr(8) 2630 .sr(1) 2631 .m(1) 2632 .n(n) 2633 .k(k) 2634 .cn_stride(11) 2635 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2636 } 2637 } 2638 } 2639 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,n_div_8_subtile)2640 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_subtile) { 2641 TEST_REQUIRES_ARM_NEON; 2642 for (uint32_t n = 16; n <= 24; n += 8) { 2643 for (size_t k = 1; k <= 80; k += 17) { 2644 for (uint32_t m = 1; m <= 1; m++) { 2645 GemmMicrokernelTester() 2646 .mr(1) 2647 .nr(8) 2648 .kr(8) 2649 .sr(1) 2650 .m(m) 2651 .n(n) 2652 .k(k) 2653 .iterations(1) 2654 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2655 } 2656 } 2657 } 2658 } 2659 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,small_kernel)2660 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, small_kernel) { 2661 TEST_REQUIRES_ARM_NEON; 2662 for (size_t k = 1; k <= 80; k += 17) { 2663 GemmMicrokernelTester() 2664 .mr(1) 2665 .nr(8) 2666 .kr(8) 2667 .sr(1) 2668 .m(1) 2669 .n(8) 2670 .k(k) 2671 .ks(3) 2672 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2673 } 2674 } 2675 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,small_kernel_subtile)2676 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, small_kernel_subtile) { 2677 TEST_REQUIRES_ARM_NEON; 2678 for (size_t k = 1; k <= 80; k += 17) { 2679 for (uint32_t n = 1; n <= 8; n++) { 2680 for (uint32_t m = 1; m <= 1; 
m++) { 2681 GemmMicrokernelTester() 2682 .mr(1) 2683 .nr(8) 2684 .kr(8) 2685 .sr(1) 2686 .m(m) 2687 .n(n) 2688 .k(k) 2689 .ks(3) 2690 .iterations(1) 2691 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2692 } 2693 } 2694 } 2695 } 2696 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,n_gt_8_small_kernel)2697 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_small_kernel) { 2698 TEST_REQUIRES_ARM_NEON; 2699 for (uint32_t n = 9; n < 16; n++) { 2700 for (size_t k = 1; k <= 80; k += 17) { 2701 GemmMicrokernelTester() 2702 .mr(1) 2703 .nr(8) 2704 .kr(8) 2705 .sr(1) 2706 .m(1) 2707 .n(n) 2708 .k(k) 2709 .ks(3) 2710 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2711 } 2712 } 2713 } 2714 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,n_div_8_small_kernel)2715 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_small_kernel) { 2716 TEST_REQUIRES_ARM_NEON; 2717 for (uint32_t n = 16; n <= 24; n += 8) { 2718 for (size_t k = 1; k <= 80; k += 17) { 2719 GemmMicrokernelTester() 2720 .mr(1) 2721 .nr(8) 2722 .kr(8) 2723 .sr(1) 2724 .m(1) 2725 .n(n) 2726 .k(k) 2727 .ks(3) 2728 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2729 } 2730 } 2731 } 2732 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,strided_cm_subtile)2733 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cm_subtile) { 2734 TEST_REQUIRES_ARM_NEON; 2735 for (size_t k = 1; k <= 80; k += 17) { 2736 for (uint32_t n = 1; n <= 8; n++) { 2737 for (uint32_t m = 1; m <= 1; m++) { 2738 GemmMicrokernelTester() 2739 .mr(1) 2740 .nr(8) 2741 .kr(8) 2742 .sr(1) 2743 .m(m) 2744 .n(n) 2745 .k(k) 2746 .cm_stride(11) 2747 .iterations(1) 2748 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2749 } 2750 } 2751 } 2752 
} 2753 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,a_offset)2754 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, a_offset) { 2755 TEST_REQUIRES_ARM_NEON; 2756 for (size_t k = 1; k <= 80; k += 17) { 2757 GemmMicrokernelTester() 2758 .mr(1) 2759 .nr(8) 2760 .kr(8) 2761 .sr(1) 2762 .m(1) 2763 .n(8) 2764 .k(k) 2765 .ks(3) 2766 .a_offset(83) 2767 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2768 } 2769 } 2770 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,zero)2771 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, zero) { 2772 TEST_REQUIRES_ARM_NEON; 2773 for (size_t k = 1; k <= 80; k += 17) { 2774 for (uint32_t mz = 0; mz < 1; mz++) { 2775 GemmMicrokernelTester() 2776 .mr(1) 2777 .nr(8) 2778 .kr(8) 2779 .sr(1) 2780 .m(1) 2781 .n(8) 2782 .k(k) 2783 .ks(3) 2784 .a_offset(83) 2785 .zero_index(mz) 2786 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2787 } 2788 } 2789 } 2790 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,qmin)2791 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, qmin) { 2792 TEST_REQUIRES_ARM_NEON; 2793 GemmMicrokernelTester() 2794 .mr(1) 2795 .nr(8) 2796 .kr(8) 2797 .sr(1) 2798 .m(1) 2799 .n(8) 2800 .k(16) 2801 .qmin(128) 2802 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2803 } 2804 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,qmax)2805 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, qmax) { 2806 TEST_REQUIRES_ARM_NEON; 2807 GemmMicrokernelTester() 2808 .mr(1) 2809 .nr(8) 2810 .kr(8) 2811 .sr(1) 2812 .m(1) 2813 .n(8) 2814 .k(16) 2815 .qmax(128) 2816 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2817 } 2818 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL,strided_cm)2819 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cm) { 2820 
TEST_REQUIRES_ARM_NEON; 2821 GemmMicrokernelTester() 2822 .mr(1) 2823 .nr(8) 2824 .kr(8) 2825 .sr(1) 2826 .m(1) 2827 .n(8) 2828 .k(16) 2829 .cm_stride(11) 2830 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2831 } 2832 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2833 2834 2835 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_eq_16)2836 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16) { 2837 TEST_REQUIRES_ARM_NEON; 2838 GemmMicrokernelTester() 2839 .mr(1) 2840 .nr(8) 2841 .kr(16) 2842 .sr(1) 2843 .m(1) 2844 .n(8) 2845 .k(16) 2846 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2847 } 2848 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,strided_cn)2849 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cn) { 2850 TEST_REQUIRES_ARM_NEON; 2851 GemmMicrokernelTester() 2852 .mr(1) 2853 .nr(8) 2854 .kr(16) 2855 .sr(1) 2856 .m(1) 2857 .n(8) 2858 .k(16) 2859 .cn_stride(11) 2860 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2861 } 2862 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_eq_16_subtile)2863 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile) { 2864 TEST_REQUIRES_ARM_NEON; 2865 for (uint32_t n = 1; n <= 8; n++) { 2866 for (uint32_t m = 1; m <= 1; m++) { 2867 GemmMicrokernelTester() 2868 .mr(1) 2869 .nr(8) 2870 .kr(16) 2871 .sr(1) 2872 .m(m) 2873 .n(n) 2874 .k(16) 2875 .iterations(1) 2876 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2877 } 2878 } 2879 } 2880 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_eq_16_subtile_m)2881 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile_m) { 2882 TEST_REQUIRES_ARM_NEON; 2883 for (uint32_t m = 1; m <= 1; m++) { 2884 
GemmMicrokernelTester() 2885 .mr(1) 2886 .nr(8) 2887 .kr(16) 2888 .sr(1) 2889 .m(m) 2890 .n(8) 2891 .k(16) 2892 .iterations(1) 2893 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2894 } 2895 } 2896 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_eq_16_subtile_n)2897 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_eq_16_subtile_n) { 2898 TEST_REQUIRES_ARM_NEON; 2899 for (uint32_t n = 1; n <= 8; n++) { 2900 GemmMicrokernelTester() 2901 .mr(1) 2902 .nr(8) 2903 .kr(16) 2904 .sr(1) 2905 .m(1) 2906 .n(n) 2907 .k(16) 2908 .iterations(1) 2909 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2910 } 2911 } 2912 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_lt_16)2913 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_lt_16) { 2914 TEST_REQUIRES_ARM_NEON; 2915 for (size_t k = 1; k < 16; k++) { 2916 GemmMicrokernelTester() 2917 .mr(1) 2918 .nr(8) 2919 .kr(16) 2920 .sr(1) 2921 .m(1) 2922 .n(8) 2923 .k(k) 2924 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2925 } 2926 } 2927 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_lt_16_subtile)2928 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_lt_16_subtile) { 2929 TEST_REQUIRES_ARM_NEON; 2930 for (size_t k = 1; k < 16; k++) { 2931 for (uint32_t n = 1; n <= 8; n++) { 2932 for (uint32_t m = 1; m <= 1; m++) { 2933 GemmMicrokernelTester() 2934 .mr(1) 2935 .nr(8) 2936 .kr(16) 2937 .sr(1) 2938 .m(m) 2939 .n(n) 2940 .k(k) 2941 .iterations(1) 2942 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2943 } 2944 } 2945 } 2946 } 2947 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_gt_16)2948 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_gt_16) { 2949 TEST_REQUIRES_ARM_NEON; 2950 for (size_t k = 17; k < 32; k++) { 
2951 GemmMicrokernelTester() 2952 .mr(1) 2953 .nr(8) 2954 .kr(16) 2955 .sr(1) 2956 .m(1) 2957 .n(8) 2958 .k(k) 2959 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2960 } 2961 } 2962 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_gt_16_subtile)2963 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_gt_16_subtile) { 2964 TEST_REQUIRES_ARM_NEON; 2965 for (size_t k = 17; k < 32; k++) { 2966 for (uint32_t n = 1; n <= 8; n++) { 2967 for (uint32_t m = 1; m <= 1; m++) { 2968 GemmMicrokernelTester() 2969 .mr(1) 2970 .nr(8) 2971 .kr(16) 2972 .sr(1) 2973 .m(m) 2974 .n(n) 2975 .k(k) 2976 .iterations(1) 2977 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2978 } 2979 } 2980 } 2981 } 2982 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_div_16)2983 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_div_16) { 2984 TEST_REQUIRES_ARM_NEON; 2985 for (size_t k = 32; k <= 160; k += 16) { 2986 GemmMicrokernelTester() 2987 .mr(1) 2988 .nr(8) 2989 .kr(16) 2990 .sr(1) 2991 .m(1) 2992 .n(8) 2993 .k(k) 2994 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2995 } 2996 } 2997 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,k_div_16_subtile)2998 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, k_div_16_subtile) { 2999 TEST_REQUIRES_ARM_NEON; 3000 for (size_t k = 32; k <= 160; k += 16) { 3001 for (uint32_t n = 1; n <= 8; n++) { 3002 for (uint32_t m = 1; m <= 1; m++) { 3003 GemmMicrokernelTester() 3004 .mr(1) 3005 .nr(8) 3006 .kr(16) 3007 .sr(1) 3008 .m(m) 3009 .n(n) 3010 .k(k) 3011 .iterations(1) 3012 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3013 } 3014 } 3015 } 3016 } 3017 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL,n_gt_8)3018 
// n = 9..15 with k = 1..80 in steps of 17, m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(16).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15 with k = 1..80 in steps of 17, m = 1, cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(16).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15 with k = 1..80 in steps of 17 (m loop runs only m = 1), one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 with k = 1..80 in steps of 17, m = 1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(16).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24 with k = 1..80 in steps of 17, m = 1, cn_stride = 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(16).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24 with k = 1..80 in steps of 17 (m loop runs only m = 1), one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..80 in steps of 17 with m = 1, n = 8 and ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(16).sr(1)
      .m(1).n(8).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..80 in steps of 17, every n in 1..8 (m loop runs only m = 1), ks = 3, one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .ks(3).iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 9..15 with k = 1..80 in steps of 17, m = 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(16).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24 with k = 1..80 in steps of 17, m = 1, ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(16).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 1..80 in steps of 17, every n in 1..8 (m loop runs only m = 1), cm_stride = 11, one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(16).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11).iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..80 in steps of 17 with m = 1, n = 8, ks = 3 and a_offset = 83.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(16).sr(1)
      .m(1).n(8).k(k)
      .ks(3).a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// As a_offset, additionally passing zero_index(mz); the mz loop runs only mz = 0.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(16).sr(1)
        .m(1).n(8).k(k)
        .ks(3).a_offset(83).zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1, n = 8, k = 16 with qmin = 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(16).sr(1)
    .m(1).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// m = 1, n = 8, k = 16 with qmax = 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(8).kr(16).sr(1)
    .m(1).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C16__NEON_MLAL, strided_cm) { 3288 TEST_REQUIRES_ARM_NEON; 3289 GemmMicrokernelTester() 3290 .mr(1) 3291 .nr(8) 3292 .kr(16) 3293 .sr(1) 3294 .m(1) 3295 .n(8) 3296 .k(16) 3297 .cm_stride(11) 3298 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3299 } 3300 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3301 3302 3303 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16)3304 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16) { 3305 TEST_REQUIRES_ARM_NEON; 3306 GemmMicrokernelTester() 3307 .mr(1) 3308 .nr(16) 3309 .kr(2) 3310 .sr(1) 3311 .m(1) 3312 .n(16) 3313 .k(16) 3314 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3315 } 3316 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,strided_cn)3317 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cn) { 3318 TEST_REQUIRES_ARM_NEON; 3319 GemmMicrokernelTester() 3320 .mr(1) 3321 .nr(16) 3322 .kr(2) 3323 .sr(1) 3324 .m(1) 3325 .n(16) 3326 .k(16) 3327 .cn_stride(19) 3328 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3329 } 3330 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16_subtile)3331 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) { 3332 TEST_REQUIRES_ARM_NEON; 3333 for (uint32_t n = 1; n <= 16; n++) { 3334 for (uint32_t m = 1; m <= 1; m++) { 3335 GemmMicrokernelTester() 3336 .mr(1) 3337 .nr(16) 3338 .kr(2) 3339 .sr(1) 3340 .m(m) 3341 .n(n) 3342 .k(16) 3343 .iterations(1) 3344 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3345 } 3346 } 3347 } 3348 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_m)3349 
// Sweeps m over 1..1 with n=16, k=16 and iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(m)
      .n(16)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps n over 1..16 with m=1, k=16 and iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 1..15 with m=1, n=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 1..15, n over 1..16 and m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k over 17..31 with m=1, n=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k over 17..31, n over 1..16 and m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k from 32 to 160 in steps of 16 with m=1, n=16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k from 32 to 160 in steps of 16, n over 1..16 and m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n over 17..31 and k from 1 to 80 in steps of 17 with m=1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16 (n 17..31, k 1..80 step 17) with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over 17..31, k from 1 to 80 in steps of 17 and m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n over {32, 48} and k from 1 to 80 in steps of 17 with m=1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16 (n in {32, 48}, k 1..80 step 17) with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over {32, 48}, k from 1 to 80 in steps of 17 and m over 1..1 with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k from 1 to 80 in steps of 17 with m=1, n=16 and ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k from 1 to 80 in steps of 17, n over 1..16 and m over 1..1 with ks(3) and iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n over 17..31 and k from 1 to 80 in steps of 17 with m=1 and ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n over {32, 48} and k from 1 to 80 in steps of 17 with m=1 and ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps k from 1 to 80 in steps of 17, n over 1..16 and m over 1..1 with cm_stride(19) and iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k from 1 to 80 in steps of 17 with m=1, n=16, ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(1)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k from 1 to 80 in steps of 17 and zero_index over 0..0 with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(1)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Single run: m=1, n=16, k=16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(1)
    .n(16)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,qmax)3741 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, qmax) { 3742 TEST_REQUIRES_ARM_NEON; 3743 GemmMicrokernelTester() 3744 .mr(1) 3745 .nr(16) 3746 .kr(2) 3747 .sr(1) 3748 .m(1) 3749 .n(16) 3750 .k(16) 3751 .qmax(128) 3752 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3753 } 3754 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R,strided_cm)3755 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD2R, strided_cm) { 3756 TEST_REQUIRES_ARM_NEON; 3757 GemmMicrokernelTester() 3758 .mr(1) 3759 .nr(16) 3760 .kr(2) 3761 .sr(1) 3762 .m(1) 3763 .n(16) 3764 .k(16) 3765 .cm_stride(19) 3766 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3767 } 3768 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3769 3770 3771 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_eq_8)3772 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_eq_8) { 3773 TEST_REQUIRES_ARM_NEON; 3774 GemmMicrokernelTester() 3775 .mr(1) 3776 .nr(16) 3777 .kr(2) 3778 .sr(1) 3779 .m(1) 3780 .n(16) 3781 .k(8) 3782 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3783 } 3784 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,strided_cn)3785 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, strided_cn) { 3786 TEST_REQUIRES_ARM_NEON; 3787 GemmMicrokernelTester() 3788 .mr(1) 3789 .nr(16) 3790 .kr(2) 3791 .sr(1) 3792 .m(1) 3793 .n(16) 3794 .k(8) 3795 .cn_stride(19) 3796 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3797 } 3798 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_eq_8_subtile)3799 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_eq_8_subtile) { 3800 
TEST_REQUIRES_ARM_NEON; 3801 for (uint32_t n = 1; n <= 16; n++) { 3802 for (uint32_t m = 1; m <= 1; m++) { 3803 GemmMicrokernelTester() 3804 .mr(1) 3805 .nr(16) 3806 .kr(2) 3807 .sr(1) 3808 .m(m) 3809 .n(n) 3810 .k(8) 3811 .iterations(1) 3812 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3813 } 3814 } 3815 } 3816 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_eq_8_subtile_m)3817 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) { 3818 TEST_REQUIRES_ARM_NEON; 3819 for (uint32_t m = 1; m <= 1; m++) { 3820 GemmMicrokernelTester() 3821 .mr(1) 3822 .nr(16) 3823 .kr(2) 3824 .sr(1) 3825 .m(m) 3826 .n(16) 3827 .k(8) 3828 .iterations(1) 3829 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3830 } 3831 } 3832 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_eq_8_subtile_n)3833 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) { 3834 TEST_REQUIRES_ARM_NEON; 3835 for (uint32_t n = 1; n <= 16; n++) { 3836 GemmMicrokernelTester() 3837 .mr(1) 3838 .nr(16) 3839 .kr(2) 3840 .sr(1) 3841 .m(1) 3842 .n(n) 3843 .k(8) 3844 .iterations(1) 3845 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3846 } 3847 } 3848 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_lt_8)3849 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_lt_8) { 3850 TEST_REQUIRES_ARM_NEON; 3851 for (size_t k = 1; k < 8; k++) { 3852 GemmMicrokernelTester() 3853 .mr(1) 3854 .nr(16) 3855 .kr(2) 3856 .sr(1) 3857 .m(1) 3858 .n(16) 3859 .k(k) 3860 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3861 } 3862 } 3863 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_lt_8_subtile)3864 
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_lt_8_subtile) { 3865 TEST_REQUIRES_ARM_NEON; 3866 for (size_t k = 1; k < 8; k++) { 3867 for (uint32_t n = 1; n <= 16; n++) { 3868 for (uint32_t m = 1; m <= 1; m++) { 3869 GemmMicrokernelTester() 3870 .mr(1) 3871 .nr(16) 3872 .kr(2) 3873 .sr(1) 3874 .m(m) 3875 .n(n) 3876 .k(k) 3877 .iterations(1) 3878 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3879 } 3880 } 3881 } 3882 } 3883 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_gt_8)3884 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_gt_8) { 3885 TEST_REQUIRES_ARM_NEON; 3886 for (size_t k = 9; k < 16; k++) { 3887 GemmMicrokernelTester() 3888 .mr(1) 3889 .nr(16) 3890 .kr(2) 3891 .sr(1) 3892 .m(1) 3893 .n(16) 3894 .k(k) 3895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3896 } 3897 } 3898 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_gt_8_subtile)3899 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_gt_8_subtile) { 3900 TEST_REQUIRES_ARM_NEON; 3901 for (size_t k = 9; k < 16; k++) { 3902 for (uint32_t n = 1; n <= 16; n++) { 3903 for (uint32_t m = 1; m <= 1; m++) { 3904 GemmMicrokernelTester() 3905 .mr(1) 3906 .nr(16) 3907 .kr(2) 3908 .sr(1) 3909 .m(m) 3910 .n(n) 3911 .k(k) 3912 .iterations(1) 3913 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3914 } 3915 } 3916 } 3917 } 3918 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_div_8)3919 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_div_8) { 3920 TEST_REQUIRES_ARM_NEON; 3921 for (size_t k = 16; k <= 80; k += 8) { 3922 GemmMicrokernelTester() 3923 .mr(1) 3924 .nr(16) 3925 .kr(2) 3926 .sr(1) 3927 .m(1) 3928 .n(16) 3929 .k(k) 3930 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3931 } 3932 } 3933 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,k_div_8_subtile)3934 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, k_div_8_subtile) { 3935 TEST_REQUIRES_ARM_NEON; 3936 for (size_t k = 16; k <= 80; k += 8) { 3937 for (uint32_t n = 1; n <= 16; n++) { 3938 for (uint32_t m = 1; m <= 1; m++) { 3939 GemmMicrokernelTester() 3940 .mr(1) 3941 .nr(16) 3942 .kr(2) 3943 .sr(1) 3944 .m(m) 3945 .n(n) 3946 .k(k) 3947 .iterations(1) 3948 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3949 } 3950 } 3951 } 3952 } 3953 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,n_gt_16)3954 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_gt_16) { 3955 TEST_REQUIRES_ARM_NEON; 3956 for (uint32_t n = 17; n < 32; n++) { 3957 for (size_t k = 1; k <= 40; k += 9) { 3958 GemmMicrokernelTester() 3959 .mr(1) 3960 .nr(16) 3961 .kr(2) 3962 .sr(1) 3963 .m(1) 3964 .n(n) 3965 .k(k) 3966 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3967 } 3968 } 3969 } 3970 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,n_gt_16_strided_cn)3971 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) { 3972 TEST_REQUIRES_ARM_NEON; 3973 for (uint32_t n = 17; n < 32; n++) { 3974 for (size_t k = 1; k <= 40; k += 9) { 3975 GemmMicrokernelTester() 3976 .mr(1) 3977 .nr(16) 3978 .kr(2) 3979 .sr(1) 3980 .m(1) 3981 .n(n) 3982 .k(k) 3983 .cn_stride(19) 3984 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3985 } 3986 } 3987 } 3988 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,n_gt_16_subtile)3989 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_gt_16_subtile) { 3990 TEST_REQUIRES_ARM_NEON; 3991 for (uint32_t n = 17; n < 32; n++) { 3992 for 
(size_t k = 1; k <= 40; k += 9) { 3993 for (uint32_t m = 1; m <= 1; m++) { 3994 GemmMicrokernelTester() 3995 .mr(1) 3996 .nr(16) 3997 .kr(2) 3998 .sr(1) 3999 .m(m) 4000 .n(n) 4001 .k(k) 4002 .iterations(1) 4003 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4004 } 4005 } 4006 } 4007 } 4008 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,n_div_16)4009 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_div_16) { 4010 TEST_REQUIRES_ARM_NEON; 4011 for (uint32_t n = 32; n <= 48; n += 16) { 4012 for (size_t k = 1; k <= 40; k += 9) { 4013 GemmMicrokernelTester() 4014 .mr(1) 4015 .nr(16) 4016 .kr(2) 4017 .sr(1) 4018 .m(1) 4019 .n(n) 4020 .k(k) 4021 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4022 } 4023 } 4024 } 4025 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,n_div_16_strided_cn)4026 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) { 4027 TEST_REQUIRES_ARM_NEON; 4028 for (uint32_t n = 32; n <= 48; n += 16) { 4029 for (size_t k = 1; k <= 40; k += 9) { 4030 GemmMicrokernelTester() 4031 .mr(1) 4032 .nr(16) 4033 .kr(2) 4034 .sr(1) 4035 .m(1) 4036 .n(n) 4037 .k(k) 4038 .cn_stride(19) 4039 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4040 } 4041 } 4042 } 4043 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,n_div_16_subtile)4044 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_div_16_subtile) { 4045 TEST_REQUIRES_ARM_NEON; 4046 for (uint32_t n = 32; n <= 48; n += 16) { 4047 for (size_t k = 1; k <= 40; k += 9) { 4048 for (uint32_t m = 1; m <= 1; m++) { 4049 GemmMicrokernelTester() 4050 .mr(1) 4051 .nr(16) 4052 .kr(2) 4053 .sr(1) 4054 .m(m) 4055 .n(n) 4056 .k(k) 4057 .iterations(1) 4058 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4059 } 4060 } 4061 } 4062 } 4063 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,small_kernel)4064 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, small_kernel) { 4065 TEST_REQUIRES_ARM_NEON; 4066 for (size_t k = 1; k <= 40; k += 9) { 4067 GemmMicrokernelTester() 4068 .mr(1) 4069 .nr(16) 4070 .kr(2) 4071 .sr(1) 4072 .m(1) 4073 .n(16) 4074 .k(k) 4075 .ks(3) 4076 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4077 } 4078 } 4079 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,small_kernel_subtile)4080 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, small_kernel_subtile) { 4081 TEST_REQUIRES_ARM_NEON; 4082 for (size_t k = 1; k <= 40; k += 9) { 4083 for (uint32_t n = 1; n <= 16; n++) { 4084 for (uint32_t m = 1; m <= 1; m++) { 4085 GemmMicrokernelTester() 4086 .mr(1) 4087 .nr(16) 4088 .kr(2) 4089 .sr(1) 4090 .m(m) 4091 .n(n) 4092 .k(k) 4093 .ks(3) 4094 .iterations(1) 4095 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4096 } 4097 } 4098 } 4099 } 4100 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,n_gt_16_small_kernel)4101 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_gt_16_small_kernel) { 4102 TEST_REQUIRES_ARM_NEON; 4103 for (uint32_t n = 17; n < 32; n++) { 4104 for (size_t k = 1; k <= 40; k += 9) { 4105 GemmMicrokernelTester() 4106 .mr(1) 4107 .nr(16) 4108 .kr(2) 4109 .sr(1) 4110 .m(1) 4111 .n(n) 4112 .k(k) 4113 .ks(3) 4114 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4115 } 4116 } 4117 } 4118 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,n_div_16_small_kernel)4119 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, n_div_16_small_kernel) { 4120 TEST_REQUIRES_ARM_NEON; 4121 for (uint32_t n = 32; n <= 48; 
n += 16) { 4122 for (size_t k = 1; k <= 40; k += 9) { 4123 GemmMicrokernelTester() 4124 .mr(1) 4125 .nr(16) 4126 .kr(2) 4127 .sr(1) 4128 .m(1) 4129 .n(n) 4130 .k(k) 4131 .ks(3) 4132 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4133 } 4134 } 4135 } 4136 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,strided_cm_subtile)4137 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, strided_cm_subtile) { 4138 TEST_REQUIRES_ARM_NEON; 4139 for (size_t k = 1; k <= 40; k += 9) { 4140 for (uint32_t n = 1; n <= 16; n++) { 4141 for (uint32_t m = 1; m <= 1; m++) { 4142 GemmMicrokernelTester() 4143 .mr(1) 4144 .nr(16) 4145 .kr(2) 4146 .sr(1) 4147 .m(m) 4148 .n(n) 4149 .k(k) 4150 .cm_stride(19) 4151 .iterations(1) 4152 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4153 } 4154 } 4155 } 4156 } 4157 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,a_offset)4158 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, a_offset) { 4159 TEST_REQUIRES_ARM_NEON; 4160 for (size_t k = 1; k <= 40; k += 9) { 4161 GemmMicrokernelTester() 4162 .mr(1) 4163 .nr(16) 4164 .kr(2) 4165 .sr(1) 4166 .m(1) 4167 .n(16) 4168 .k(k) 4169 .ks(3) 4170 .a_offset(43) 4171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4172 } 4173 } 4174 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,zero)4175 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, zero) { 4176 TEST_REQUIRES_ARM_NEON; 4177 for (size_t k = 1; k <= 40; k += 9) { 4178 for (uint32_t mz = 0; mz < 1; mz++) { 4179 GemmMicrokernelTester() 4180 .mr(1) 4181 .nr(16) 4182 .kr(2) 4183 .sr(1) 4184 .m(1) 4185 .n(16) 4186 .k(k) 4187 .ks(3) 4188 .a_offset(43) 4189 .zero_index(mz) 4190 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 4191 } 4192 } 4193 } 4194 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,qmin)4195 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, qmin) { 4196 TEST_REQUIRES_ARM_NEON; 4197 GemmMicrokernelTester() 4198 .mr(1) 4199 .nr(16) 4200 .kr(2) 4201 .sr(1) 4202 .m(1) 4203 .n(16) 4204 .k(8) 4205 .qmin(128) 4206 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4207 } 4208 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,qmax)4209 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, qmax) { 4210 TEST_REQUIRES_ARM_NEON; 4211 GemmMicrokernelTester() 4212 .mr(1) 4213 .nr(16) 4214 .kr(2) 4215 .sr(1) 4216 .m(1) 4217 .n(16) 4218 .k(8) 4219 .qmax(128) 4220 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4221 } 4222 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R,strided_cm)4223 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD2R, strided_cm) { 4224 TEST_REQUIRES_ARM_NEON; 4225 GemmMicrokernelTester() 4226 .mr(1) 4227 .nr(16) 4228 .kr(2) 4229 .sr(1) 4230 .m(1) 4231 .n(16) 4232 .k(8) 4233 .cm_stride(19) 4234 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4235 } 4236 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4237 4238 4239 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,k_eq_16)4240 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_eq_16) { 4241 TEST_REQUIRES_ARM_NEON; 4242 GemmMicrokernelTester() 4243 .mr(1) 4244 .nr(16) 4245 .kr(4) 4246 .sr(1) 4247 .m(1) 4248 .n(16) 4249 .k(16) 4250 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4251 } 4252 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,strided_cn)4253 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, 
strided_cn) { 4254 TEST_REQUIRES_ARM_NEON; 4255 GemmMicrokernelTester() 4256 .mr(1) 4257 .nr(16) 4258 .kr(4) 4259 .sr(1) 4260 .m(1) 4261 .n(16) 4262 .k(16) 4263 .cn_stride(19) 4264 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4265 } 4266 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,k_eq_16_subtile)4267 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) { 4268 TEST_REQUIRES_ARM_NEON; 4269 for (uint32_t n = 1; n <= 16; n++) { 4270 for (uint32_t m = 1; m <= 1; m++) { 4271 GemmMicrokernelTester() 4272 .mr(1) 4273 .nr(16) 4274 .kr(4) 4275 .sr(1) 4276 .m(m) 4277 .n(n) 4278 .k(16) 4279 .iterations(1) 4280 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4281 } 4282 } 4283 } 4284 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)4285 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 4286 TEST_REQUIRES_ARM_NEON; 4287 for (uint32_t m = 1; m <= 1; m++) { 4288 GemmMicrokernelTester() 4289 .mr(1) 4290 .nr(16) 4291 .kr(4) 4292 .sr(1) 4293 .m(m) 4294 .n(16) 4295 .k(16) 4296 .iterations(1) 4297 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4298 } 4299 } 4300 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)4301 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 4302 TEST_REQUIRES_ARM_NEON; 4303 for (uint32_t n = 1; n <= 16; n++) { 4304 GemmMicrokernelTester() 4305 .mr(1) 4306 .nr(16) 4307 .kr(4) 4308 .sr(1) 4309 .m(1) 4310 .n(n) 4311 .k(16) 4312 .iterations(1) 4313 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4314 } 4315 } 4316 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,k_lt_16)4317 
// k = 1..15 on the full m = 1, n = 16 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..15 crossed with n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 17..31 on the full m = 1, n = 16 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 17..31 crossed with n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 32, 48, ..., 160 (multiples of 16) on the full m = 1, n = 16 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 32..160 step 16 crossed with n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 17..31 (above nr = 16) crossed with k = 1..80 step 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31 crossed with k = 1..80 step 17 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 (multiples of 16) crossed with k = 1..80 step 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
// n = 32 and 48 crossed with k = 1..80 step 17 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..80 step 17 on the full tile, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks(3) with k = 1..80 step 17, n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n = 17..31 crossed with k = 1..80 step 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks(3) with n = 32 and 48 crossed with k = 1..80 step 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride(19) with k = 1..80 step 17, n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..80 step 17 on the full tile, with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,zero)4643 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, zero) { 4644 TEST_REQUIRES_ARM_NEON; 4645 for (size_t k = 1; k <= 80; k += 17) { 4646 for (uint32_t mz = 0; mz < 1; mz++) { 4647 GemmMicrokernelTester() 4648 .mr(1) 4649 .nr(16) 4650 .kr(4) 4651 .sr(1) 4652 .m(1) 4653 .n(16) 4654 .k(k) 4655 .ks(3) 4656 .a_offset(83) 4657 .zero_index(mz) 4658 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4659 } 4660 } 4661 } 4662 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,qmin)4663 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, qmin) { 4664 TEST_REQUIRES_ARM_NEON; 4665 GemmMicrokernelTester() 4666 .mr(1) 4667 .nr(16) 4668 .kr(4) 4669 .sr(1) 4670 .m(1) 4671 .n(16) 4672 .k(16) 4673 .qmin(128) 4674 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4675 } 4676 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,qmax)4677 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, qmax) { 4678 TEST_REQUIRES_ARM_NEON; 4679 GemmMicrokernelTester() 4680 .mr(1) 4681 .nr(16) 4682 .kr(4) 4683 .sr(1) 4684 .m(1) 4685 .n(16) 4686 .k(16) 4687 .qmax(128) 4688 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4689 } 4690 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R,strided_cm)4691 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MLAL_LD2R, strided_cm) { 4692 TEST_REQUIRES_ARM_NEON; 4693 GemmMicrokernelTester() 4694 .mr(1) 4695 .nr(16) 4696 .kr(4) 4697 .sr(1) 4698 .m(1) 4699 .n(16) 4700 .k(16) 4701 .cm_stride(19) 4702 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 4703 } 4704 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 4705 4706 4707 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 
// Full m = 1, n = 16 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full tile at k = 8 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k = 8 with every m in 1..1 and n in 1..16, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16, varying only m over 1..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 1; m++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(m).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// k = 8, m = 1, varying only n over 1..16.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..7 on the full m = 1, n = 16 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..7 crossed with n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 9..15 on the full m = 1, n = 16 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// k = 9..15 crossed with n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 16, 24, ..., 80 (multiples of 8) on the full m = 1, n = 16 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16..80 step 8 crossed with n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 17..31 (above nr = 16) crossed with k = 1..40 step 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31 crossed with k = 1..40 step 9 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 (multiples of 16) crossed with k = 1..40 step 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
// n = 32 and 48 crossed with k = 1..40 step 9, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48 crossed with k = 1..40 step 9 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..40 step 9 on the full tile, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks(3) with k = 1..40 step 9, n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n = 17..31 crossed with k = 1..40 step 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks(3) with n = 32 and 48 crossed with k = 1..40 step 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride(19) with k = 1..40 step 9, n = 1..16 and m = 1..1, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
// k = 1..40 step 9 on the full tile, with ks(3) and a_offset(43).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(1).nr(16).kr(4).sr(1)
      .m(1).n(16).k(k)
      .ks(3)
      .a_offset(43)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..40 step 9 with ks(3), a_offset(43) and zero_index(mz) for mz = 0..0.
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 1; mz++) {
      GemmMicrokernelTester()
        .mr(1).nr(16).kr(4).sr(1)
        .m(1).n(16).k(k)
        .ks(3)
        .a_offset(43)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Full tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full tile at k = 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(1).nr(16).kr(4).sr(1)
    .m(1).n(16).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_LD2R, strided_cm) { 5160 TEST_REQUIRES_ARM_NEON; 5161 GemmMicrokernelTester() 5162 .mr(1) 5163 .nr(16) 5164 .kr(4) 5165 .sr(1) 5166 .m(1) 5167 .n(16) 5168 .k(8) 5169 .cm_stride(19) 5170 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5171 } 5172 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5173 5174 5175 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,k_eq_8)5176 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8) { 5177 TEST_REQUIRES_ARM_NEON; 5178 GemmMicrokernelTester() 5179 .mr(2) 5180 .nr(8) 5181 .kr(2) 5182 .sr(1) 5183 .m(2) 5184 .n(8) 5185 .k(8) 5186 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5187 } 5188 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,strided_cn)5189 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cn) { 5190 TEST_REQUIRES_ARM_NEON; 5191 GemmMicrokernelTester() 5192 .mr(2) 5193 .nr(8) 5194 .kr(2) 5195 .sr(1) 5196 .m(2) 5197 .n(8) 5198 .k(8) 5199 .cn_stride(11) 5200 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5201 } 5202 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,k_eq_8_subtile)5203 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_subtile) { 5204 TEST_REQUIRES_ARM_NEON; 5205 for (uint32_t n = 1; n <= 8; n++) { 5206 for (uint32_t m = 1; m <= 2; m++) { 5207 GemmMicrokernelTester() 5208 .mr(2) 5209 .nr(8) 5210 .kr(2) 5211 .sr(1) 5212 .m(m) 5213 .n(n) 5214 .k(8) 5215 .iterations(1) 5216 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5217 } 5218 } 5219 } 5220 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,k_eq_8_subtile_m)5221 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, 
k_eq_8_subtile_m) { 5222 TEST_REQUIRES_ARM_NEON; 5223 for (uint32_t m = 1; m <= 2; m++) { 5224 GemmMicrokernelTester() 5225 .mr(2) 5226 .nr(8) 5227 .kr(2) 5228 .sr(1) 5229 .m(m) 5230 .n(8) 5231 .k(8) 5232 .iterations(1) 5233 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5234 } 5235 } 5236 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,k_eq_8_subtile_n)5237 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) { 5238 TEST_REQUIRES_ARM_NEON; 5239 for (uint32_t n = 1; n <= 8; n++) { 5240 GemmMicrokernelTester() 5241 .mr(2) 5242 .nr(8) 5243 .kr(2) 5244 .sr(1) 5245 .m(2) 5246 .n(n) 5247 .k(8) 5248 .iterations(1) 5249 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5250 } 5251 } 5252 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,k_lt_8)5253 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_lt_8) { 5254 TEST_REQUIRES_ARM_NEON; 5255 for (size_t k = 1; k < 8; k++) { 5256 GemmMicrokernelTester() 5257 .mr(2) 5258 .nr(8) 5259 .kr(2) 5260 .sr(1) 5261 .m(2) 5262 .n(8) 5263 .k(k) 5264 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5265 } 5266 } 5267 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,k_lt_8_subtile)5268 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_lt_8_subtile) { 5269 TEST_REQUIRES_ARM_NEON; 5270 for (size_t k = 1; k < 8; k++) { 5271 for (uint32_t n = 1; n <= 8; n++) { 5272 for (uint32_t m = 1; m <= 2; m++) { 5273 GemmMicrokernelTester() 5274 .mr(2) 5275 .nr(8) 5276 .kr(2) 5277 .sr(1) 5278 .m(m) 5279 .n(n) 5280 .k(k) 5281 .iterations(1) 5282 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5283 } 5284 } 5285 } 5286 } 5287 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,k_gt_8)5288 
// k = 9..15 on the full m = 2, n = 8 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 9..15 crossed with n = 1..8 and m = 1..2, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 16, 24, ..., 80 (multiples of 8) on the full m = 2, n = 8 tile.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16..80 step 8 crossed with n = 1..8 and m = 1..2, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 9..15 (above nr = 8) crossed with k = 1..40 step 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15 crossed with k = 1..40 step 9 and m = 1..2, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 (multiples of 8) crossed with k = 1..40 step 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24 crossed with k = 1..40 step 9 and m = 1..2, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..40 step 9 on the full tile, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks(3) with k = 1..40 step 9, n = 1..8 and m = 1..2, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n = 9..15 crossed with k = 1..40 step 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks(3) with n = 16 and 24 crossed with k = 1..40 step 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride(11) with k = 1..40 step 9, n = 1..8 and m = 1..2, iterations(1) per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(8).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 1..40 step 9 on the full tile, with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2).nr(8).kr(2).sr(1)
      .m(2).n(8).k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..40 step 9 with ks(3), a_offset(83) and zero_index(mz) for mz = 0..1.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(8).kr(2).sr(1)
        .m(2).n(8).k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Full tile at k = 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(8).kr(2).sr(1)
    .m(2).n(8).k(8)
    .qmax(128)
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5625 } 5626 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP,strided_cm)5627 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_DUP, strided_cm) { 5628 TEST_REQUIRES_ARM_NEON; 5629 GemmMicrokernelTester() 5630 .mr(2) 5631 .nr(8) 5632 .kr(2) 5633 .sr(1) 5634 .m(2) 5635 .n(8) 5636 .k(8) 5637 .cm_stride(11) 5638 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5639 } 5640 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 5641 5642 5643 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8)5644 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8) { 5645 TEST_REQUIRES_ARM_NEON; 5646 GemmMicrokernelTester() 5647 .mr(2) 5648 .nr(8) 5649 .kr(4) 5650 .sr(1) 5651 .m(2) 5652 .n(8) 5653 .k(8) 5654 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5655 } 5656 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,strided_cn)5657 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cn) { 5658 TEST_REQUIRES_ARM_NEON; 5659 GemmMicrokernelTester() 5660 .mr(2) 5661 .nr(8) 5662 .kr(4) 5663 .sr(1) 5664 .m(2) 5665 .n(8) 5666 .k(8) 5667 .cn_stride(11) 5668 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5669 } 5670 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile)5671 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile) { 5672 TEST_REQUIRES_ARM_NEON; 5673 for (uint32_t n = 1; n <= 8; n++) { 5674 for (uint32_t m = 1; m <= 2; m++) { 5675 GemmMicrokernelTester() 5676 .mr(2) 5677 .nr(8) 5678 .kr(4) 5679 .sr(1) 5680 .m(m) 5681 .n(n) 5682 .k(8) 5683 .iterations(1) 5684 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5685 } 5686 } 5687 } 5688 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile_m)5689 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) { 5690 TEST_REQUIRES_ARM_NEON; 5691 for (uint32_t m = 1; m <= 2; m++) { 5692 GemmMicrokernelTester() 5693 .mr(2) 5694 .nr(8) 5695 .kr(4) 5696 .sr(1) 5697 .m(m) 5698 .n(8) 5699 .k(8) 5700 .iterations(1) 5701 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5702 } 5703 } 5704 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_eq_8_subtile_n)5705 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) { 5706 TEST_REQUIRES_ARM_NEON; 5707 for (uint32_t n = 1; n <= 8; n++) { 5708 GemmMicrokernelTester() 5709 .mr(2) 5710 .nr(8) 5711 .kr(4) 5712 .sr(1) 5713 .m(2) 5714 .n(n) 5715 .k(8) 5716 .iterations(1) 5717 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5718 } 5719 } 5720 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_lt_8)5721 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8) { 5722 TEST_REQUIRES_ARM_NEON; 5723 for (size_t k = 1; k < 8; k++) { 5724 GemmMicrokernelTester() 5725 .mr(2) 5726 .nr(8) 5727 .kr(4) 5728 .sr(1) 5729 .m(2) 5730 .n(8) 5731 .k(k) 5732 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5733 } 5734 } 5735 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_lt_8_subtile)5736 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_lt_8_subtile) { 5737 TEST_REQUIRES_ARM_NEON; 5738 for (size_t k = 1; k < 8; k++) { 5739 for (uint32_t n = 1; n <= 8; n++) { 5740 for (uint32_t m = 1; m <= 2; m++) { 5741 GemmMicrokernelTester() 5742 .mr(2) 5743 .nr(8) 5744 .kr(4) 5745 .sr(1) 5746 .m(m) 5747 .n(n) 5748 .k(k) 5749 .iterations(1) 5750 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5751 } 5752 } 5753 } 5754 } 5755 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_gt_8)5756 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8) { 5757 TEST_REQUIRES_ARM_NEON; 5758 for (size_t k = 9; k < 16; k++) { 5759 GemmMicrokernelTester() 5760 .mr(2) 5761 .nr(8) 5762 .kr(4) 5763 .sr(1) 5764 .m(2) 5765 .n(8) 5766 .k(k) 5767 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5768 } 5769 } 5770 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_gt_8_subtile)5771 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_gt_8_subtile) { 5772 TEST_REQUIRES_ARM_NEON; 5773 for (size_t k = 9; k < 16; k++) { 5774 for (uint32_t n = 1; n <= 8; n++) { 5775 for (uint32_t m = 1; m <= 2; m++) { 5776 GemmMicrokernelTester() 5777 .mr(2) 5778 .nr(8) 5779 .kr(4) 5780 .sr(1) 5781 .m(m) 5782 .n(n) 5783 .k(k) 5784 .iterations(1) 5785 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5786 } 5787 } 5788 } 5789 } 5790 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_div_8)5791 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8) { 5792 TEST_REQUIRES_ARM_NEON; 5793 for (size_t k = 16; k <= 80; k += 8) { 5794 GemmMicrokernelTester() 5795 .mr(2) 5796 .nr(8) 5797 .kr(4) 5798 .sr(1) 5799 .m(2) 5800 .n(8) 5801 .k(k) 5802 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5803 } 5804 } 5805 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,k_div_8_subtile)5806 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, k_div_8_subtile) { 5807 TEST_REQUIRES_ARM_NEON; 5808 for (size_t k = 16; k <= 80; k += 8) { 5809 for (uint32_t n = 1; n <= 8; n++) { 5810 for (uint32_t m = 1; m <= 2; m++) { 5811 GemmMicrokernelTester() 
5812 .mr(2) 5813 .nr(8) 5814 .kr(4) 5815 .sr(1) 5816 .m(m) 5817 .n(n) 5818 .k(k) 5819 .iterations(1) 5820 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5821 } 5822 } 5823 } 5824 } 5825 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8)5826 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8) { 5827 TEST_REQUIRES_ARM_NEON; 5828 for (uint32_t n = 9; n < 16; n++) { 5829 for (size_t k = 1; k <= 40; k += 9) { 5830 GemmMicrokernelTester() 5831 .mr(2) 5832 .nr(8) 5833 .kr(4) 5834 .sr(1) 5835 .m(2) 5836 .n(n) 5837 .k(k) 5838 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5839 } 5840 } 5841 } 5842 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8_strided_cn)5843 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) { 5844 TEST_REQUIRES_ARM_NEON; 5845 for (uint32_t n = 9; n < 16; n++) { 5846 for (size_t k = 1; k <= 40; k += 9) { 5847 GemmMicrokernelTester() 5848 .mr(2) 5849 .nr(8) 5850 .kr(4) 5851 .sr(1) 5852 .m(2) 5853 .n(n) 5854 .k(k) 5855 .cn_stride(11) 5856 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5857 } 5858 } 5859 } 5860 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_gt_8_subtile)5861 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_subtile) { 5862 TEST_REQUIRES_ARM_NEON; 5863 for (uint32_t n = 9; n < 16; n++) { 5864 for (size_t k = 1; k <= 40; k += 9) { 5865 for (uint32_t m = 1; m <= 2; m++) { 5866 GemmMicrokernelTester() 5867 .mr(2) 5868 .nr(8) 5869 .kr(4) 5870 .sr(1) 5871 .m(m) 5872 .n(n) 5873 .k(k) 5874 .iterations(1) 5875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 5876 } 5877 } 5878 } 5879 } 5880 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,n_div_8)5881 
// IGEMM cases for xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup
// (mr = 2, nr = 8, kr = 4, sr = 1). Every test is gated on
// TEST_REQUIRES_ARM_NEON and passes xnn_init_qs8_conv_minmax_rndnu_neon_params
// and xnn_qs8_requantize_rndnu to Test().

// m = 2, n in {16, 24}, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, additionally over m in {1, 2}; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile with ks(3), k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks(3) over every (m, n) with m in 1..2 and n in 1..8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n in 9..15 and m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks(3) with n in {16, 24} and m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride(11) over every (m, n) with m in 1..2 and n in 1..8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 2x8 tile with ks(3) and a_offset(83).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(2)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(83)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same as a_offset, additionally setting zero_index(mz) for mz in {0, 1}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(2)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(83)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Full 2x8 tile, k = 8, with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2)
    .nr(8)
    .kr(4)
    .sr(1)
    .m(2)
    .n(8)
    .k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,qmax)6081 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, qmax) { 6082 TEST_REQUIRES_ARM_NEON; 6083 GemmMicrokernelTester() 6084 .mr(2) 6085 .nr(8) 6086 .kr(4) 6087 .sr(1) 6088 .m(2) 6089 .n(8) 6090 .k(8) 6091 .qmax(128) 6092 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6093 } 6094 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP,strided_cm)6095 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_DUP, strided_cm) { 6096 TEST_REQUIRES_ARM_NEON; 6097 GemmMicrokernelTester() 6098 .mr(2) 6099 .nr(8) 6100 .kr(4) 6101 .sr(1) 6102 .m(2) 6103 .n(8) 6104 .k(8) 6105 .cm_stride(11) 6106 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6107 } 6108 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 6109 6110 6111 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_eq_8)6112 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8) { 6113 TEST_REQUIRES_ARM_NEON; 6114 GemmMicrokernelTester() 6115 .mr(2) 6116 .nr(8) 6117 .kr(8) 6118 .sr(1) 6119 .m(2) 6120 .n(8) 6121 .k(8) 6122 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6123 } 6124 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,strided_cn)6125 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cn) { 6126 TEST_REQUIRES_ARM_NEON; 6127 GemmMicrokernelTester() 6128 .mr(2) 6129 .nr(8) 6130 .kr(8) 6131 .sr(1) 6132 .m(2) 6133 .n(8) 6134 .k(8) 6135 .cn_stride(11) 6136 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6137 } 6138 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_eq_8_subtile)6139 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile) { 6140 TEST_REQUIRES_ARM_NEON; 6141 for (uint32_t n = 1; n <= 8; n++) { 6142 for (uint32_t m 
= 1; m <= 2; m++) { 6143 GemmMicrokernelTester() 6144 .mr(2) 6145 .nr(8) 6146 .kr(8) 6147 .sr(1) 6148 .m(m) 6149 .n(n) 6150 .k(8) 6151 .iterations(1) 6152 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6153 } 6154 } 6155 } 6156 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_eq_8_subtile_m)6157 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile_m) { 6158 TEST_REQUIRES_ARM_NEON; 6159 for (uint32_t m = 1; m <= 2; m++) { 6160 GemmMicrokernelTester() 6161 .mr(2) 6162 .nr(8) 6163 .kr(8) 6164 .sr(1) 6165 .m(m) 6166 .n(8) 6167 .k(8) 6168 .iterations(1) 6169 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6170 } 6171 } 6172 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_eq_8_subtile_n)6173 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile_n) { 6174 TEST_REQUIRES_ARM_NEON; 6175 for (uint32_t n = 1; n <= 8; n++) { 6176 GemmMicrokernelTester() 6177 .mr(2) 6178 .nr(8) 6179 .kr(8) 6180 .sr(1) 6181 .m(2) 6182 .n(n) 6183 .k(8) 6184 .iterations(1) 6185 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6186 } 6187 } 6188 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_lt_8)6189 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_lt_8) { 6190 TEST_REQUIRES_ARM_NEON; 6191 for (size_t k = 1; k < 8; k++) { 6192 GemmMicrokernelTester() 6193 .mr(2) 6194 .nr(8) 6195 .kr(8) 6196 .sr(1) 6197 .m(2) 6198 .n(8) 6199 .k(k) 6200 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6201 } 6202 } 6203 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_lt_8_subtile)6204 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_lt_8_subtile) { 6205 TEST_REQUIRES_ARM_NEON; 6206 for (size_t k = 1; k < 8; k++) { 6207 for (uint32_t n = 1; n <= 8; n++) { 6208 for (uint32_t m = 1; m <= 2; 
m++) { 6209 GemmMicrokernelTester() 6210 .mr(2) 6211 .nr(8) 6212 .kr(8) 6213 .sr(1) 6214 .m(m) 6215 .n(n) 6216 .k(k) 6217 .iterations(1) 6218 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6219 } 6220 } 6221 } 6222 } 6223 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_gt_8)6224 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_gt_8) { 6225 TEST_REQUIRES_ARM_NEON; 6226 for (size_t k = 9; k < 16; k++) { 6227 GemmMicrokernelTester() 6228 .mr(2) 6229 .nr(8) 6230 .kr(8) 6231 .sr(1) 6232 .m(2) 6233 .n(8) 6234 .k(k) 6235 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6236 } 6237 } 6238 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_gt_8_subtile)6239 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_gt_8_subtile) { 6240 TEST_REQUIRES_ARM_NEON; 6241 for (size_t k = 9; k < 16; k++) { 6242 for (uint32_t n = 1; n <= 8; n++) { 6243 for (uint32_t m = 1; m <= 2; m++) { 6244 GemmMicrokernelTester() 6245 .mr(2) 6246 .nr(8) 6247 .kr(8) 6248 .sr(1) 6249 .m(m) 6250 .n(n) 6251 .k(k) 6252 .iterations(1) 6253 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6254 } 6255 } 6256 } 6257 } 6258 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_div_8)6259 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_div_8) { 6260 TEST_REQUIRES_ARM_NEON; 6261 for (size_t k = 16; k <= 80; k += 8) { 6262 GemmMicrokernelTester() 6263 .mr(2) 6264 .nr(8) 6265 .kr(8) 6266 .sr(1) 6267 .m(2) 6268 .n(8) 6269 .k(k) 6270 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6271 } 6272 } 6273 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,k_div_8_subtile)6274 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_div_8_subtile) { 6275 TEST_REQUIRES_ARM_NEON; 6276 for (size_t k = 16; k <= 80; k += 8) { 6277 for 
(uint32_t n = 1; n <= 8; n++) { 6278 for (uint32_t m = 1; m <= 2; m++) { 6279 GemmMicrokernelTester() 6280 .mr(2) 6281 .nr(8) 6282 .kr(8) 6283 .sr(1) 6284 .m(m) 6285 .n(n) 6286 .k(k) 6287 .iterations(1) 6288 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6289 } 6290 } 6291 } 6292 } 6293 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,n_gt_8)6294 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8) { 6295 TEST_REQUIRES_ARM_NEON; 6296 for (uint32_t n = 9; n < 16; n++) { 6297 for (size_t k = 1; k <= 40; k += 9) { 6298 GemmMicrokernelTester() 6299 .mr(2) 6300 .nr(8) 6301 .kr(8) 6302 .sr(1) 6303 .m(2) 6304 .n(n) 6305 .k(k) 6306 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6307 } 6308 } 6309 } 6310 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,n_gt_8_strided_cn)6311 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_strided_cn) { 6312 TEST_REQUIRES_ARM_NEON; 6313 for (uint32_t n = 9; n < 16; n++) { 6314 for (size_t k = 1; k <= 40; k += 9) { 6315 GemmMicrokernelTester() 6316 .mr(2) 6317 .nr(8) 6318 .kr(8) 6319 .sr(1) 6320 .m(2) 6321 .n(n) 6322 .k(k) 6323 .cn_stride(11) 6324 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6325 } 6326 } 6327 } 6328 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,n_gt_8_subtile)6329 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_subtile) { 6330 TEST_REQUIRES_ARM_NEON; 6331 for (uint32_t n = 9; n < 16; n++) { 6332 for (size_t k = 1; k <= 40; k += 9) { 6333 for (uint32_t m = 1; m <= 2; m++) { 6334 GemmMicrokernelTester() 6335 .mr(2) 6336 .nr(8) 6337 .kr(8) 6338 .sr(1) 6339 .m(m) 6340 .n(n) 6341 .k(k) 6342 .iterations(1) 6343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6344 } 6345 } 6346 } 6347 } 6348 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,n_div_8)6349 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8) { 6350 TEST_REQUIRES_ARM_NEON; 6351 for (uint32_t n = 16; n <= 24; n += 8) { 6352 for (size_t k = 1; k <= 40; k += 9) { 6353 GemmMicrokernelTester() 6354 .mr(2) 6355 .nr(8) 6356 .kr(8) 6357 .sr(1) 6358 .m(2) 6359 .n(n) 6360 .k(k) 6361 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6362 } 6363 } 6364 } 6365 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,n_div_8_strided_cn)6366 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_strided_cn) { 6367 TEST_REQUIRES_ARM_NEON; 6368 for (uint32_t n = 16; n <= 24; n += 8) { 6369 for (size_t k = 1; k <= 40; k += 9) { 6370 GemmMicrokernelTester() 6371 .mr(2) 6372 .nr(8) 6373 .kr(8) 6374 .sr(1) 6375 .m(2) 6376 .n(n) 6377 .k(k) 6378 .cn_stride(11) 6379 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6380 } 6381 } 6382 } 6383 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,n_div_8_subtile)6384 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_subtile) { 6385 TEST_REQUIRES_ARM_NEON; 6386 for (uint32_t n = 16; n <= 24; n += 8) { 6387 for (size_t k = 1; k <= 40; k += 9) { 6388 for (uint32_t m = 1; m <= 2; m++) { 6389 GemmMicrokernelTester() 6390 .mr(2) 6391 .nr(8) 6392 .kr(8) 6393 .sr(1) 6394 .m(m) 6395 .n(n) 6396 .k(k) 6397 .iterations(1) 6398 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6399 } 6400 } 6401 } 6402 } 6403 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,small_kernel)6404 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, small_kernel) { 6405 TEST_REQUIRES_ARM_NEON; 6406 for (size_t k = 1; k <= 40; k += 9) { 6407 GemmMicrokernelTester() 6408 .mr(2) 6409 .nr(8) 6410 .kr(8) 6411 .sr(1) 6412 .m(2) 6413 .n(8) 6414 .k(k) 6415 .ks(3) 6416 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6417 } 6418 } 6419 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,small_kernel_subtile)6420 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, small_kernel_subtile) { 6421 TEST_REQUIRES_ARM_NEON; 6422 for (size_t k = 1; k <= 40; k += 9) { 6423 for (uint32_t n = 1; n <= 8; n++) { 6424 for (uint32_t m = 1; m <= 2; m++) { 6425 GemmMicrokernelTester() 6426 .mr(2) 6427 .nr(8) 6428 .kr(8) 6429 .sr(1) 6430 .m(m) 6431 .n(n) 6432 .k(k) 6433 .ks(3) 6434 .iterations(1) 6435 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6436 } 6437 } 6438 } 6439 } 6440 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,n_gt_8_small_kernel)6441 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_small_kernel) { 6442 TEST_REQUIRES_ARM_NEON; 6443 for (uint32_t n = 9; n < 16; n++) { 6444 for (size_t k = 1; k <= 40; k += 9) { 6445 GemmMicrokernelTester() 6446 .mr(2) 6447 .nr(8) 6448 .kr(8) 6449 .sr(1) 6450 .m(2) 6451 .n(n) 6452 .k(k) 6453 .ks(3) 6454 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6455 } 6456 } 6457 } 6458 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,n_div_8_small_kernel)6459 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_small_kernel) { 6460 TEST_REQUIRES_ARM_NEON; 6461 for (uint32_t n = 16; n <= 24; n += 8) { 6462 for (size_t k = 1; k <= 40; k += 9) { 6463 GemmMicrokernelTester() 6464 .mr(2) 6465 .nr(8) 6466 .kr(8) 6467 .sr(1) 6468 .m(2) 6469 .n(n) 6470 .k(k) 6471 .ks(3) 6472 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6473 } 6474 } 6475 } 6476 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,strided_cm_subtile)6477 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cm_subtile) { 6478 TEST_REQUIRES_ARM_NEON; 
6479 for (size_t k = 1; k <= 40; k += 9) { 6480 for (uint32_t n = 1; n <= 8; n++) { 6481 for (uint32_t m = 1; m <= 2; m++) { 6482 GemmMicrokernelTester() 6483 .mr(2) 6484 .nr(8) 6485 .kr(8) 6486 .sr(1) 6487 .m(m) 6488 .n(n) 6489 .k(k) 6490 .cm_stride(11) 6491 .iterations(1) 6492 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6493 } 6494 } 6495 } 6496 } 6497 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,a_offset)6498 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, a_offset) { 6499 TEST_REQUIRES_ARM_NEON; 6500 for (size_t k = 1; k <= 40; k += 9) { 6501 GemmMicrokernelTester() 6502 .mr(2) 6503 .nr(8) 6504 .kr(8) 6505 .sr(1) 6506 .m(2) 6507 .n(8) 6508 .k(k) 6509 .ks(3) 6510 .a_offset(83) 6511 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6512 } 6513 } 6514 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,zero)6515 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, zero) { 6516 TEST_REQUIRES_ARM_NEON; 6517 for (size_t k = 1; k <= 40; k += 9) { 6518 for (uint32_t mz = 0; mz < 2; mz++) { 6519 GemmMicrokernelTester() 6520 .mr(2) 6521 .nr(8) 6522 .kr(8) 6523 .sr(1) 6524 .m(2) 6525 .n(8) 6526 .k(k) 6527 .ks(3) 6528 .a_offset(83) 6529 .zero_index(mz) 6530 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6531 } 6532 } 6533 } 6534 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,qmin)6535 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, qmin) { 6536 TEST_REQUIRES_ARM_NEON; 6537 GemmMicrokernelTester() 6538 .mr(2) 6539 .nr(8) 6540 .kr(8) 6541 .sr(1) 6542 .m(2) 6543 .n(8) 6544 .k(8) 6545 .qmin(128) 6546 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6547 } 6548 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,qmax)6549 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, qmax) { 6550 
TEST_REQUIRES_ARM_NEON; 6551 GemmMicrokernelTester() 6552 .mr(2) 6553 .nr(8) 6554 .kr(8) 6555 .sr(1) 6556 .m(2) 6557 .n(8) 6558 .k(8) 6559 .qmax(128) 6560 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6561 } 6562 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL,strided_cm)6563 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cm) { 6564 TEST_REQUIRES_ARM_NEON; 6565 GemmMicrokernelTester() 6566 .mr(2) 6567 .nr(8) 6568 .kr(8) 6569 .sr(1) 6570 .m(2) 6571 .n(8) 6572 .k(8) 6573 .cm_stride(11) 6574 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6575 } 6576 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 6577 6578 6579 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_eq_16)6580 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16) { 6581 TEST_REQUIRES_ARM_NEON; 6582 GemmMicrokernelTester() 6583 .mr(2) 6584 .nr(8) 6585 .kr(16) 6586 .sr(1) 6587 .m(2) 6588 .n(8) 6589 .k(16) 6590 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6591 } 6592 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,strided_cn)6593 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cn) { 6594 TEST_REQUIRES_ARM_NEON; 6595 GemmMicrokernelTester() 6596 .mr(2) 6597 .nr(8) 6598 .kr(16) 6599 .sr(1) 6600 .m(2) 6601 .n(8) 6602 .k(16) 6603 .cn_stride(11) 6604 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6605 } 6606 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_eq_16_subtile)6607 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile) { 6608 TEST_REQUIRES_ARM_NEON; 6609 for (uint32_t n = 1; n <= 8; n++) { 6610 for (uint32_t m = 1; m <= 2; m++) { 6611 GemmMicrokernelTester() 6612 .mr(2) 6613 .nr(8) 6614 .kr(16) 6615 .sr(1) 6616 .m(m) 6617 .n(n) 
6618 .k(16) 6619 .iterations(1) 6620 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6621 } 6622 } 6623 } 6624 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_eq_16_subtile_m)6625 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile_m) { 6626 TEST_REQUIRES_ARM_NEON; 6627 for (uint32_t m = 1; m <= 2; m++) { 6628 GemmMicrokernelTester() 6629 .mr(2) 6630 .nr(8) 6631 .kr(16) 6632 .sr(1) 6633 .m(m) 6634 .n(8) 6635 .k(16) 6636 .iterations(1) 6637 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6638 } 6639 } 6640 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_eq_16_subtile_n)6641 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_eq_16_subtile_n) { 6642 TEST_REQUIRES_ARM_NEON; 6643 for (uint32_t n = 1; n <= 8; n++) { 6644 GemmMicrokernelTester() 6645 .mr(2) 6646 .nr(8) 6647 .kr(16) 6648 .sr(1) 6649 .m(2) 6650 .n(n) 6651 .k(16) 6652 .iterations(1) 6653 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6654 } 6655 } 6656 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_lt_16)6657 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_lt_16) { 6658 TEST_REQUIRES_ARM_NEON; 6659 for (size_t k = 1; k < 16; k++) { 6660 GemmMicrokernelTester() 6661 .mr(2) 6662 .nr(8) 6663 .kr(16) 6664 .sr(1) 6665 .m(2) 6666 .n(8) 6667 .k(k) 6668 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6669 } 6670 } 6671 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_lt_16_subtile)6672 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_lt_16_subtile) { 6673 TEST_REQUIRES_ARM_NEON; 6674 for (size_t k = 1; k < 16; k++) { 6675 for (uint32_t n = 1; n <= 8; n++) { 6676 for (uint32_t m = 1; m <= 2; m++) { 6677 GemmMicrokernelTester() 6678 .mr(2) 6679 .nr(8) 6680 .kr(16) 6681 .sr(1) 6682 
.m(m) 6683 .n(n) 6684 .k(k) 6685 .iterations(1) 6686 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6687 } 6688 } 6689 } 6690 } 6691 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_gt_16)6692 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_gt_16) { 6693 TEST_REQUIRES_ARM_NEON; 6694 for (size_t k = 17; k < 32; k++) { 6695 GemmMicrokernelTester() 6696 .mr(2) 6697 .nr(8) 6698 .kr(16) 6699 .sr(1) 6700 .m(2) 6701 .n(8) 6702 .k(k) 6703 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6704 } 6705 } 6706 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_gt_16_subtile)6707 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_gt_16_subtile) { 6708 TEST_REQUIRES_ARM_NEON; 6709 for (size_t k = 17; k < 32; k++) { 6710 for (uint32_t n = 1; n <= 8; n++) { 6711 for (uint32_t m = 1; m <= 2; m++) { 6712 GemmMicrokernelTester() 6713 .mr(2) 6714 .nr(8) 6715 .kr(16) 6716 .sr(1) 6717 .m(m) 6718 .n(n) 6719 .k(k) 6720 .iterations(1) 6721 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6722 } 6723 } 6724 } 6725 } 6726 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_div_16)6727 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_div_16) { 6728 TEST_REQUIRES_ARM_NEON; 6729 for (size_t k = 32; k <= 160; k += 16) { 6730 GemmMicrokernelTester() 6731 .mr(2) 6732 .nr(8) 6733 .kr(16) 6734 .sr(1) 6735 .m(2) 6736 .n(8) 6737 .k(k) 6738 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6739 } 6740 } 6741 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,k_div_16_subtile)6742 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, k_div_16_subtile) { 6743 TEST_REQUIRES_ARM_NEON; 6744 for (size_t k = 32; k <= 160; k += 16) { 6745 for (uint32_t n = 1; n <= 8; n++) { 6746 for (uint32_t m = 1; m <= 2; 
m++) { 6747 GemmMicrokernelTester() 6748 .mr(2) 6749 .nr(8) 6750 .kr(16) 6751 .sr(1) 6752 .m(m) 6753 .n(n) 6754 .k(k) 6755 .iterations(1) 6756 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6757 } 6758 } 6759 } 6760 } 6761 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,n_gt_8)6762 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8) { 6763 TEST_REQUIRES_ARM_NEON; 6764 for (uint32_t n = 9; n < 16; n++) { 6765 for (size_t k = 1; k <= 80; k += 17) { 6766 GemmMicrokernelTester() 6767 .mr(2) 6768 .nr(8) 6769 .kr(16) 6770 .sr(1) 6771 .m(2) 6772 .n(n) 6773 .k(k) 6774 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6775 } 6776 } 6777 } 6778 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,n_gt_8_strided_cn)6779 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_strided_cn) { 6780 TEST_REQUIRES_ARM_NEON; 6781 for (uint32_t n = 9; n < 16; n++) { 6782 for (size_t k = 1; k <= 80; k += 17) { 6783 GemmMicrokernelTester() 6784 .mr(2) 6785 .nr(8) 6786 .kr(16) 6787 .sr(1) 6788 .m(2) 6789 .n(n) 6790 .k(k) 6791 .cn_stride(11) 6792 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6793 } 6794 } 6795 } 6796 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,n_gt_8_subtile)6797 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_subtile) { 6798 TEST_REQUIRES_ARM_NEON; 6799 for (uint32_t n = 9; n < 16; n++) { 6800 for (size_t k = 1; k <= 80; k += 17) { 6801 for (uint32_t m = 1; m <= 2; m++) { 6802 GemmMicrokernelTester() 6803 .mr(2) 6804 .nr(8) 6805 .kr(16) 6806 .sr(1) 6807 .m(m) 6808 .n(n) 6809 .k(k) 6810 .iterations(1) 6811 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6812 } 6813 } 6814 } 6815 } 6816 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,n_div_8)6817 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8) { 6818 TEST_REQUIRES_ARM_NEON; 6819 for (uint32_t n = 16; n <= 24; n += 8) { 6820 for (size_t k = 1; k <= 80; k += 17) { 6821 GemmMicrokernelTester() 6822 .mr(2) 6823 .nr(8) 6824 .kr(16) 6825 .sr(1) 6826 .m(2) 6827 .n(n) 6828 .k(k) 6829 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6830 } 6831 } 6832 } 6833 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,n_div_8_strided_cn)6834 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_strided_cn) { 6835 TEST_REQUIRES_ARM_NEON; 6836 for (uint32_t n = 16; n <= 24; n += 8) { 6837 for (size_t k = 1; k <= 80; k += 17) { 6838 GemmMicrokernelTester() 6839 .mr(2) 6840 .nr(8) 6841 .kr(16) 6842 .sr(1) 6843 .m(2) 6844 .n(n) 6845 .k(k) 6846 .cn_stride(11) 6847 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6848 } 6849 } 6850 } 6851 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,n_div_8_subtile)6852 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_subtile) { 6853 TEST_REQUIRES_ARM_NEON; 6854 for (uint32_t n = 16; n <= 24; n += 8) { 6855 for (size_t k = 1; k <= 80; k += 17) { 6856 for (uint32_t m = 1; m <= 2; m++) { 6857 GemmMicrokernelTester() 6858 .mr(2) 6859 .nr(8) 6860 .kr(16) 6861 .sr(1) 6862 .m(m) 6863 .n(n) 6864 .k(k) 6865 .iterations(1) 6866 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6867 } 6868 } 6869 } 6870 } 6871 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,small_kernel)6872 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, small_kernel) { 6873 TEST_REQUIRES_ARM_NEON; 6874 for (size_t k = 1; k <= 80; k += 17) { 6875 GemmMicrokernelTester() 6876 .mr(2) 6877 .nr(8) 6878 .kr(16) 6879 .sr(1) 6880 .m(2) 6881 .n(8) 6882 .k(k) 6883 .ks(3) 6884 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6885 } 6886 } 6887 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,small_kernel_subtile)6888 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, small_kernel_subtile) { 6889 TEST_REQUIRES_ARM_NEON; 6890 for (size_t k = 1; k <= 80; k += 17) { 6891 for (uint32_t n = 1; n <= 8; n++) { 6892 for (uint32_t m = 1; m <= 2; m++) { 6893 GemmMicrokernelTester() 6894 .mr(2) 6895 .nr(8) 6896 .kr(16) 6897 .sr(1) 6898 .m(m) 6899 .n(n) 6900 .k(k) 6901 .ks(3) 6902 .iterations(1) 6903 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6904 } 6905 } 6906 } 6907 } 6908 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,n_gt_8_small_kernel)6909 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_gt_8_small_kernel) { 6910 TEST_REQUIRES_ARM_NEON; 6911 for (uint32_t n = 9; n < 16; n++) { 6912 for (size_t k = 1; k <= 80; k += 17) { 6913 GemmMicrokernelTester() 6914 .mr(2) 6915 .nr(8) 6916 .kr(16) 6917 .sr(1) 6918 .m(2) 6919 .n(n) 6920 .k(k) 6921 .ks(3) 6922 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6923 } 6924 } 6925 } 6926 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,n_div_8_small_kernel)6927 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, n_div_8_small_kernel) { 6928 TEST_REQUIRES_ARM_NEON; 6929 for (uint32_t n = 16; n <= 24; n += 8) { 6930 for (size_t k = 1; k <= 80; k += 17) { 6931 GemmMicrokernelTester() 6932 .mr(2) 6933 .nr(8) 6934 .kr(16) 6935 .sr(1) 6936 .m(2) 6937 .n(n) 6938 .k(k) 6939 .ks(3) 6940 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6941 } 6942 } 6943 } 6944 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,strided_cm_subtile)6945 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cm_subtile) { 6946 TEST_REQUIRES_ARM_NEON; 6947 for (size_t k = 1; k <= 80; k += 17) { 
6948 for (uint32_t n = 1; n <= 8; n++) { 6949 for (uint32_t m = 1; m <= 2; m++) { 6950 GemmMicrokernelTester() 6951 .mr(2) 6952 .nr(8) 6953 .kr(16) 6954 .sr(1) 6955 .m(m) 6956 .n(n) 6957 .k(k) 6958 .cm_stride(11) 6959 .iterations(1) 6960 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6961 } 6962 } 6963 } 6964 } 6965 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,a_offset)6966 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, a_offset) { 6967 TEST_REQUIRES_ARM_NEON; 6968 for (size_t k = 1; k <= 80; k += 17) { 6969 GemmMicrokernelTester() 6970 .mr(2) 6971 .nr(8) 6972 .kr(16) 6973 .sr(1) 6974 .m(2) 6975 .n(8) 6976 .k(k) 6977 .ks(3) 6978 .a_offset(163) 6979 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6980 } 6981 } 6982 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,zero)6983 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, zero) { 6984 TEST_REQUIRES_ARM_NEON; 6985 for (size_t k = 1; k <= 80; k += 17) { 6986 for (uint32_t mz = 0; mz < 2; mz++) { 6987 GemmMicrokernelTester() 6988 .mr(2) 6989 .nr(8) 6990 .kr(16) 6991 .sr(1) 6992 .m(2) 6993 .n(8) 6994 .k(k) 6995 .ks(3) 6996 .a_offset(163) 6997 .zero_index(mz) 6998 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 6999 } 7000 } 7001 } 7002 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,qmin)7003 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, qmin) { 7004 TEST_REQUIRES_ARM_NEON; 7005 GemmMicrokernelTester() 7006 .mr(2) 7007 .nr(8) 7008 .kr(16) 7009 .sr(1) 7010 .m(2) 7011 .n(8) 7012 .k(16) 7013 .qmin(128) 7014 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7015 } 7016 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,qmax)7017 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, qmax) { 7018 
TEST_REQUIRES_ARM_NEON; 7019 GemmMicrokernelTester() 7020 .mr(2) 7021 .nr(8) 7022 .kr(16) 7023 .sr(1) 7024 .m(2) 7025 .n(8) 7026 .k(16) 7027 .qmax(128) 7028 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7029 } 7030 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL,strided_cm)7031 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C16__NEON_MLAL, strided_cm) { 7032 TEST_REQUIRES_ARM_NEON; 7033 GemmMicrokernelTester() 7034 .mr(2) 7035 .nr(8) 7036 .kr(16) 7037 .sr(1) 7038 .m(2) 7039 .n(8) 7040 .k(16) 7041 .cm_stride(11) 7042 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7043 } 7044 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7045 7046 7047 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8)7048 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8) { 7049 TEST_REQUIRES_ARM_NEON; 7050 GemmMicrokernelTester() 7051 .mr(2) 7052 .nr(16) 7053 .kr(1) 7054 .sr(1) 7055 .m(2) 7056 .n(16) 7057 .k(8) 7058 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7059 } 7060 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cn)7061 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cn) { 7062 TEST_REQUIRES_ARM_NEON; 7063 GemmMicrokernelTester() 7064 .mr(2) 7065 .nr(16) 7066 .kr(1) 7067 .sr(1) 7068 .m(2) 7069 .n(16) 7070 .k(8) 7071 .cn_stride(19) 7072 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7073 } 7074 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile)7075 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile) { 7076 TEST_REQUIRES_ARM_NEON; 7077 for (uint32_t n = 1; n <= 16; n++) { 7078 for (uint32_t m = 1; m <= 2; m++) { 7079 GemmMicrokernelTester() 7080 .mr(2) 7081 .nr(16) 7082 .kr(1) 7083 
.sr(1) 7084 .m(m) 7085 .n(n) 7086 .k(8) 7087 .iterations(1) 7088 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7089 } 7090 } 7091 } 7092 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile_m)7093 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) { 7094 TEST_REQUIRES_ARM_NEON; 7095 for (uint32_t m = 1; m <= 2; m++) { 7096 GemmMicrokernelTester() 7097 .mr(2) 7098 .nr(16) 7099 .kr(1) 7100 .sr(1) 7101 .m(m) 7102 .n(16) 7103 .k(8) 7104 .iterations(1) 7105 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7106 } 7107 } 7108 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_eq_8_subtile_n)7109 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) { 7110 TEST_REQUIRES_ARM_NEON; 7111 for (uint32_t n = 1; n <= 16; n++) { 7112 GemmMicrokernelTester() 7113 .mr(2) 7114 .nr(16) 7115 .kr(1) 7116 .sr(1) 7117 .m(2) 7118 .n(n) 7119 .k(8) 7120 .iterations(1) 7121 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7122 } 7123 } 7124 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_lt_8)7125 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8) { 7126 TEST_REQUIRES_ARM_NEON; 7127 for (size_t k = 1; k < 8; k++) { 7128 GemmMicrokernelTester() 7129 .mr(2) 7130 .nr(16) 7131 .kr(1) 7132 .sr(1) 7133 .m(2) 7134 .n(16) 7135 .k(k) 7136 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7137 } 7138 } 7139 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_lt_8_subtile)7140 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_lt_8_subtile) { 7141 TEST_REQUIRES_ARM_NEON; 7142 for (size_t k = 1; k < 8; k++) { 7143 for (uint32_t n = 1; n <= 16; n++) { 7144 for (uint32_t m = 1; m <= 2; m++) { 7145 
GemmMicrokernelTester() 7146 .mr(2) 7147 .nr(16) 7148 .kr(1) 7149 .sr(1) 7150 .m(m) 7151 .n(n) 7152 .k(k) 7153 .iterations(1) 7154 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7155 } 7156 } 7157 } 7158 } 7159 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_gt_8)7160 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8) { 7161 TEST_REQUIRES_ARM_NEON; 7162 for (size_t k = 9; k < 16; k++) { 7163 GemmMicrokernelTester() 7164 .mr(2) 7165 .nr(16) 7166 .kr(1) 7167 .sr(1) 7168 .m(2) 7169 .n(16) 7170 .k(k) 7171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7172 } 7173 } 7174 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_gt_8_subtile)7175 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_gt_8_subtile) { 7176 TEST_REQUIRES_ARM_NEON; 7177 for (size_t k = 9; k < 16; k++) { 7178 for (uint32_t n = 1; n <= 16; n++) { 7179 for (uint32_t m = 1; m <= 2; m++) { 7180 GemmMicrokernelTester() 7181 .mr(2) 7182 .nr(16) 7183 .kr(1) 7184 .sr(1) 7185 .m(m) 7186 .n(n) 7187 .k(k) 7188 .iterations(1) 7189 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7190 } 7191 } 7192 } 7193 } 7194 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_div_8)7195 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8) { 7196 TEST_REQUIRES_ARM_NEON; 7197 for (size_t k = 16; k <= 80; k += 8) { 7198 GemmMicrokernelTester() 7199 .mr(2) 7200 .nr(16) 7201 .kr(1) 7202 .sr(1) 7203 .m(2) 7204 .n(16) 7205 .k(k) 7206 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7207 } 7208 } 7209 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,k_div_8_subtile)7210 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, k_div_8_subtile) { 7211 TEST_REQUIRES_ARM_NEON; 7212 for (size_t 
k = 16; k <= 80; k += 8) { 7213 for (uint32_t n = 1; n <= 16; n++) { 7214 for (uint32_t m = 1; m <= 2; m++) { 7215 GemmMicrokernelTester() 7216 .mr(2) 7217 .nr(16) 7218 .kr(1) 7219 .sr(1) 7220 .m(m) 7221 .n(n) 7222 .k(k) 7223 .iterations(1) 7224 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7225 } 7226 } 7227 } 7228 } 7229 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16)7230 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16) { 7231 TEST_REQUIRES_ARM_NEON; 7232 for (uint32_t n = 17; n < 32; n++) { 7233 for (size_t k = 1; k <= 40; k += 9) { 7234 GemmMicrokernelTester() 7235 .mr(2) 7236 .nr(16) 7237 .kr(1) 7238 .sr(1) 7239 .m(2) 7240 .n(n) 7241 .k(k) 7242 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7243 } 7244 } 7245 } 7246 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_strided_cn)7247 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) { 7248 TEST_REQUIRES_ARM_NEON; 7249 for (uint32_t n = 17; n < 32; n++) { 7250 for (size_t k = 1; k <= 40; k += 9) { 7251 GemmMicrokernelTester() 7252 .mr(2) 7253 .nr(16) 7254 .kr(1) 7255 .sr(1) 7256 .m(2) 7257 .n(n) 7258 .k(k) 7259 .cn_stride(19) 7260 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7261 } 7262 } 7263 } 7264 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_subtile)7265 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_subtile) { 7266 TEST_REQUIRES_ARM_NEON; 7267 for (uint32_t n = 17; n < 32; n++) { 7268 for (size_t k = 1; k <= 40; k += 9) { 7269 for (uint32_t m = 1; m <= 2; m++) { 7270 GemmMicrokernelTester() 7271 .mr(2) 7272 .nr(16) 7273 .kr(1) 7274 .sr(1) 7275 .m(m) 7276 .n(n) 7277 .k(k) 7278 .iterations(1) 7279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7280 } 7281 } 7282 } 7283 } 7284 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16)7285 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16) { 7286 TEST_REQUIRES_ARM_NEON; 7287 for (uint32_t n = 32; n <= 48; n += 16) { 7288 for (size_t k = 1; k <= 40; k += 9) { 7289 GemmMicrokernelTester() 7290 .mr(2) 7291 .nr(16) 7292 .kr(1) 7293 .sr(1) 7294 .m(2) 7295 .n(n) 7296 .k(k) 7297 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7298 } 7299 } 7300 } 7301 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_strided_cn)7302 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) { 7303 TEST_REQUIRES_ARM_NEON; 7304 for (uint32_t n = 32; n <= 48; n += 16) { 7305 for (size_t k = 1; k <= 40; k += 9) { 7306 GemmMicrokernelTester() 7307 .mr(2) 7308 .nr(16) 7309 .kr(1) 7310 .sr(1) 7311 .m(2) 7312 .n(n) 7313 .k(k) 7314 .cn_stride(19) 7315 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7316 } 7317 } 7318 } 7319 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_subtile)7320 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_subtile) { 7321 TEST_REQUIRES_ARM_NEON; 7322 for (uint32_t n = 32; n <= 48; n += 16) { 7323 for (size_t k = 1; k <= 40; k += 9) { 7324 for (uint32_t m = 1; m <= 2; m++) { 7325 GemmMicrokernelTester() 7326 .mr(2) 7327 .nr(16) 7328 .kr(1) 7329 .sr(1) 7330 .m(m) 7331 .n(n) 7332 .k(k) 7333 .iterations(1) 7334 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7335 } 7336 } 7337 } 7338 } 7339 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,small_kernel)7340 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, small_kernel) { 7341 TEST_REQUIRES_ARM_NEON; 7342 for (size_t k = 1; k <= 40; k += 9) { 7343 
GemmMicrokernelTester() 7344 .mr(2) 7345 .nr(16) 7346 .kr(1) 7347 .sr(1) 7348 .m(2) 7349 .n(16) 7350 .k(k) 7351 .ks(3) 7352 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7353 } 7354 } 7355 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,small_kernel_subtile)7356 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, small_kernel_subtile) { 7357 TEST_REQUIRES_ARM_NEON; 7358 for (size_t k = 1; k <= 40; k += 9) { 7359 for (uint32_t n = 1; n <= 16; n++) { 7360 for (uint32_t m = 1; m <= 2; m++) { 7361 GemmMicrokernelTester() 7362 .mr(2) 7363 .nr(16) 7364 .kr(1) 7365 .sr(1) 7366 .m(m) 7367 .n(n) 7368 .k(k) 7369 .ks(3) 7370 .iterations(1) 7371 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7372 } 7373 } 7374 } 7375 } 7376 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_gt_16_small_kernel)7377 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_gt_16_small_kernel) { 7378 TEST_REQUIRES_ARM_NEON; 7379 for (uint32_t n = 17; n < 32; n++) { 7380 for (size_t k = 1; k <= 40; k += 9) { 7381 GemmMicrokernelTester() 7382 .mr(2) 7383 .nr(16) 7384 .kr(1) 7385 .sr(1) 7386 .m(2) 7387 .n(n) 7388 .k(k) 7389 .ks(3) 7390 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7391 } 7392 } 7393 } 7394 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,n_div_16_small_kernel)7395 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, n_div_16_small_kernel) { 7396 TEST_REQUIRES_ARM_NEON; 7397 for (uint32_t n = 32; n <= 48; n += 16) { 7398 for (size_t k = 1; k <= 40; k += 9) { 7399 GemmMicrokernelTester() 7400 .mr(2) 7401 .nr(16) 7402 .kr(1) 7403 .sr(1) 7404 .m(2) 7405 .n(n) 7406 .k(k) 7407 .ks(3) 7408 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7409 } 7410 } 7411 } 
7412 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cm_subtile)7413 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm_subtile) { 7414 TEST_REQUIRES_ARM_NEON; 7415 for (size_t k = 1; k <= 40; k += 9) { 7416 for (uint32_t n = 1; n <= 16; n++) { 7417 for (uint32_t m = 1; m <= 2; m++) { 7418 GemmMicrokernelTester() 7419 .mr(2) 7420 .nr(16) 7421 .kr(1) 7422 .sr(1) 7423 .m(m) 7424 .n(n) 7425 .k(k) 7426 .cm_stride(19) 7427 .iterations(1) 7428 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7429 } 7430 } 7431 } 7432 } 7433 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,a_offset)7434 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, a_offset) { 7435 TEST_REQUIRES_ARM_NEON; 7436 for (size_t k = 1; k <= 40; k += 9) { 7437 GemmMicrokernelTester() 7438 .mr(2) 7439 .nr(16) 7440 .kr(1) 7441 .sr(1) 7442 .m(2) 7443 .n(16) 7444 .k(k) 7445 .ks(3) 7446 .a_offset(83) 7447 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7448 } 7449 } 7450 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,zero)7451 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, zero) { 7452 TEST_REQUIRES_ARM_NEON; 7453 for (size_t k = 1; k <= 40; k += 9) { 7454 for (uint32_t mz = 0; mz < 2; mz++) { 7455 GemmMicrokernelTester() 7456 .mr(2) 7457 .nr(16) 7458 .kr(1) 7459 .sr(1) 7460 .m(2) 7461 .n(16) 7462 .k(k) 7463 .ks(3) 7464 .a_offset(83) 7465 .zero_index(mz) 7466 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7467 } 7468 } 7469 } 7470 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,qmin)7471 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmin) { 7472 TEST_REQUIRES_ARM_NEON; 7473 GemmMicrokernelTester() 7474 .mr(2) 7475 .nr(16) 7476 .kr(1) 7477 .sr(1) 7478 .m(2) 7479 .n(16) 7480 .k(8) 7481 .qmin(128) 7482 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7483 } 7484 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,qmax)7485 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, qmax) { 7486 TEST_REQUIRES_ARM_NEON; 7487 GemmMicrokernelTester() 7488 .mr(2) 7489 .nr(16) 7490 .kr(1) 7491 .sr(1) 7492 .m(2) 7493 .n(16) 7494 .k(8) 7495 .qmax(128) 7496 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7497 } 7498 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE,strided_cm)7499 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16__NEON_MLAL_LANE, strided_cm) { 7500 TEST_REQUIRES_ARM_NEON; 7501 GemmMicrokernelTester() 7502 .mr(2) 7503 .nr(16) 7504 .kr(1) 7505 .sr(1) 7506 .m(2) 7507 .n(16) 7508 .k(8) 7509 .cm_stride(19) 7510 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7511 } 7512 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7513 7514 7515 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R,k_eq_16)7516 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16) { 7517 TEST_REQUIRES_ARM_NEON; 7518 GemmMicrokernelTester() 7519 .mr(2) 7520 .nr(16) 7521 .kr(2) 7522 .sr(1) 7523 .m(2) 7524 .n(16) 7525 .k(16) 7526 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7527 } 7528 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R,strided_cn)7529 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, strided_cn) { 7530 TEST_REQUIRES_ARM_NEON; 7531 GemmMicrokernelTester() 7532 .mr(2) 7533 .nr(16) 7534 .kr(2) 7535 .sr(1) 7536 .m(2) 7537 .n(16) 7538 .k(16) 7539 .cn_stride(19) 7540 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7541 } 7542 
// k = 16; every (m, n) with m = 1..2 and n = 1..16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 2; m++) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 16, n = nr; m = 1..2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 2; m++) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(m).n(16).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, m = mr; n = 1..16.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(n).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..15, m = mr, n = nr.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..15 crossed with n = 1..16 and m = 1..2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 17..31, m = mr, n = nr.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 17..31 crossed with n = 1..16 and m = 1..2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k = 32..160 in steps of 16, m = mr, n = nr.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(2).sr(1)
      .m(2).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 32..160 in steps of 16 crossed with n = 1..16 and m = 1..2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 17..31, k = 1..80 (step 17), m = mr.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in (16, 32), k = 1, 18, ..., 69, and m in [1, 2]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 (multiples of 16) with m = 2, sampling k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride(19) set on the tester.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1, 18, ..., 69, and m in [1, 2]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full m = 2, n = 16 tile with ks(3), sampling k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    GemmMicrokernelTester()
        .mr(2).nr(16).kr(2).sr(1)
        .m(2).n(16).k(depth)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// ks(3) with every n in [1, 16] and m in [1, 2] at k = 1, 18, ..., 69; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 2; ++rows) {
        GemmMicrokernelTester()
            .mr(2).nr(16).kr(2).sr(1)
            .m(rows).n(cols).k(depth)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks(3) with n in (16, 32) and m = 2, sampling k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      GemmMicrokernelTester()
          .mr(2).nr(16).kr(2).sr(1)
          .m(2).n(cols).k(depth)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, n_div_16_small_kernel) { 7864 TEST_REQUIRES_ARM_NEON; 7865 for (uint32_t n = 32; n <= 48; n += 16) { 7866 for (size_t k = 1; k <= 80; k += 17) { 7867 GemmMicrokernelTester() 7868 .mr(2) 7869 .nr(16) 7870 .kr(2) 7871 .sr(1) 7872 .m(2) 7873 .n(n) 7874 .k(k) 7875 .ks(3) 7876 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7877 } 7878 } 7879 } 7880 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R,strided_cm_subtile)7881 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, strided_cm_subtile) { 7882 TEST_REQUIRES_ARM_NEON; 7883 for (size_t k = 1; k <= 80; k += 17) { 7884 for (uint32_t n = 1; n <= 16; n++) { 7885 for (uint32_t m = 1; m <= 2; m++) { 7886 GemmMicrokernelTester() 7887 .mr(2) 7888 .nr(16) 7889 .kr(2) 7890 .sr(1) 7891 .m(m) 7892 .n(n) 7893 .k(k) 7894 .cm_stride(19) 7895 .iterations(1) 7896 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7897 } 7898 } 7899 } 7900 } 7901 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R,a_offset)7902 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, a_offset) { 7903 TEST_REQUIRES_ARM_NEON; 7904 for (size_t k = 1; k <= 80; k += 17) { 7905 GemmMicrokernelTester() 7906 .mr(2) 7907 .nr(16) 7908 .kr(2) 7909 .sr(1) 7910 .m(2) 7911 .n(16) 7912 .k(k) 7913 .ks(3) 7914 .a_offset(163) 7915 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7916 } 7917 } 7918 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R,zero)7919 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, zero) { 7920 TEST_REQUIRES_ARM_NEON; 7921 for (size_t k = 1; k <= 80; k += 17) { 7922 for (uint32_t mz = 0; mz < 2; mz++) { 7923 GemmMicrokernelTester() 7924 .mr(2) 7925 .nr(16) 7926 .kr(2) 7927 .sr(1) 7928 .m(2) 7929 .n(16) 7930 .k(k) 7931 .ks(3) 7932 
.a_offset(163) 7933 .zero_index(mz) 7934 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7935 } 7936 } 7937 } 7938 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R,qmin)7939 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, qmin) { 7940 TEST_REQUIRES_ARM_NEON; 7941 GemmMicrokernelTester() 7942 .mr(2) 7943 .nr(16) 7944 .kr(2) 7945 .sr(1) 7946 .m(2) 7947 .n(16) 7948 .k(16) 7949 .qmin(128) 7950 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7951 } 7952 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R,qmax)7953 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, qmax) { 7954 TEST_REQUIRES_ARM_NEON; 7955 GemmMicrokernelTester() 7956 .mr(2) 7957 .nr(16) 7958 .kr(2) 7959 .sr(1) 7960 .m(2) 7961 .n(16) 7962 .k(16) 7963 .qmax(128) 7964 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7965 } 7966 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R,strided_cm)7967 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD2R, strided_cm) { 7968 TEST_REQUIRES_ARM_NEON; 7969 GemmMicrokernelTester() 7970 .mr(2) 7971 .nr(16) 7972 .kr(2) 7973 .sr(1) 7974 .m(2) 7975 .n(16) 7976 .k(16) 7977 .cm_stride(19) 7978 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 7979 } 7980 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 7981 7982 7983 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_eq_8)7984 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_eq_8) { 7985 TEST_REQUIRES_ARM_NEON; 7986 GemmMicrokernelTester() 7987 .mr(2) 7988 .nr(16) 7989 .kr(2) 7990 .sr(1) 7991 .m(2) 7992 .n(16) 7993 .k(8) 7994 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 7995 } 7996 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,strided_cn)7997 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, strided_cn) { 7998 TEST_REQUIRES_ARM_NEON; 7999 GemmMicrokernelTester() 8000 .mr(2) 8001 .nr(16) 8002 .kr(2) 8003 .sr(1) 8004 .m(2) 8005 .n(16) 8006 .k(8) 8007 .cn_stride(19) 8008 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8009 } 8010 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_eq_8_subtile)8011 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_eq_8_subtile) { 8012 TEST_REQUIRES_ARM_NEON; 8013 for (uint32_t n = 1; n <= 16; n++) { 8014 for (uint32_t m = 1; m <= 2; m++) { 8015 GemmMicrokernelTester() 8016 .mr(2) 8017 .nr(16) 8018 .kr(2) 8019 .sr(1) 8020 .m(m) 8021 .n(n) 8022 .k(8) 8023 .iterations(1) 8024 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8025 } 8026 } 8027 } 8028 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_eq_8_subtile_m)8029 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) { 8030 TEST_REQUIRES_ARM_NEON; 8031 for (uint32_t m = 1; m <= 2; m++) { 8032 GemmMicrokernelTester() 8033 .mr(2) 8034 .nr(16) 8035 .kr(2) 8036 .sr(1) 8037 .m(m) 8038 .n(16) 8039 .k(8) 8040 .iterations(1) 8041 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8042 } 8043 } 8044 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_eq_8_subtile_n)8045 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) { 8046 TEST_REQUIRES_ARM_NEON; 8047 for (uint32_t n = 1; n <= 16; n++) { 8048 GemmMicrokernelTester() 8049 .mr(2) 8050 .nr(16) 8051 .kr(2) 8052 .sr(1) 8053 .m(2) 8054 .n(n) 8055 .k(8) 8056 .iterations(1) 8057 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8058 } 8059 } 8060 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_lt_8)8061 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_lt_8) { 8062 TEST_REQUIRES_ARM_NEON; 8063 for (size_t k = 1; k < 8; k++) { 8064 GemmMicrokernelTester() 8065 .mr(2) 8066 .nr(16) 8067 .kr(2) 8068 .sr(1) 8069 .m(2) 8070 .n(16) 8071 .k(k) 8072 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8073 } 8074 } 8075 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_lt_8_subtile)8076 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_lt_8_subtile) { 8077 TEST_REQUIRES_ARM_NEON; 8078 for (size_t k = 1; k < 8; k++) { 8079 for (uint32_t n = 1; n <= 16; n++) { 8080 for (uint32_t m = 1; m <= 2; m++) { 8081 GemmMicrokernelTester() 8082 .mr(2) 8083 .nr(16) 8084 .kr(2) 8085 .sr(1) 8086 .m(m) 8087 .n(n) 8088 .k(k) 8089 .iterations(1) 8090 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8091 } 8092 } 8093 } 8094 } 8095 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_gt_8)8096 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_gt_8) { 8097 TEST_REQUIRES_ARM_NEON; 8098 for (size_t k = 9; k < 16; k++) { 8099 GemmMicrokernelTester() 8100 .mr(2) 8101 .nr(16) 8102 .kr(2) 8103 .sr(1) 8104 .m(2) 8105 .n(16) 8106 .k(k) 8107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8108 } 8109 } 8110 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_gt_8_subtile)8111 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_gt_8_subtile) { 8112 TEST_REQUIRES_ARM_NEON; 8113 for (size_t k = 9; k < 16; k++) { 8114 for (uint32_t n = 1; n <= 16; n++) { 8115 for (uint32_t m = 1; m <= 2; m++) { 8116 GemmMicrokernelTester() 8117 .mr(2) 8118 .nr(16) 8119 .kr(2) 8120 .sr(1) 8121 .m(m) 
8122 .n(n) 8123 .k(k) 8124 .iterations(1) 8125 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8126 } 8127 } 8128 } 8129 } 8130 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_div_8)8131 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_div_8) { 8132 TEST_REQUIRES_ARM_NEON; 8133 for (size_t k = 16; k <= 80; k += 8) { 8134 GemmMicrokernelTester() 8135 .mr(2) 8136 .nr(16) 8137 .kr(2) 8138 .sr(1) 8139 .m(2) 8140 .n(16) 8141 .k(k) 8142 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8143 } 8144 } 8145 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,k_div_8_subtile)8146 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, k_div_8_subtile) { 8147 TEST_REQUIRES_ARM_NEON; 8148 for (size_t k = 16; k <= 80; k += 8) { 8149 for (uint32_t n = 1; n <= 16; n++) { 8150 for (uint32_t m = 1; m <= 2; m++) { 8151 GemmMicrokernelTester() 8152 .mr(2) 8153 .nr(16) 8154 .kr(2) 8155 .sr(1) 8156 .m(m) 8157 .n(n) 8158 .k(k) 8159 .iterations(1) 8160 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8161 } 8162 } 8163 } 8164 } 8165 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,n_gt_16)8166 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_gt_16) { 8167 TEST_REQUIRES_ARM_NEON; 8168 for (uint32_t n = 17; n < 32; n++) { 8169 for (size_t k = 1; k <= 40; k += 9) { 8170 GemmMicrokernelTester() 8171 .mr(2) 8172 .nr(16) 8173 .kr(2) 8174 .sr(1) 8175 .m(2) 8176 .n(n) 8177 .k(k) 8178 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8179 } 8180 } 8181 } 8182 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,n_gt_16_strided_cn)8183 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) { 8184 TEST_REQUIRES_ARM_NEON; 8185 for 
(uint32_t n = 17; n < 32; n++) { 8186 for (size_t k = 1; k <= 40; k += 9) { 8187 GemmMicrokernelTester() 8188 .mr(2) 8189 .nr(16) 8190 .kr(2) 8191 .sr(1) 8192 .m(2) 8193 .n(n) 8194 .k(k) 8195 .cn_stride(19) 8196 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8197 } 8198 } 8199 } 8200 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,n_gt_16_subtile)8201 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_gt_16_subtile) { 8202 TEST_REQUIRES_ARM_NEON; 8203 for (uint32_t n = 17; n < 32; n++) { 8204 for (size_t k = 1; k <= 40; k += 9) { 8205 for (uint32_t m = 1; m <= 2; m++) { 8206 GemmMicrokernelTester() 8207 .mr(2) 8208 .nr(16) 8209 .kr(2) 8210 .sr(1) 8211 .m(m) 8212 .n(n) 8213 .k(k) 8214 .iterations(1) 8215 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8216 } 8217 } 8218 } 8219 } 8220 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,n_div_16)8221 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_div_16) { 8222 TEST_REQUIRES_ARM_NEON; 8223 for (uint32_t n = 32; n <= 48; n += 16) { 8224 for (size_t k = 1; k <= 40; k += 9) { 8225 GemmMicrokernelTester() 8226 .mr(2) 8227 .nr(16) 8228 .kr(2) 8229 .sr(1) 8230 .m(2) 8231 .n(n) 8232 .k(k) 8233 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8234 } 8235 } 8236 } 8237 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,n_div_16_strided_cn)8238 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) { 8239 TEST_REQUIRES_ARM_NEON; 8240 for (uint32_t n = 32; n <= 48; n += 16) { 8241 for (size_t k = 1; k <= 40; k += 9) { 8242 GemmMicrokernelTester() 8243 .mr(2) 8244 .nr(16) 8245 .kr(2) 8246 .sr(1) 8247 .m(2) 8248 .n(n) 8249 .k(k) 8250 .cn_stride(19) 8251 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8252 } 8253 } 8254 } 8255 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,n_div_16_subtile)8256 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_div_16_subtile) { 8257 TEST_REQUIRES_ARM_NEON; 8258 for (uint32_t n = 32; n <= 48; n += 16) { 8259 for (size_t k = 1; k <= 40; k += 9) { 8260 for (uint32_t m = 1; m <= 2; m++) { 8261 GemmMicrokernelTester() 8262 .mr(2) 8263 .nr(16) 8264 .kr(2) 8265 .sr(1) 8266 .m(m) 8267 .n(n) 8268 .k(k) 8269 .iterations(1) 8270 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8271 } 8272 } 8273 } 8274 } 8275 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,small_kernel)8276 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, small_kernel) { 8277 TEST_REQUIRES_ARM_NEON; 8278 for (size_t k = 1; k <= 40; k += 9) { 8279 GemmMicrokernelTester() 8280 .mr(2) 8281 .nr(16) 8282 .kr(2) 8283 .sr(1) 8284 .m(2) 8285 .n(16) 8286 .k(k) 8287 .ks(3) 8288 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8289 } 8290 } 8291 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,small_kernel_subtile)8292 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, small_kernel_subtile) { 8293 TEST_REQUIRES_ARM_NEON; 8294 for (size_t k = 1; k <= 40; k += 9) { 8295 for (uint32_t n = 1; n <= 16; n++) { 8296 for (uint32_t m = 1; m <= 2; m++) { 8297 GemmMicrokernelTester() 8298 .mr(2) 8299 .nr(16) 8300 .kr(2) 8301 .sr(1) 8302 .m(m) 8303 .n(n) 8304 .k(k) 8305 .ks(3) 8306 .iterations(1) 8307 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8308 } 8309 } 8310 } 8311 } 8312 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,n_gt_16_small_kernel)8313 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_gt_16_small_kernel) { 8314 
TEST_REQUIRES_ARM_NEON; 8315 for (uint32_t n = 17; n < 32; n++) { 8316 for (size_t k = 1; k <= 40; k += 9) { 8317 GemmMicrokernelTester() 8318 .mr(2) 8319 .nr(16) 8320 .kr(2) 8321 .sr(1) 8322 .m(2) 8323 .n(n) 8324 .k(k) 8325 .ks(3) 8326 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8327 } 8328 } 8329 } 8330 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,n_div_16_small_kernel)8331 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, n_div_16_small_kernel) { 8332 TEST_REQUIRES_ARM_NEON; 8333 for (uint32_t n = 32; n <= 48; n += 16) { 8334 for (size_t k = 1; k <= 40; k += 9) { 8335 GemmMicrokernelTester() 8336 .mr(2) 8337 .nr(16) 8338 .kr(2) 8339 .sr(1) 8340 .m(2) 8341 .n(n) 8342 .k(k) 8343 .ks(3) 8344 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8345 } 8346 } 8347 } 8348 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,strided_cm_subtile)8349 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, strided_cm_subtile) { 8350 TEST_REQUIRES_ARM_NEON; 8351 for (size_t k = 1; k <= 40; k += 9) { 8352 for (uint32_t n = 1; n <= 16; n++) { 8353 for (uint32_t m = 1; m <= 2; m++) { 8354 GemmMicrokernelTester() 8355 .mr(2) 8356 .nr(16) 8357 .kr(2) 8358 .sr(1) 8359 .m(m) 8360 .n(n) 8361 .k(k) 8362 .cm_stride(19) 8363 .iterations(1) 8364 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8365 } 8366 } 8367 } 8368 } 8369 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,a_offset)8370 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, a_offset) { 8371 TEST_REQUIRES_ARM_NEON; 8372 for (size_t k = 1; k <= 40; k += 9) { 8373 GemmMicrokernelTester() 8374 .mr(2) 8375 .nr(16) 8376 .kr(2) 8377 .sr(1) 8378 .m(2) 8379 .n(16) 8380 .k(k) 8381 .ks(3) 8382 .a_offset(83) 8383 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8384 } 8385 } 8386 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,zero)8387 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, zero) { 8388 TEST_REQUIRES_ARM_NEON; 8389 for (size_t k = 1; k <= 40; k += 9) { 8390 for (uint32_t mz = 0; mz < 2; mz++) { 8391 GemmMicrokernelTester() 8392 .mr(2) 8393 .nr(16) 8394 .kr(2) 8395 .sr(1) 8396 .m(2) 8397 .n(16) 8398 .k(k) 8399 .ks(3) 8400 .a_offset(83) 8401 .zero_index(mz) 8402 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8403 } 8404 } 8405 } 8406 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,qmin)8407 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, qmin) { 8408 TEST_REQUIRES_ARM_NEON; 8409 GemmMicrokernelTester() 8410 .mr(2) 8411 .nr(16) 8412 .kr(2) 8413 .sr(1) 8414 .m(2) 8415 .n(16) 8416 .k(8) 8417 .qmin(128) 8418 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8419 } 8420 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,qmax)8421 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, qmax) { 8422 TEST_REQUIRES_ARM_NEON; 8423 GemmMicrokernelTester() 8424 .mr(2) 8425 .nr(16) 8426 .kr(2) 8427 .sr(1) 8428 .m(2) 8429 .n(16) 8430 .k(8) 8431 .qmax(128) 8432 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8433 } 8434 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R,strided_cm)8435 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD2R, strided_cm) { 8436 TEST_REQUIRES_ARM_NEON; 8437 GemmMicrokernelTester() 8438 .mr(2) 8439 .nr(16) 8440 .kr(2) 8441 .sr(1) 8442 .m(2) 8443 .n(16) 8444 .k(8) 8445 .cm_stride(19) 8446 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 8447 } 8448 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8449 8450 8451 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_eq_8)8452 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_eq_8) { 8453 TEST_REQUIRES_ARM_NEON; 8454 GemmMicrokernelTester() 8455 .mr(2) 8456 .nr(16) 8457 .kr(2) 8458 .sr(4) 8459 .m(2) 8460 .n(16) 8461 .k(8) 8462 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8463 } 8464 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,strided_cn)8465 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, strided_cn) { 8466 TEST_REQUIRES_ARM_NEON; 8467 GemmMicrokernelTester() 8468 .mr(2) 8469 .nr(16) 8470 .kr(2) 8471 .sr(4) 8472 .m(2) 8473 .n(16) 8474 .k(8) 8475 .cn_stride(19) 8476 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8477 } 8478 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_eq_8_subtile)8479 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_eq_8_subtile) { 8480 TEST_REQUIRES_ARM_NEON; 8481 for (uint32_t n = 1; n <= 16; n++) { 8482 for (uint32_t m = 1; m <= 2; m++) { 8483 GemmMicrokernelTester() 8484 .mr(2) 8485 .nr(16) 8486 .kr(2) 8487 .sr(4) 8488 .m(m) 8489 .n(n) 8490 .k(8) 8491 .iterations(1) 8492 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8493 } 8494 } 8495 } 8496 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_eq_8_subtile_m)8497 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_eq_8_subtile_m) { 8498 TEST_REQUIRES_ARM_NEON; 8499 for (uint32_t m = 1; m <= 2; m++) { 8500 GemmMicrokernelTester() 8501 .mr(2) 8502 .nr(16) 8503 .kr(2) 8504 .sr(4) 8505 .m(m) 8506 .n(16) 8507 .k(8) 8508 .iterations(1) 8509 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
8510 } 8511 } 8512 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_eq_8_subtile_n)8513 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_eq_8_subtile_n) { 8514 TEST_REQUIRES_ARM_NEON; 8515 for (uint32_t n = 1; n <= 16; n++) { 8516 GemmMicrokernelTester() 8517 .mr(2) 8518 .nr(16) 8519 .kr(2) 8520 .sr(4) 8521 .m(2) 8522 .n(n) 8523 .k(8) 8524 .iterations(1) 8525 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8526 } 8527 } 8528 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_lt_8)8529 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_lt_8) { 8530 TEST_REQUIRES_ARM_NEON; 8531 for (size_t k = 1; k < 8; k++) { 8532 GemmMicrokernelTester() 8533 .mr(2) 8534 .nr(16) 8535 .kr(2) 8536 .sr(4) 8537 .m(2) 8538 .n(16) 8539 .k(k) 8540 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8541 } 8542 } 8543 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_lt_8_subtile)8544 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_lt_8_subtile) { 8545 TEST_REQUIRES_ARM_NEON; 8546 for (size_t k = 1; k < 8; k++) { 8547 for (uint32_t n = 1; n <= 16; n++) { 8548 for (uint32_t m = 1; m <= 2; m++) { 8549 GemmMicrokernelTester() 8550 .mr(2) 8551 .nr(16) 8552 .kr(2) 8553 .sr(4) 8554 .m(m) 8555 .n(n) 8556 .k(k) 8557 .iterations(1) 8558 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8559 } 8560 } 8561 } 8562 } 8563 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_gt_8)8564 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_gt_8) { 8565 TEST_REQUIRES_ARM_NEON; 8566 for (size_t k = 9; k < 16; k++) { 8567 GemmMicrokernelTester() 8568 .mr(2) 8569 .nr(16) 8570 .kr(2) 8571 .sr(4) 8572 .m(2) 8573 .n(16) 8574 .k(k) 8575 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
8576 } 8577 } 8578 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_gt_8_subtile)8579 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_gt_8_subtile) { 8580 TEST_REQUIRES_ARM_NEON; 8581 for (size_t k = 9; k < 16; k++) { 8582 for (uint32_t n = 1; n <= 16; n++) { 8583 for (uint32_t m = 1; m <= 2; m++) { 8584 GemmMicrokernelTester() 8585 .mr(2) 8586 .nr(16) 8587 .kr(2) 8588 .sr(4) 8589 .m(m) 8590 .n(n) 8591 .k(k) 8592 .iterations(1) 8593 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8594 } 8595 } 8596 } 8597 } 8598 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_div_8)8599 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_div_8) { 8600 TEST_REQUIRES_ARM_NEON; 8601 for (size_t k = 16; k <= 80; k += 8) { 8602 GemmMicrokernelTester() 8603 .mr(2) 8604 .nr(16) 8605 .kr(2) 8606 .sr(4) 8607 .m(2) 8608 .n(16) 8609 .k(k) 8610 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8611 } 8612 } 8613 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,k_div_8_subtile)8614 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, k_div_8_subtile) { 8615 TEST_REQUIRES_ARM_NEON; 8616 for (size_t k = 16; k <= 80; k += 8) { 8617 for (uint32_t n = 1; n <= 16; n++) { 8618 for (uint32_t m = 1; m <= 2; m++) { 8619 GemmMicrokernelTester() 8620 .mr(2) 8621 .nr(16) 8622 .kr(2) 8623 .sr(4) 8624 .m(m) 8625 .n(n) 8626 .k(k) 8627 .iterations(1) 8628 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8629 } 8630 } 8631 } 8632 } 8633 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,n_gt_16)8634 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_gt_16) { 8635 TEST_REQUIRES_ARM_NEON; 8636 for (uint32_t n = 17; n < 32; n++) { 8637 for (size_t k = 1; k <= 40; k += 9) { 8638 GemmMicrokernelTester() 8639 .mr(2) 8640 .nr(16) 8641 .kr(2) 8642 .sr(4) 8643 .m(2) 8644 .n(n) 
8645 .k(k) 8646 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8647 } 8648 } 8649 } 8650 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,n_gt_16_strided_cn)8651 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_gt_16_strided_cn) { 8652 TEST_REQUIRES_ARM_NEON; 8653 for (uint32_t n = 17; n < 32; n++) { 8654 for (size_t k = 1; k <= 40; k += 9) { 8655 GemmMicrokernelTester() 8656 .mr(2) 8657 .nr(16) 8658 .kr(2) 8659 .sr(4) 8660 .m(2) 8661 .n(n) 8662 .k(k) 8663 .cn_stride(19) 8664 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8665 } 8666 } 8667 } 8668 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,n_gt_16_subtile)8669 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_gt_16_subtile) { 8670 TEST_REQUIRES_ARM_NEON; 8671 for (uint32_t n = 17; n < 32; n++) { 8672 for (size_t k = 1; k <= 40; k += 9) { 8673 for (uint32_t m = 1; m <= 2; m++) { 8674 GemmMicrokernelTester() 8675 .mr(2) 8676 .nr(16) 8677 .kr(2) 8678 .sr(4) 8679 .m(m) 8680 .n(n) 8681 .k(k) 8682 .iterations(1) 8683 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8684 } 8685 } 8686 } 8687 } 8688 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,n_div_16)8689 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_div_16) { 8690 TEST_REQUIRES_ARM_NEON; 8691 for (uint32_t n = 32; n <= 48; n += 16) { 8692 for (size_t k = 1; k <= 40; k += 9) { 8693 GemmMicrokernelTester() 8694 .mr(2) 8695 .nr(16) 8696 .kr(2) 8697 .sr(4) 8698 .m(2) 8699 .n(n) 8700 .k(k) 8701 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8702 } 8703 } 8704 } 8705 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,n_div_16_strided_cn)8706 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_div_16_strided_cn) { 8707 
TEST_REQUIRES_ARM_NEON; 8708 for (uint32_t n = 32; n <= 48; n += 16) { 8709 for (size_t k = 1; k <= 40; k += 9) { 8710 GemmMicrokernelTester() 8711 .mr(2) 8712 .nr(16) 8713 .kr(2) 8714 .sr(4) 8715 .m(2) 8716 .n(n) 8717 .k(k) 8718 .cn_stride(19) 8719 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8720 } 8721 } 8722 } 8723 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,n_div_16_subtile)8724 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_div_16_subtile) { 8725 TEST_REQUIRES_ARM_NEON; 8726 for (uint32_t n = 32; n <= 48; n += 16) { 8727 for (size_t k = 1; k <= 40; k += 9) { 8728 for (uint32_t m = 1; m <= 2; m++) { 8729 GemmMicrokernelTester() 8730 .mr(2) 8731 .nr(16) 8732 .kr(2) 8733 .sr(4) 8734 .m(m) 8735 .n(n) 8736 .k(k) 8737 .iterations(1) 8738 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8739 } 8740 } 8741 } 8742 } 8743 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,small_kernel)8744 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, small_kernel) { 8745 TEST_REQUIRES_ARM_NEON; 8746 for (size_t k = 1; k <= 40; k += 9) { 8747 GemmMicrokernelTester() 8748 .mr(2) 8749 .nr(16) 8750 .kr(2) 8751 .sr(4) 8752 .m(2) 8753 .n(16) 8754 .k(k) 8755 .ks(3) 8756 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8757 } 8758 } 8759 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,small_kernel_subtile)8760 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, small_kernel_subtile) { 8761 TEST_REQUIRES_ARM_NEON; 8762 for (size_t k = 1; k <= 40; k += 9) { 8763 for (uint32_t n = 1; n <= 16; n++) { 8764 for (uint32_t m = 1; m <= 2; m++) { 8765 GemmMicrokernelTester() 8766 .mr(2) 8767 .nr(16) 8768 .kr(2) 8769 .sr(4) 8770 .m(m) 8771 .n(n) 8772 .k(k) 8773 .ks(3) 8774 .iterations(1) 8775 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8776 } 8777 } 8778 } 8779 } 8780 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,n_gt_16_small_kernel)8781 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_gt_16_small_kernel) { 8782 TEST_REQUIRES_ARM_NEON; 8783 for (uint32_t n = 17; n < 32; n++) { 8784 for (size_t k = 1; k <= 40; k += 9) { 8785 GemmMicrokernelTester() 8786 .mr(2) 8787 .nr(16) 8788 .kr(2) 8789 .sr(4) 8790 .m(2) 8791 .n(n) 8792 .k(k) 8793 .ks(3) 8794 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8795 } 8796 } 8797 } 8798 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,n_div_16_small_kernel)8799 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, n_div_16_small_kernel) { 8800 TEST_REQUIRES_ARM_NEON; 8801 for (uint32_t n = 32; n <= 48; n += 16) { 8802 for (size_t k = 1; k <= 40; k += 9) { 8803 GemmMicrokernelTester() 8804 .mr(2) 8805 .nr(16) 8806 .kr(2) 8807 .sr(4) 8808 .m(2) 8809 .n(n) 8810 .k(k) 8811 .ks(3) 8812 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8813 } 8814 } 8815 } 8816 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,strided_cm_subtile)8817 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, strided_cm_subtile) { 8818 TEST_REQUIRES_ARM_NEON; 8819 for (size_t k = 1; k <= 40; k += 9) { 8820 for (uint32_t n = 1; n <= 16; n++) { 8821 for (uint32_t m = 1; m <= 2; m++) { 8822 GemmMicrokernelTester() 8823 .mr(2) 8824 .nr(16) 8825 .kr(2) 8826 .sr(4) 8827 .m(m) 8828 .n(n) 8829 .k(k) 8830 .cm_stride(19) 8831 .iterations(1) 8832 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8833 } 8834 } 8835 } 8836 } 8837 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,a_offset)8838 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, 
a_offset) { 8839 TEST_REQUIRES_ARM_NEON; 8840 for (size_t k = 1; k <= 40; k += 9) { 8841 GemmMicrokernelTester() 8842 .mr(2) 8843 .nr(16) 8844 .kr(2) 8845 .sr(4) 8846 .m(2) 8847 .n(16) 8848 .k(k) 8849 .ks(3) 8850 .a_offset(83) 8851 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8852 } 8853 } 8854 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,zero)8855 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, zero) { 8856 TEST_REQUIRES_ARM_NEON; 8857 for (size_t k = 1; k <= 40; k += 9) { 8858 for (uint32_t mz = 0; mz < 2; mz++) { 8859 GemmMicrokernelTester() 8860 .mr(2) 8861 .nr(16) 8862 .kr(2) 8863 .sr(4) 8864 .m(2) 8865 .n(16) 8866 .k(k) 8867 .ks(3) 8868 .a_offset(83) 8869 .zero_index(mz) 8870 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8871 } 8872 } 8873 } 8874 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,qmin)8875 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, qmin) { 8876 TEST_REQUIRES_ARM_NEON; 8877 GemmMicrokernelTester() 8878 .mr(2) 8879 .nr(16) 8880 .kr(2) 8881 .sr(4) 8882 .m(2) 8883 .n(16) 8884 .k(8) 8885 .qmin(128) 8886 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8887 } 8888 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,qmax)8889 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, qmax) { 8890 TEST_REQUIRES_ARM_NEON; 8891 GemmMicrokernelTester() 8892 .mr(2) 8893 .nr(16) 8894 .kr(2) 8895 .sr(4) 8896 .m(2) 8897 .n(16) 8898 .k(8) 8899 .qmax(128) 8900 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8901 } 8902 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL,strided_cm)8903 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MULL, strided_cm) { 8904 TEST_REQUIRES_ARM_NEON; 8905 GemmMicrokernelTester() 8906 .mr(2) 8907 .nr(16) 
8908 .kr(2) 8909 .sr(4) 8910 .m(2) 8911 .n(16) 8912 .k(8) 8913 .cm_stride(19) 8914 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8915 } 8916 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 8917 8918 8919 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_eq_16)8920 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16) { 8921 TEST_REQUIRES_ARM_NEON; 8922 GemmMicrokernelTester() 8923 .mr(2) 8924 .nr(16) 8925 .kr(4) 8926 .sr(1) 8927 .m(2) 8928 .n(16) 8929 .k(16) 8930 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8931 } 8932 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,strided_cn)8933 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cn) { 8934 TEST_REQUIRES_ARM_NEON; 8935 GemmMicrokernelTester() 8936 .mr(2) 8937 .nr(16) 8938 .kr(4) 8939 .sr(1) 8940 .m(2) 8941 .n(16) 8942 .k(16) 8943 .cn_stride(19) 8944 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8945 } 8946 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_eq_16_subtile)8947 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) { 8948 TEST_REQUIRES_ARM_NEON; 8949 for (uint32_t n = 1; n <= 16; n++) { 8950 for (uint32_t m = 1; m <= 2; m++) { 8951 GemmMicrokernelTester() 8952 .mr(2) 8953 .nr(16) 8954 .kr(4) 8955 .sr(1) 8956 .m(m) 8957 .n(n) 8958 .k(16) 8959 .iterations(1) 8960 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8961 } 8962 } 8963 } 8964 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)8965 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 8966 TEST_REQUIRES_ARM_NEON; 8967 for (uint32_t m = 1; m <= 2; m++) { 8968 GemmMicrokernelTester() 8969 
.mr(2) 8970 .nr(16) 8971 .kr(4) 8972 .sr(1) 8973 .m(m) 8974 .n(16) 8975 .k(16) 8976 .iterations(1) 8977 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8978 } 8979 } 8980 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)8981 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 8982 TEST_REQUIRES_ARM_NEON; 8983 for (uint32_t n = 1; n <= 16; n++) { 8984 GemmMicrokernelTester() 8985 .mr(2) 8986 .nr(16) 8987 .kr(4) 8988 .sr(1) 8989 .m(2) 8990 .n(n) 8991 .k(16) 8992 .iterations(1) 8993 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 8994 } 8995 } 8996 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_lt_16)8997 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_lt_16) { 8998 TEST_REQUIRES_ARM_NEON; 8999 for (size_t k = 1; k < 16; k++) { 9000 GemmMicrokernelTester() 9001 .mr(2) 9002 .nr(16) 9003 .kr(4) 9004 .sr(1) 9005 .m(2) 9006 .n(16) 9007 .k(k) 9008 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9009 } 9010 } 9011 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_lt_16_subtile)9012 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) { 9013 TEST_REQUIRES_ARM_NEON; 9014 for (size_t k = 1; k < 16; k++) { 9015 for (uint32_t n = 1; n <= 16; n++) { 9016 for (uint32_t m = 1; m <= 2; m++) { 9017 GemmMicrokernelTester() 9018 .mr(2) 9019 .nr(16) 9020 .kr(4) 9021 .sr(1) 9022 .m(m) 9023 .n(n) 9024 .k(k) 9025 .iterations(1) 9026 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9027 } 9028 } 9029 } 9030 } 9031 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_gt_16)9032 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_gt_16) { 9033 TEST_REQUIRES_ARM_NEON; 9034 for 
(size_t k = 17; k < 32; k++) { 9035 GemmMicrokernelTester() 9036 .mr(2) 9037 .nr(16) 9038 .kr(4) 9039 .sr(1) 9040 .m(2) 9041 .n(16) 9042 .k(k) 9043 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9044 } 9045 } 9046 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_gt_16_subtile)9047 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) { 9048 TEST_REQUIRES_ARM_NEON; 9049 for (size_t k = 17; k < 32; k++) { 9050 for (uint32_t n = 1; n <= 16; n++) { 9051 for (uint32_t m = 1; m <= 2; m++) { 9052 GemmMicrokernelTester() 9053 .mr(2) 9054 .nr(16) 9055 .kr(4) 9056 .sr(1) 9057 .m(m) 9058 .n(n) 9059 .k(k) 9060 .iterations(1) 9061 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9062 } 9063 } 9064 } 9065 } 9066 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_div_16)9067 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_div_16) { 9068 TEST_REQUIRES_ARM_NEON; 9069 for (size_t k = 32; k <= 160; k += 16) { 9070 GemmMicrokernelTester() 9071 .mr(2) 9072 .nr(16) 9073 .kr(4) 9074 .sr(1) 9075 .m(2) 9076 .n(16) 9077 .k(k) 9078 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9079 } 9080 } 9081 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,k_div_16_subtile)9082 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_div_16_subtile) { 9083 TEST_REQUIRES_ARM_NEON; 9084 for (size_t k = 32; k <= 160; k += 16) { 9085 for (uint32_t n = 1; n <= 16; n++) { 9086 for (uint32_t m = 1; m <= 2; m++) { 9087 GemmMicrokernelTester() 9088 .mr(2) 9089 .nr(16) 9090 .kr(4) 9091 .sr(1) 9092 .m(m) 9093 .n(n) 9094 .k(k) 9095 .iterations(1) 9096 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9097 } 9098 } 9099 } 9100 } 9101 
// n = 17..31, k = 1..80 in steps of 17, m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31, k = 1..80 in steps of 17, m = 2, cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31, k = 1..80 in steps of 17, m = 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48, k = 1..80 in steps of 17, m = 2.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1..80 in steps of 17, m = 2, cn_stride = 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(2)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(2)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1..80 in steps of 17, m = 1..2, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 2; m++) {
        GemmMicrokernelTester()
          .mr(2)
          .nr(16)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m = 2, n = 16, k = 1..80 in steps of 17, ks = 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(2)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, small_kernel_subtile) { 9229 TEST_REQUIRES_ARM_NEON; 9230 for (size_t k = 1; k <= 80; k += 17) { 9231 for (uint32_t n = 1; n <= 16; n++) { 9232 for (uint32_t m = 1; m <= 2; m++) { 9233 GemmMicrokernelTester() 9234 .mr(2) 9235 .nr(16) 9236 .kr(4) 9237 .sr(1) 9238 .m(m) 9239 .n(n) 9240 .k(k) 9241 .ks(3) 9242 .iterations(1) 9243 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9244 } 9245 } 9246 } 9247 } 9248 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,n_gt_16_small_kernel)9249 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_small_kernel) { 9250 TEST_REQUIRES_ARM_NEON; 9251 for (uint32_t n = 17; n < 32; n++) { 9252 for (size_t k = 1; k <= 80; k += 17) { 9253 GemmMicrokernelTester() 9254 .mr(2) 9255 .nr(16) 9256 .kr(4) 9257 .sr(1) 9258 .m(2) 9259 .n(n) 9260 .k(k) 9261 .ks(3) 9262 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9263 } 9264 } 9265 } 9266 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,n_div_16_small_kernel)9267 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_small_kernel) { 9268 TEST_REQUIRES_ARM_NEON; 9269 for (uint32_t n = 32; n <= 48; n += 16) { 9270 for (size_t k = 1; k <= 80; k += 17) { 9271 GemmMicrokernelTester() 9272 .mr(2) 9273 .nr(16) 9274 .kr(4) 9275 .sr(1) 9276 .m(2) 9277 .n(n) 9278 .k(k) 9279 .ks(3) 9280 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9281 } 9282 } 9283 } 9284 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,strided_cm_subtile)9285 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cm_subtile) { 9286 TEST_REQUIRES_ARM_NEON; 9287 for (size_t k = 1; k <= 80; k += 17) { 9288 for (uint32_t n = 1; n <= 16; n++) { 9289 for (uint32_t m = 1; m <= 2; m++) { 9290 
GemmMicrokernelTester() 9291 .mr(2) 9292 .nr(16) 9293 .kr(4) 9294 .sr(1) 9295 .m(m) 9296 .n(n) 9297 .k(k) 9298 .cm_stride(19) 9299 .iterations(1) 9300 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9301 } 9302 } 9303 } 9304 } 9305 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,a_offset)9306 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, a_offset) { 9307 TEST_REQUIRES_ARM_NEON; 9308 for (size_t k = 1; k <= 80; k += 17) { 9309 GemmMicrokernelTester() 9310 .mr(2) 9311 .nr(16) 9312 .kr(4) 9313 .sr(1) 9314 .m(2) 9315 .n(16) 9316 .k(k) 9317 .ks(3) 9318 .a_offset(163) 9319 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9320 } 9321 } 9322 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,zero)9323 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, zero) { 9324 TEST_REQUIRES_ARM_NEON; 9325 for (size_t k = 1; k <= 80; k += 17) { 9326 for (uint32_t mz = 0; mz < 2; mz++) { 9327 GemmMicrokernelTester() 9328 .mr(2) 9329 .nr(16) 9330 .kr(4) 9331 .sr(1) 9332 .m(2) 9333 .n(16) 9334 .k(k) 9335 .ks(3) 9336 .a_offset(163) 9337 .zero_index(mz) 9338 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9339 } 9340 } 9341 } 9342 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,qmin)9343 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, qmin) { 9344 TEST_REQUIRES_ARM_NEON; 9345 GemmMicrokernelTester() 9346 .mr(2) 9347 .nr(16) 9348 .kr(4) 9349 .sr(1) 9350 .m(2) 9351 .n(16) 9352 .k(16) 9353 .qmin(128) 9354 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9355 } 9356 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,qmax)9357 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, qmax) { 9358 TEST_REQUIRES_ARM_NEON; 9359 
GemmMicrokernelTester() 9360 .mr(2) 9361 .nr(16) 9362 .kr(4) 9363 .sr(1) 9364 .m(2) 9365 .n(16) 9366 .k(16) 9367 .qmax(128) 9368 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9369 } 9370 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R,strided_cm)9371 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cm) { 9372 TEST_REQUIRES_ARM_NEON; 9373 GemmMicrokernelTester() 9374 .mr(2) 9375 .nr(16) 9376 .kr(4) 9377 .sr(1) 9378 .m(2) 9379 .n(16) 9380 .k(16) 9381 .cm_stride(19) 9382 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9383 } 9384 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 9385 9386 9387 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_eq_8)9388 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_eq_8) { 9389 TEST_REQUIRES_ARM_NEON; 9390 GemmMicrokernelTester() 9391 .mr(2) 9392 .nr(16) 9393 .kr(4) 9394 .sr(1) 9395 .m(2) 9396 .n(16) 9397 .k(8) 9398 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9399 } 9400 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,strided_cn)9401 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, strided_cn) { 9402 TEST_REQUIRES_ARM_NEON; 9403 GemmMicrokernelTester() 9404 .mr(2) 9405 .nr(16) 9406 .kr(4) 9407 .sr(1) 9408 .m(2) 9409 .n(16) 9410 .k(8) 9411 .cn_stride(19) 9412 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9413 } 9414 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_eq_8_subtile)9415 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_eq_8_subtile) { 9416 TEST_REQUIRES_ARM_NEON; 9417 for (uint32_t n = 1; n <= 16; n++) { 9418 for (uint32_t m = 1; m <= 2; m++) { 9419 GemmMicrokernelTester() 9420 .mr(2) 9421 .nr(16) 9422 
.kr(4) 9423 .sr(1) 9424 .m(m) 9425 .n(n) 9426 .k(8) 9427 .iterations(1) 9428 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9429 } 9430 } 9431 } 9432 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_eq_8_subtile_m)9433 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) { 9434 TEST_REQUIRES_ARM_NEON; 9435 for (uint32_t m = 1; m <= 2; m++) { 9436 GemmMicrokernelTester() 9437 .mr(2) 9438 .nr(16) 9439 .kr(4) 9440 .sr(1) 9441 .m(m) 9442 .n(16) 9443 .k(8) 9444 .iterations(1) 9445 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9446 } 9447 } 9448 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_eq_8_subtile_n)9449 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) { 9450 TEST_REQUIRES_ARM_NEON; 9451 for (uint32_t n = 1; n <= 16; n++) { 9452 GemmMicrokernelTester() 9453 .mr(2) 9454 .nr(16) 9455 .kr(4) 9456 .sr(1) 9457 .m(2) 9458 .n(n) 9459 .k(8) 9460 .iterations(1) 9461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9462 } 9463 } 9464 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_lt_8)9465 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_lt_8) { 9466 TEST_REQUIRES_ARM_NEON; 9467 for (size_t k = 1; k < 8; k++) { 9468 GemmMicrokernelTester() 9469 .mr(2) 9470 .nr(16) 9471 .kr(4) 9472 .sr(1) 9473 .m(2) 9474 .n(16) 9475 .k(k) 9476 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9477 } 9478 } 9479 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_lt_8_subtile)9480 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_lt_8_subtile) { 9481 TEST_REQUIRES_ARM_NEON; 9482 for (size_t k = 1; k < 8; k++) { 9483 for (uint32_t n = 1; n <= 16; n++) { 9484 for (uint32_t m = 1; m <= 
2; m++) { 9485 GemmMicrokernelTester() 9486 .mr(2) 9487 .nr(16) 9488 .kr(4) 9489 .sr(1) 9490 .m(m) 9491 .n(n) 9492 .k(k) 9493 .iterations(1) 9494 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9495 } 9496 } 9497 } 9498 } 9499 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_gt_8)9500 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_gt_8) { 9501 TEST_REQUIRES_ARM_NEON; 9502 for (size_t k = 9; k < 16; k++) { 9503 GemmMicrokernelTester() 9504 .mr(2) 9505 .nr(16) 9506 .kr(4) 9507 .sr(1) 9508 .m(2) 9509 .n(16) 9510 .k(k) 9511 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9512 } 9513 } 9514 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_gt_8_subtile)9515 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_gt_8_subtile) { 9516 TEST_REQUIRES_ARM_NEON; 9517 for (size_t k = 9; k < 16; k++) { 9518 for (uint32_t n = 1; n <= 16; n++) { 9519 for (uint32_t m = 1; m <= 2; m++) { 9520 GemmMicrokernelTester() 9521 .mr(2) 9522 .nr(16) 9523 .kr(4) 9524 .sr(1) 9525 .m(m) 9526 .n(n) 9527 .k(k) 9528 .iterations(1) 9529 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9530 } 9531 } 9532 } 9533 } 9534 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_div_8)9535 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_div_8) { 9536 TEST_REQUIRES_ARM_NEON; 9537 for (size_t k = 16; k <= 80; k += 8) { 9538 GemmMicrokernelTester() 9539 .mr(2) 9540 .nr(16) 9541 .kr(4) 9542 .sr(1) 9543 .m(2) 9544 .n(16) 9545 .k(k) 9546 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9547 } 9548 } 9549 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,k_div_8_subtile)9550 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, k_div_8_subtile) { 9551 
TEST_REQUIRES_ARM_NEON; 9552 for (size_t k = 16; k <= 80; k += 8) { 9553 for (uint32_t n = 1; n <= 16; n++) { 9554 for (uint32_t m = 1; m <= 2; m++) { 9555 GemmMicrokernelTester() 9556 .mr(2) 9557 .nr(16) 9558 .kr(4) 9559 .sr(1) 9560 .m(m) 9561 .n(n) 9562 .k(k) 9563 .iterations(1) 9564 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9565 } 9566 } 9567 } 9568 } 9569 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,n_gt_16)9570 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_gt_16) { 9571 TEST_REQUIRES_ARM_NEON; 9572 for (uint32_t n = 17; n < 32; n++) { 9573 for (size_t k = 1; k <= 40; k += 9) { 9574 GemmMicrokernelTester() 9575 .mr(2) 9576 .nr(16) 9577 .kr(4) 9578 .sr(1) 9579 .m(2) 9580 .n(n) 9581 .k(k) 9582 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9583 } 9584 } 9585 } 9586 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,n_gt_16_strided_cn)9587 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) { 9588 TEST_REQUIRES_ARM_NEON; 9589 for (uint32_t n = 17; n < 32; n++) { 9590 for (size_t k = 1; k <= 40; k += 9) { 9591 GemmMicrokernelTester() 9592 .mr(2) 9593 .nr(16) 9594 .kr(4) 9595 .sr(1) 9596 .m(2) 9597 .n(n) 9598 .k(k) 9599 .cn_stride(19) 9600 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9601 } 9602 } 9603 } 9604 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,n_gt_16_subtile)9605 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_gt_16_subtile) { 9606 TEST_REQUIRES_ARM_NEON; 9607 for (uint32_t n = 17; n < 32; n++) { 9608 for (size_t k = 1; k <= 40; k += 9) { 9609 for (uint32_t m = 1; m <= 2; m++) { 9610 GemmMicrokernelTester() 9611 .mr(2) 9612 .nr(16) 9613 .kr(4) 9614 .sr(1) 9615 .m(m) 9616 .n(n) 9617 .k(k) 9618 .iterations(1) 9619 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9620 } 9621 } 9622 } 9623 } 9624 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,n_div_16)9625 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_div_16) { 9626 TEST_REQUIRES_ARM_NEON; 9627 for (uint32_t n = 32; n <= 48; n += 16) { 9628 for (size_t k = 1; k <= 40; k += 9) { 9629 GemmMicrokernelTester() 9630 .mr(2) 9631 .nr(16) 9632 .kr(4) 9633 .sr(1) 9634 .m(2) 9635 .n(n) 9636 .k(k) 9637 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9638 } 9639 } 9640 } 9641 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,n_div_16_strided_cn)9642 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) { 9643 TEST_REQUIRES_ARM_NEON; 9644 for (uint32_t n = 32; n <= 48; n += 16) { 9645 for (size_t k = 1; k <= 40; k += 9) { 9646 GemmMicrokernelTester() 9647 .mr(2) 9648 .nr(16) 9649 .kr(4) 9650 .sr(1) 9651 .m(2) 9652 .n(n) 9653 .k(k) 9654 .cn_stride(19) 9655 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9656 } 9657 } 9658 } 9659 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,n_div_16_subtile)9660 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_div_16_subtile) { 9661 TEST_REQUIRES_ARM_NEON; 9662 for (uint32_t n = 32; n <= 48; n += 16) { 9663 for (size_t k = 1; k <= 40; k += 9) { 9664 for (uint32_t m = 1; m <= 2; m++) { 9665 GemmMicrokernelTester() 9666 .mr(2) 9667 .nr(16) 9668 .kr(4) 9669 .sr(1) 9670 .m(m) 9671 .n(n) 9672 .k(k) 9673 .iterations(1) 9674 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9675 } 9676 } 9677 } 9678 } 9679 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,small_kernel)9680 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, 
small_kernel) { 9681 TEST_REQUIRES_ARM_NEON; 9682 for (size_t k = 1; k <= 40; k += 9) { 9683 GemmMicrokernelTester() 9684 .mr(2) 9685 .nr(16) 9686 .kr(4) 9687 .sr(1) 9688 .m(2) 9689 .n(16) 9690 .k(k) 9691 .ks(3) 9692 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9693 } 9694 } 9695 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,small_kernel_subtile)9696 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, small_kernel_subtile) { 9697 TEST_REQUIRES_ARM_NEON; 9698 for (size_t k = 1; k <= 40; k += 9) { 9699 for (uint32_t n = 1; n <= 16; n++) { 9700 for (uint32_t m = 1; m <= 2; m++) { 9701 GemmMicrokernelTester() 9702 .mr(2) 9703 .nr(16) 9704 .kr(4) 9705 .sr(1) 9706 .m(m) 9707 .n(n) 9708 .k(k) 9709 .ks(3) 9710 .iterations(1) 9711 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9712 } 9713 } 9714 } 9715 } 9716 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,n_gt_16_small_kernel)9717 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_gt_16_small_kernel) { 9718 TEST_REQUIRES_ARM_NEON; 9719 for (uint32_t n = 17; n < 32; n++) { 9720 for (size_t k = 1; k <= 40; k += 9) { 9721 GemmMicrokernelTester() 9722 .mr(2) 9723 .nr(16) 9724 .kr(4) 9725 .sr(1) 9726 .m(2) 9727 .n(n) 9728 .k(k) 9729 .ks(3) 9730 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9731 } 9732 } 9733 } 9734 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,n_div_16_small_kernel)9735 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, n_div_16_small_kernel) { 9736 TEST_REQUIRES_ARM_NEON; 9737 for (uint32_t n = 32; n <= 48; n += 16) { 9738 for (size_t k = 1; k <= 40; k += 9) { 9739 GemmMicrokernelTester() 9740 .mr(2) 9741 .nr(16) 9742 .kr(4) 9743 .sr(1) 9744 .m(2) 9745 .n(n) 9746 .k(k) 9747 .ks(3) 9748 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9749 } 9750 } 9751 } 9752 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,strided_cm_subtile)9753 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, strided_cm_subtile) { 9754 TEST_REQUIRES_ARM_NEON; 9755 for (size_t k = 1; k <= 40; k += 9) { 9756 for (uint32_t n = 1; n <= 16; n++) { 9757 for (uint32_t m = 1; m <= 2; m++) { 9758 GemmMicrokernelTester() 9759 .mr(2) 9760 .nr(16) 9761 .kr(4) 9762 .sr(1) 9763 .m(m) 9764 .n(n) 9765 .k(k) 9766 .cm_stride(19) 9767 .iterations(1) 9768 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9769 } 9770 } 9771 } 9772 } 9773 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,a_offset)9774 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, a_offset) { 9775 TEST_REQUIRES_ARM_NEON; 9776 for (size_t k = 1; k <= 40; k += 9) { 9777 GemmMicrokernelTester() 9778 .mr(2) 9779 .nr(16) 9780 .kr(4) 9781 .sr(1) 9782 .m(2) 9783 .n(16) 9784 .k(k) 9785 .ks(3) 9786 .a_offset(83) 9787 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9788 } 9789 } 9790 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,zero)9791 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, zero) { 9792 TEST_REQUIRES_ARM_NEON; 9793 for (size_t k = 1; k <= 40; k += 9) { 9794 for (uint32_t mz = 0; mz < 2; mz++) { 9795 GemmMicrokernelTester() 9796 .mr(2) 9797 .nr(16) 9798 .kr(4) 9799 .sr(1) 9800 .m(2) 9801 .n(16) 9802 .k(k) 9803 .ks(3) 9804 .a_offset(83) 9805 .zero_index(mz) 9806 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9807 } 9808 } 9809 } 9810 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,qmin)9811 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, qmin) { 9812 
TEST_REQUIRES_ARM_NEON; 9813 GemmMicrokernelTester() 9814 .mr(2) 9815 .nr(16) 9816 .kr(4) 9817 .sr(1) 9818 .m(2) 9819 .n(16) 9820 .k(8) 9821 .qmin(128) 9822 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9823 } 9824 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,qmax)9825 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, qmax) { 9826 TEST_REQUIRES_ARM_NEON; 9827 GemmMicrokernelTester() 9828 .mr(2) 9829 .nr(16) 9830 .kr(4) 9831 .sr(1) 9832 .m(2) 9833 .n(16) 9834 .k(8) 9835 .qmax(128) 9836 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9837 } 9838 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R,strided_cm)9839 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MULL_LD2R, strided_cm) { 9840 TEST_REQUIRES_ARM_NEON; 9841 GemmMicrokernelTester() 9842 .mr(2) 9843 .nr(16) 9844 .kr(4) 9845 .sr(1) 9846 .m(2) 9847 .n(16) 9848 .k(8) 9849 .cm_stride(19) 9850 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9851 } 9852 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 9853 9854 9855 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_eq_16)9856 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_eq_16) { 9857 TEST_REQUIRES_ARM_NEON; 9858 GemmMicrokernelTester() 9859 .mr(2) 9860 .nr(16) 9861 .kr(4) 9862 .sr(2) 9863 .m(2) 9864 .n(16) 9865 .k(16) 9866 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9867 } 9868 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,strided_cn)9869 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, strided_cn) { 9870 TEST_REQUIRES_ARM_NEON; 9871 GemmMicrokernelTester() 9872 .mr(2) 9873 .nr(16) 9874 .kr(4) 9875 .sr(2) 9876 .m(2) 9877 .n(16) 9878 .k(16) 9879 .cn_stride(19) 9880 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9881 } 9882 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_eq_16_subtile)9883 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_eq_16_subtile) { 9884 TEST_REQUIRES_ARM_NEON; 9885 for (uint32_t n = 1; n <= 16; n++) { 9886 for (uint32_t m = 1; m <= 2; m++) { 9887 GemmMicrokernelTester() 9888 .mr(2) 9889 .nr(16) 9890 .kr(4) 9891 .sr(2) 9892 .m(m) 9893 .n(n) 9894 .k(16) 9895 .iterations(1) 9896 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9897 } 9898 } 9899 } 9900 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_eq_16_subtile_m)9901 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_eq_16_subtile_m) { 9902 TEST_REQUIRES_ARM_NEON; 9903 for (uint32_t m = 1; m <= 2; m++) { 9904 GemmMicrokernelTester() 9905 .mr(2) 9906 .nr(16) 9907 .kr(4) 9908 .sr(2) 9909 .m(m) 9910 .n(16) 9911 .k(16) 9912 .iterations(1) 9913 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9914 } 9915 } 9916 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_eq_16_subtile_n)9917 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_eq_16_subtile_n) { 9918 TEST_REQUIRES_ARM_NEON; 9919 for (uint32_t n = 1; n <= 16; n++) { 9920 GemmMicrokernelTester() 9921 .mr(2) 9922 .nr(16) 9923 .kr(4) 9924 .sr(2) 9925 .m(2) 9926 .n(n) 9927 .k(16) 9928 .iterations(1) 9929 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9930 } 9931 } 9932 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_lt_16)9933 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_lt_16) { 9934 TEST_REQUIRES_ARM_NEON; 9935 for (size_t k = 1; k < 16; k++) { 9936 GemmMicrokernelTester() 9937 .mr(2) 9938 .nr(16) 9939 .kr(4) 9940 .sr(2) 9941 .m(2) 9942 .n(16) 9943 .k(k) 9944 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9945 } 9946 } 9947 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_lt_16_subtile)9948 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_lt_16_subtile) { 9949 TEST_REQUIRES_ARM_NEON; 9950 for (size_t k = 1; k < 16; k++) { 9951 for (uint32_t n = 1; n <= 16; n++) { 9952 for (uint32_t m = 1; m <= 2; m++) { 9953 GemmMicrokernelTester() 9954 .mr(2) 9955 .nr(16) 9956 .kr(4) 9957 .sr(2) 9958 .m(m) 9959 .n(n) 9960 .k(k) 9961 .iterations(1) 9962 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9963 } 9964 } 9965 } 9966 } 9967 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_gt_16)9968 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_gt_16) { 9969 TEST_REQUIRES_ARM_NEON; 9970 for (size_t k = 17; k < 32; k++) { 9971 GemmMicrokernelTester() 9972 .mr(2) 9973 .nr(16) 9974 .kr(4) 9975 .sr(2) 9976 .m(2) 9977 .n(16) 9978 .k(k) 9979 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9980 } 9981 } 9982 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_gt_16_subtile)9983 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_gt_16_subtile) { 9984 TEST_REQUIRES_ARM_NEON; 9985 for (size_t k = 17; k < 32; k++) { 9986 for (uint32_t n = 1; n <= 16; n++) { 9987 for (uint32_t m = 1; m <= 2; m++) { 9988 GemmMicrokernelTester() 9989 .mr(2) 9990 .nr(16) 9991 .kr(4) 9992 .sr(2) 9993 .m(m) 9994 .n(n) 9995 .k(k) 9996 .iterations(1) 9997 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 9998 } 9999 } 10000 } 10001 } 10002 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_div_16)10003 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_div_16) { 10004 TEST_REQUIRES_ARM_NEON; 10005 for (size_t k = 32; k <= 160; k += 16) { 
10006 GemmMicrokernelTester() 10007 .mr(2) 10008 .nr(16) 10009 .kr(4) 10010 .sr(2) 10011 .m(2) 10012 .n(16) 10013 .k(k) 10014 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10015 } 10016 } 10017 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,k_div_16_subtile)10018 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, k_div_16_subtile) { 10019 TEST_REQUIRES_ARM_NEON; 10020 for (size_t k = 32; k <= 160; k += 16) { 10021 for (uint32_t n = 1; n <= 16; n++) { 10022 for (uint32_t m = 1; m <= 2; m++) { 10023 GemmMicrokernelTester() 10024 .mr(2) 10025 .nr(16) 10026 .kr(4) 10027 .sr(2) 10028 .m(m) 10029 .n(n) 10030 .k(k) 10031 .iterations(1) 10032 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10033 } 10034 } 10035 } 10036 } 10037 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,n_gt_16)10038 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_gt_16) { 10039 TEST_REQUIRES_ARM_NEON; 10040 for (uint32_t n = 17; n < 32; n++) { 10041 for (size_t k = 1; k <= 80; k += 17) { 10042 GemmMicrokernelTester() 10043 .mr(2) 10044 .nr(16) 10045 .kr(4) 10046 .sr(2) 10047 .m(2) 10048 .n(n) 10049 .k(k) 10050 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10051 } 10052 } 10053 } 10054 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,n_gt_16_strided_cn)10055 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_gt_16_strided_cn) { 10056 TEST_REQUIRES_ARM_NEON; 10057 for (uint32_t n = 17; n < 32; n++) { 10058 for (size_t k = 1; k <= 80; k += 17) { 10059 GemmMicrokernelTester() 10060 .mr(2) 10061 .nr(16) 10062 .kr(4) 10063 .sr(2) 10064 .m(2) 10065 .n(n) 10066 .k(k) 10067 .cn_stride(19) 10068 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10069 } 10070 } 10071 } 10072 
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,n_gt_16_subtile)10073 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_gt_16_subtile) { 10074 TEST_REQUIRES_ARM_NEON; 10075 for (uint32_t n = 17; n < 32; n++) { 10076 for (size_t k = 1; k <= 80; k += 17) { 10077 for (uint32_t m = 1; m <= 2; m++) { 10078 GemmMicrokernelTester() 10079 .mr(2) 10080 .nr(16) 10081 .kr(4) 10082 .sr(2) 10083 .m(m) 10084 .n(n) 10085 .k(k) 10086 .iterations(1) 10087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10088 } 10089 } 10090 } 10091 } 10092 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,n_div_16)10093 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_div_16) { 10094 TEST_REQUIRES_ARM_NEON; 10095 for (uint32_t n = 32; n <= 48; n += 16) { 10096 for (size_t k = 1; k <= 80; k += 17) { 10097 GemmMicrokernelTester() 10098 .mr(2) 10099 .nr(16) 10100 .kr(4) 10101 .sr(2) 10102 .m(2) 10103 .n(n) 10104 .k(k) 10105 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10106 } 10107 } 10108 } 10109 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,n_div_16_strided_cn)10110 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_div_16_strided_cn) { 10111 TEST_REQUIRES_ARM_NEON; 10112 for (uint32_t n = 32; n <= 48; n += 16) { 10113 for (size_t k = 1; k <= 80; k += 17) { 10114 GemmMicrokernelTester() 10115 .mr(2) 10116 .nr(16) 10117 .kr(4) 10118 .sr(2) 10119 .m(2) 10120 .n(n) 10121 .k(k) 10122 .cn_stride(19) 10123 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10124 } 10125 } 10126 } 10127 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,n_div_16_subtile)10128 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_div_16_subtile) { 10129 TEST_REQUIRES_ARM_NEON; 10130 for (uint32_t n = 32; n <= 48; n += 16) { 10131 for (size_t k = 1; k <= 80; k += 17) { 10132 
for (uint32_t m = 1; m <= 2; m++) { 10133 GemmMicrokernelTester() 10134 .mr(2) 10135 .nr(16) 10136 .kr(4) 10137 .sr(2) 10138 .m(m) 10139 .n(n) 10140 .k(k) 10141 .iterations(1) 10142 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10143 } 10144 } 10145 } 10146 } 10147 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,small_kernel)10148 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, small_kernel) { 10149 TEST_REQUIRES_ARM_NEON; 10150 for (size_t k = 1; k <= 80; k += 17) { 10151 GemmMicrokernelTester() 10152 .mr(2) 10153 .nr(16) 10154 .kr(4) 10155 .sr(2) 10156 .m(2) 10157 .n(16) 10158 .k(k) 10159 .ks(3) 10160 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10161 } 10162 } 10163 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,small_kernel_subtile)10164 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, small_kernel_subtile) { 10165 TEST_REQUIRES_ARM_NEON; 10166 for (size_t k = 1; k <= 80; k += 17) { 10167 for (uint32_t n = 1; n <= 16; n++) { 10168 for (uint32_t m = 1; m <= 2; m++) { 10169 GemmMicrokernelTester() 10170 .mr(2) 10171 .nr(16) 10172 .kr(4) 10173 .sr(2) 10174 .m(m) 10175 .n(n) 10176 .k(k) 10177 .ks(3) 10178 .iterations(1) 10179 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10180 } 10181 } 10182 } 10183 } 10184 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,n_gt_16_small_kernel)10185 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_gt_16_small_kernel) { 10186 TEST_REQUIRES_ARM_NEON; 10187 for (uint32_t n = 17; n < 32; n++) { 10188 for (size_t k = 1; k <= 80; k += 17) { 10189 GemmMicrokernelTester() 10190 .mr(2) 10191 .nr(16) 10192 .kr(4) 10193 .sr(2) 10194 .m(2) 10195 .n(n) 10196 .k(k) 10197 .ks(3) 10198 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10199 } 10200 } 10201 } 10202 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,n_div_16_small_kernel)10203 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, n_div_16_small_kernel) { 10204 TEST_REQUIRES_ARM_NEON; 10205 for (uint32_t n = 32; n <= 48; n += 16) { 10206 for (size_t k = 1; k <= 80; k += 17) { 10207 GemmMicrokernelTester() 10208 .mr(2) 10209 .nr(16) 10210 .kr(4) 10211 .sr(2) 10212 .m(2) 10213 .n(n) 10214 .k(k) 10215 .ks(3) 10216 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10217 } 10218 } 10219 } 10220 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,strided_cm_subtile)10221 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, strided_cm_subtile) { 10222 TEST_REQUIRES_ARM_NEON; 10223 for (size_t k = 1; k <= 80; k += 17) { 10224 for (uint32_t n = 1; n <= 16; n++) { 10225 for (uint32_t m = 1; m <= 2; m++) { 10226 GemmMicrokernelTester() 10227 .mr(2) 10228 .nr(16) 10229 .kr(4) 10230 .sr(2) 10231 .m(m) 10232 .n(n) 10233 .k(k) 10234 .cm_stride(19) 10235 .iterations(1) 10236 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10237 } 10238 } 10239 } 10240 } 10241 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,a_offset)10242 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, a_offset) { 10243 TEST_REQUIRES_ARM_NEON; 10244 for (size_t k = 1; k <= 80; k += 17) { 10245 GemmMicrokernelTester() 10246 .mr(2) 10247 .nr(16) 10248 .kr(4) 10249 .sr(2) 10250 .m(2) 10251 .n(16) 10252 .k(k) 10253 .ks(3) 10254 .a_offset(163) 10255 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10256 } 10257 } 10258 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,zero)10259 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, zero) { 10260 TEST_REQUIRES_ARM_NEON; 10261 for (size_t k = 
1; k <= 80; k += 17) { 10262 for (uint32_t mz = 0; mz < 2; mz++) { 10263 GemmMicrokernelTester() 10264 .mr(2) 10265 .nr(16) 10266 .kr(4) 10267 .sr(2) 10268 .m(2) 10269 .n(16) 10270 .k(k) 10271 .ks(3) 10272 .a_offset(163) 10273 .zero_index(mz) 10274 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10275 } 10276 } 10277 } 10278 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,qmin)10279 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, qmin) { 10280 TEST_REQUIRES_ARM_NEON; 10281 GemmMicrokernelTester() 10282 .mr(2) 10283 .nr(16) 10284 .kr(4) 10285 .sr(2) 10286 .m(2) 10287 .n(16) 10288 .k(16) 10289 .qmin(128) 10290 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10291 } 10292 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,qmax)10293 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, qmax) { 10294 TEST_REQUIRES_ARM_NEON; 10295 GemmMicrokernelTester() 10296 .mr(2) 10297 .nr(16) 10298 .kr(4) 10299 .sr(2) 10300 .m(2) 10301 .n(16) 10302 .k(16) 10303 .qmax(128) 10304 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10305 } 10306 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL,strided_cm)10307 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MLAL, strided_cm) { 10308 TEST_REQUIRES_ARM_NEON; 10309 GemmMicrokernelTester() 10310 .mr(2) 10311 .nr(16) 10312 .kr(4) 10313 .sr(2) 10314 .m(2) 10315 .n(16) 10316 .k(16) 10317 .cm_stride(19) 10318 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10319 } 10320 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 10321 10322 10323 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16)10324 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16) { 10325 TEST_REQUIRES_ARM_NEON; 10326 
GemmMicrokernelTester() 10327 .mr(2) 10328 .nr(16) 10329 .kr(16) 10330 .sr(1) 10331 .m(2) 10332 .n(16) 10333 .k(16) 10334 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10335 } 10336 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cn)10337 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cn) { 10338 TEST_REQUIRES_ARM_NEON; 10339 GemmMicrokernelTester() 10340 .mr(2) 10341 .nr(16) 10342 .kr(16) 10343 .sr(1) 10344 .m(2) 10345 .n(16) 10346 .k(16) 10347 .cn_stride(19) 10348 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10349 } 10350 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile)10351 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile) { 10352 TEST_REQUIRES_ARM_NEON; 10353 for (uint32_t n = 1; n <= 16; n++) { 10354 for (uint32_t m = 1; m <= 2; m++) { 10355 GemmMicrokernelTester() 10356 .mr(2) 10357 .nr(16) 10358 .kr(16) 10359 .sr(1) 10360 .m(m) 10361 .n(n) 10362 .k(16) 10363 .iterations(1) 10364 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10365 } 10366 } 10367 } 10368 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile_m)10369 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile_m) { 10370 TEST_REQUIRES_ARM_NEON; 10371 for (uint32_t m = 1; m <= 2; m++) { 10372 GemmMicrokernelTester() 10373 .mr(2) 10374 .nr(16) 10375 .kr(16) 10376 .sr(1) 10377 .m(m) 10378 .n(16) 10379 .k(16) 10380 .iterations(1) 10381 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10382 } 10383 } 10384 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_eq_16_subtile_n)10385 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_eq_16_subtile_n) { 10386 TEST_REQUIRES_ARM_NEON; 10387 for (uint32_t n = 1; n <= 
16; n++) { 10388 GemmMicrokernelTester() 10389 .mr(2) 10390 .nr(16) 10391 .kr(16) 10392 .sr(1) 10393 .m(2) 10394 .n(n) 10395 .k(16) 10396 .iterations(1) 10397 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10398 } 10399 } 10400 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_lt_16)10401 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16) { 10402 TEST_REQUIRES_ARM_NEON; 10403 for (size_t k = 1; k < 16; k++) { 10404 GemmMicrokernelTester() 10405 .mr(2) 10406 .nr(16) 10407 .kr(16) 10408 .sr(1) 10409 .m(2) 10410 .n(16) 10411 .k(k) 10412 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10413 } 10414 } 10415 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_lt_16_subtile)10416 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_lt_16_subtile) { 10417 TEST_REQUIRES_ARM_NEON; 10418 for (size_t k = 1; k < 16; k++) { 10419 for (uint32_t n = 1; n <= 16; n++) { 10420 for (uint32_t m = 1; m <= 2; m++) { 10421 GemmMicrokernelTester() 10422 .mr(2) 10423 .nr(16) 10424 .kr(16) 10425 .sr(1) 10426 .m(m) 10427 .n(n) 10428 .k(k) 10429 .iterations(1) 10430 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10431 } 10432 } 10433 } 10434 } 10435 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_gt_16)10436 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16) { 10437 TEST_REQUIRES_ARM_NEON; 10438 for (size_t k = 17; k < 32; k++) { 10439 GemmMicrokernelTester() 10440 .mr(2) 10441 .nr(16) 10442 .kr(16) 10443 .sr(1) 10444 .m(2) 10445 .n(16) 10446 .k(k) 10447 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10448 } 10449 } 10450 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_gt_16_subtile)10451 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_gt_16_subtile) { 
10452 TEST_REQUIRES_ARM_NEON; 10453 for (size_t k = 17; k < 32; k++) { 10454 for (uint32_t n = 1; n <= 16; n++) { 10455 for (uint32_t m = 1; m <= 2; m++) { 10456 GemmMicrokernelTester() 10457 .mr(2) 10458 .nr(16) 10459 .kr(16) 10460 .sr(1) 10461 .m(m) 10462 .n(n) 10463 .k(k) 10464 .iterations(1) 10465 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10466 } 10467 } 10468 } 10469 } 10470 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_div_16)10471 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16) { 10472 TEST_REQUIRES_ARM_NEON; 10473 for (size_t k = 32; k <= 160; k += 16) { 10474 GemmMicrokernelTester() 10475 .mr(2) 10476 .nr(16) 10477 .kr(16) 10478 .sr(1) 10479 .m(2) 10480 .n(16) 10481 .k(k) 10482 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10483 } 10484 } 10485 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,k_div_16_subtile)10486 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, k_div_16_subtile) { 10487 TEST_REQUIRES_ARM_NEON; 10488 for (size_t k = 32; k <= 160; k += 16) { 10489 for (uint32_t n = 1; n <= 16; n++) { 10490 for (uint32_t m = 1; m <= 2; m++) { 10491 GemmMicrokernelTester() 10492 .mr(2) 10493 .nr(16) 10494 .kr(16) 10495 .sr(1) 10496 .m(m) 10497 .n(n) 10498 .k(k) 10499 .iterations(1) 10500 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10501 } 10502 } 10503 } 10504 } 10505 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16)10506 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16) { 10507 TEST_REQUIRES_ARM_NEON; 10508 for (uint32_t n = 17; n < 32; n++) { 10509 for (size_t k = 1; k <= 80; k += 17) { 10510 GemmMicrokernelTester() 10511 .mr(2) 10512 .nr(16) 10513 .kr(16) 10514 .sr(1) 10515 .m(2) 10516 .n(n) 10517 .k(k) 10518 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10519 } 10520 } 10521 } 10522 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_strided_cn)10523 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_strided_cn) { 10524 TEST_REQUIRES_ARM_NEON; 10525 for (uint32_t n = 17; n < 32; n++) { 10526 for (size_t k = 1; k <= 80; k += 17) { 10527 GemmMicrokernelTester() 10528 .mr(2) 10529 .nr(16) 10530 .kr(16) 10531 .sr(1) 10532 .m(2) 10533 .n(n) 10534 .k(k) 10535 .cn_stride(19) 10536 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10537 } 10538 } 10539 } 10540 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_subtile)10541 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_subtile) { 10542 TEST_REQUIRES_ARM_NEON; 10543 for (uint32_t n = 17; n < 32; n++) { 10544 for (size_t k = 1; k <= 80; k += 17) { 10545 for (uint32_t m = 1; m <= 2; m++) { 10546 GemmMicrokernelTester() 10547 .mr(2) 10548 .nr(16) 10549 .kr(16) 10550 .sr(1) 10551 .m(m) 10552 .n(n) 10553 .k(k) 10554 .iterations(1) 10555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10556 } 10557 } 10558 } 10559 } 10560 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16)10561 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16) { 10562 TEST_REQUIRES_ARM_NEON; 10563 for (uint32_t n = 32; n <= 48; n += 16) { 10564 for (size_t k = 1; k <= 80; k += 17) { 10565 GemmMicrokernelTester() 10566 .mr(2) 10567 .nr(16) 10568 .kr(16) 10569 .sr(1) 10570 .m(2) 10571 .n(n) 10572 .k(k) 10573 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10574 } 10575 } 10576 } 10577 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_strided_cn)10578 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_strided_cn) { 10579 TEST_REQUIRES_ARM_NEON; 10580 for 
(uint32_t n = 32; n <= 48; n += 16) { 10581 for (size_t k = 1; k <= 80; k += 17) { 10582 GemmMicrokernelTester() 10583 .mr(2) 10584 .nr(16) 10585 .kr(16) 10586 .sr(1) 10587 .m(2) 10588 .n(n) 10589 .k(k) 10590 .cn_stride(19) 10591 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10592 } 10593 } 10594 } 10595 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_subtile)10596 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_subtile) { 10597 TEST_REQUIRES_ARM_NEON; 10598 for (uint32_t n = 32; n <= 48; n += 16) { 10599 for (size_t k = 1; k <= 80; k += 17) { 10600 for (uint32_t m = 1; m <= 2; m++) { 10601 GemmMicrokernelTester() 10602 .mr(2) 10603 .nr(16) 10604 .kr(16) 10605 .sr(1) 10606 .m(m) 10607 .n(n) 10608 .k(k) 10609 .iterations(1) 10610 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10611 } 10612 } 10613 } 10614 } 10615 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,small_kernel)10616 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, small_kernel) { 10617 TEST_REQUIRES_ARM_NEON; 10618 for (size_t k = 1; k <= 80; k += 17) { 10619 GemmMicrokernelTester() 10620 .mr(2) 10621 .nr(16) 10622 .kr(16) 10623 .sr(1) 10624 .m(2) 10625 .n(16) 10626 .k(k) 10627 .ks(3) 10628 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10629 } 10630 } 10631 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,small_kernel_subtile)10632 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, small_kernel_subtile) { 10633 TEST_REQUIRES_ARM_NEON; 10634 for (size_t k = 1; k <= 80; k += 17) { 10635 for (uint32_t n = 1; n <= 16; n++) { 10636 for (uint32_t m = 1; m <= 2; m++) { 10637 GemmMicrokernelTester() 10638 .mr(2) 10639 .nr(16) 10640 .kr(16) 10641 .sr(1) 10642 .m(m) 10643 .n(n) 10644 .k(k) 10645 .ks(3) 10646 .iterations(1) 10647 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10648 } 10649 } 10650 } 10651 } 10652 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_gt_16_small_kernel)10653 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_gt_16_small_kernel) { 10654 TEST_REQUIRES_ARM_NEON; 10655 for (uint32_t n = 17; n < 32; n++) { 10656 for (size_t k = 1; k <= 80; k += 17) { 10657 GemmMicrokernelTester() 10658 .mr(2) 10659 .nr(16) 10660 .kr(16) 10661 .sr(1) 10662 .m(2) 10663 .n(n) 10664 .k(k) 10665 .ks(3) 10666 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10667 } 10668 } 10669 } 10670 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,n_div_16_small_kernel)10671 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, n_div_16_small_kernel) { 10672 TEST_REQUIRES_ARM_NEON; 10673 for (uint32_t n = 32; n <= 48; n += 16) { 10674 for (size_t k = 1; k <= 80; k += 17) { 10675 GemmMicrokernelTester() 10676 .mr(2) 10677 .nr(16) 10678 .kr(16) 10679 .sr(1) 10680 .m(2) 10681 .n(n) 10682 .k(k) 10683 .ks(3) 10684 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10685 } 10686 } 10687 } 10688 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cm_subtile)10689 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cm_subtile) { 10690 TEST_REQUIRES_ARM_NEON; 10691 for (size_t k = 1; k <= 80; k += 17) { 10692 for (uint32_t n = 1; n <= 16; n++) { 10693 for (uint32_t m = 1; m <= 2; m++) { 10694 GemmMicrokernelTester() 10695 .mr(2) 10696 .nr(16) 10697 .kr(16) 10698 .sr(1) 10699 .m(m) 10700 .n(n) 10701 .k(k) 10702 .cm_stride(19) 10703 .iterations(1) 10704 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10705 } 10706 } 10707 } 10708 } 10709 
// 2x16c16 NEON MLAL (mr=2, nr=16, kr=16, sr=1): ks(3) combined with
// a_offset(163), k in {1, 18, 35, 52, 69}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(2).nr(16).kr(16).sr(1)
      .m(2).n(16).k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// As a_offset, additionally sweeping zero_index(mz) for mz in {0, 1}.
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 2; mz++) {
      GemmMicrokernelTester()
        .mr(2).nr(16).kr(16).sr(1)
        .m(2).n(16).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Full 2x16 tile at k=16 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(16).sr(1)
    .m(2).n(16).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full 2x16 tile at k=16 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(2).nr(16).kr(16).sr(1)
    .m(2).n(16).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL,strided_cm)10775 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C16__NEON_MLAL, strided_cm) { 10776 TEST_REQUIRES_ARM_NEON; 10777 GemmMicrokernelTester() 10778 .mr(2) 10779 .nr(16) 10780 .kr(16) 10781 .sr(1) 10782 .m(2) 10783 .n(16) 10784 .k(16) 10785 .cm_stride(19) 10786 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10787 } 10788 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 10789 10790 10791 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_eq_8)10792 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) { 10793 TEST_REQUIRES_ARM_NEON; 10794 GemmMicrokernelTester() 10795 .mr(3) 10796 .nr(8) 10797 .kr(1) 10798 .sr(1) 10799 .m(3) 10800 .n(8) 10801 .k(8) 10802 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10803 } 10804 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,strided_cn)10805 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cn) { 10806 TEST_REQUIRES_ARM_NEON; 10807 GemmMicrokernelTester() 10808 .mr(3) 10809 .nr(8) 10810 .kr(1) 10811 .sr(1) 10812 .m(3) 10813 .n(8) 10814 .k(8) 10815 .cn_stride(11) 10816 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10817 } 10818 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)10819 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) { 10820 TEST_REQUIRES_ARM_NEON; 10821 for (uint32_t n = 1; n <= 8; n++) { 10822 for (uint32_t m = 1; m <= 3; m++) { 10823 GemmMicrokernelTester() 10824 .mr(3) 10825 .nr(8) 10826 .kr(1) 10827 .sr(1) 10828 .m(m) 10829 .n(n) 10830 .k(8) 10831 .iterations(1) 10832 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10833 } 
10834 } 10835 } 10836 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_m)10837 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) { 10838 TEST_REQUIRES_ARM_NEON; 10839 for (uint32_t m = 1; m <= 3; m++) { 10840 GemmMicrokernelTester() 10841 .mr(3) 10842 .nr(8) 10843 .kr(1) 10844 .sr(1) 10845 .m(m) 10846 .n(8) 10847 .k(8) 10848 .iterations(1) 10849 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10850 } 10851 } 10852 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_n)10853 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) { 10854 TEST_REQUIRES_ARM_NEON; 10855 for (uint32_t n = 1; n <= 8; n++) { 10856 GemmMicrokernelTester() 10857 .mr(3) 10858 .nr(8) 10859 .kr(1) 10860 .sr(1) 10861 .m(3) 10862 .n(n) 10863 .k(8) 10864 .iterations(1) 10865 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10866 } 10867 } 10868 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_lt_8)10869 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) { 10870 TEST_REQUIRES_ARM_NEON; 10871 for (size_t k = 1; k < 8; k++) { 10872 GemmMicrokernelTester() 10873 .mr(3) 10874 .nr(8) 10875 .kr(1) 10876 .sr(1) 10877 .m(3) 10878 .n(8) 10879 .k(k) 10880 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10881 } 10882 } 10883 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_lt_8_subtile)10884 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) { 10885 TEST_REQUIRES_ARM_NEON; 10886 for (size_t k = 1; k < 8; k++) { 10887 for (uint32_t n = 1; n <= 8; n++) { 10888 for (uint32_t m = 1; m <= 3; m++) { 10889 GemmMicrokernelTester() 10890 .mr(3) 10891 .nr(8) 10892 .kr(1) 10893 .sr(1) 10894 .m(m) 10895 .n(n) 10896 .k(k) 10897 .iterations(1) 
10898 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10899 } 10900 } 10901 } 10902 } 10903 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_gt_8)10904 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) { 10905 TEST_REQUIRES_ARM_NEON; 10906 for (size_t k = 9; k < 16; k++) { 10907 GemmMicrokernelTester() 10908 .mr(3) 10909 .nr(8) 10910 .kr(1) 10911 .sr(1) 10912 .m(3) 10913 .n(8) 10914 .k(k) 10915 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10916 } 10917 } 10918 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_gt_8_subtile)10919 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) { 10920 TEST_REQUIRES_ARM_NEON; 10921 for (size_t k = 9; k < 16; k++) { 10922 for (uint32_t n = 1; n <= 8; n++) { 10923 for (uint32_t m = 1; m <= 3; m++) { 10924 GemmMicrokernelTester() 10925 .mr(3) 10926 .nr(8) 10927 .kr(1) 10928 .sr(1) 10929 .m(m) 10930 .n(n) 10931 .k(k) 10932 .iterations(1) 10933 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10934 } 10935 } 10936 } 10937 } 10938 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_div_8)10939 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_div_8) { 10940 TEST_REQUIRES_ARM_NEON; 10941 for (size_t k = 16; k <= 80; k += 8) { 10942 GemmMicrokernelTester() 10943 .mr(3) 10944 .nr(8) 10945 .kr(1) 10946 .sr(1) 10947 .m(3) 10948 .n(8) 10949 .k(k) 10950 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10951 } 10952 } 10953 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,k_div_8_subtile)10954 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) { 10955 TEST_REQUIRES_ARM_NEON; 10956 for (size_t k = 16; k <= 80; k += 8) 
{ 10957 for (uint32_t n = 1; n <= 8; n++) { 10958 for (uint32_t m = 1; m <= 3; m++) { 10959 GemmMicrokernelTester() 10960 .mr(3) 10961 .nr(8) 10962 .kr(1) 10963 .sr(1) 10964 .m(m) 10965 .n(n) 10966 .k(k) 10967 .iterations(1) 10968 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10969 } 10970 } 10971 } 10972 } 10973 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,n_gt_8)10974 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) { 10975 TEST_REQUIRES_ARM_NEON; 10976 for (uint32_t n = 9; n < 16; n++) { 10977 for (size_t k = 1; k <= 40; k += 9) { 10978 GemmMicrokernelTester() 10979 .mr(3) 10980 .nr(8) 10981 .kr(1) 10982 .sr(1) 10983 .m(3) 10984 .n(n) 10985 .k(k) 10986 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 10987 } 10988 } 10989 } 10990 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,n_gt_8_strided_cn)10991 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) { 10992 TEST_REQUIRES_ARM_NEON; 10993 for (uint32_t n = 9; n < 16; n++) { 10994 for (size_t k = 1; k <= 40; k += 9) { 10995 GemmMicrokernelTester() 10996 .mr(3) 10997 .nr(8) 10998 .kr(1) 10999 .sr(1) 11000 .m(3) 11001 .n(n) 11002 .k(k) 11003 .cn_stride(11) 11004 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11005 } 11006 } 11007 } 11008 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,n_gt_8_subtile)11009 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) { 11010 TEST_REQUIRES_ARM_NEON; 11011 for (uint32_t n = 9; n < 16; n++) { 11012 for (size_t k = 1; k <= 40; k += 9) { 11013 for (uint32_t m = 1; m <= 3; m++) { 11014 GemmMicrokernelTester() 11015 .mr(3) 11016 .nr(8) 11017 .kr(1) 11018 .sr(1) 11019 .m(m) 11020 .n(n) 11021 .k(k) 11022 .iterations(1) 11023 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11024 } 11025 } 11026 } 11027 } 11028 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,n_div_8)11029 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8) { 11030 TEST_REQUIRES_ARM_NEON; 11031 for (uint32_t n = 16; n <= 24; n += 8) { 11032 for (size_t k = 1; k <= 40; k += 9) { 11033 GemmMicrokernelTester() 11034 .mr(3) 11035 .nr(8) 11036 .kr(1) 11037 .sr(1) 11038 .m(3) 11039 .n(n) 11040 .k(k) 11041 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11042 } 11043 } 11044 } 11045 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,n_div_8_strided_cn)11046 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) { 11047 TEST_REQUIRES_ARM_NEON; 11048 for (uint32_t n = 16; n <= 24; n += 8) { 11049 for (size_t k = 1; k <= 40; k += 9) { 11050 GemmMicrokernelTester() 11051 .mr(3) 11052 .nr(8) 11053 .kr(1) 11054 .sr(1) 11055 .m(3) 11056 .n(n) 11057 .k(k) 11058 .cn_stride(11) 11059 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11060 } 11061 } 11062 } 11063 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,n_div_8_subtile)11064 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) { 11065 TEST_REQUIRES_ARM_NEON; 11066 for (uint32_t n = 16; n <= 24; n += 8) { 11067 for (size_t k = 1; k <= 40; k += 9) { 11068 for (uint32_t m = 1; m <= 3; m++) { 11069 GemmMicrokernelTester() 11070 .mr(3) 11071 .nr(8) 11072 .kr(1) 11073 .sr(1) 11074 .m(m) 11075 .n(n) 11076 .k(k) 11077 .iterations(1) 11078 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11079 } 11080 } 11081 } 11082 } 11083 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,small_kernel)11084 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, small_kernel) { 11085 TEST_REQUIRES_ARM_NEON; 11086 for (size_t k = 1; k <= 40; k += 9) { 11087 GemmMicrokernelTester() 11088 .mr(3) 11089 .nr(8) 11090 .kr(1) 11091 .sr(1) 11092 .m(3) 11093 .n(8) 11094 .k(k) 11095 .ks(3) 11096 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11097 } 11098 } 11099 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,small_kernel_subtile)11100 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) { 11101 TEST_REQUIRES_ARM_NEON; 11102 for (size_t k = 1; k <= 40; k += 9) { 11103 for (uint32_t n = 1; n <= 8; n++) { 11104 for (uint32_t m = 1; m <= 3; m++) { 11105 GemmMicrokernelTester() 11106 .mr(3) 11107 .nr(8) 11108 .kr(1) 11109 .sr(1) 11110 .m(m) 11111 .n(n) 11112 .k(k) 11113 .ks(3) 11114 .iterations(1) 11115 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11116 } 11117 } 11118 } 11119 } 11120 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,n_gt_8_small_kernel)11121 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) { 11122 TEST_REQUIRES_ARM_NEON; 11123 for (uint32_t n = 9; n < 16; n++) { 11124 for (size_t k = 1; k <= 40; k += 9) { 11125 GemmMicrokernelTester() 11126 .mr(3) 11127 .nr(8) 11128 .kr(1) 11129 .sr(1) 11130 .m(3) 11131 .n(n) 11132 .k(k) 11133 .ks(3) 11134 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11135 } 11136 } 11137 } 11138 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,n_div_8_small_kernel)11139 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) { 11140 TEST_REQUIRES_ARM_NEON; 11141 for (uint32_t n = 16; n <= 24; n += 8) { 11142 for (size_t k = 1; 
k <= 40; k += 9) { 11143 GemmMicrokernelTester() 11144 .mr(3) 11145 .nr(8) 11146 .kr(1) 11147 .sr(1) 11148 .m(3) 11149 .n(n) 11150 .k(k) 11151 .ks(3) 11152 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11153 } 11154 } 11155 } 11156 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,strided_cm_subtile)11157 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) { 11158 TEST_REQUIRES_ARM_NEON; 11159 for (size_t k = 1; k <= 40; k += 9) { 11160 for (uint32_t n = 1; n <= 8; n++) { 11161 for (uint32_t m = 1; m <= 3; m++) { 11162 GemmMicrokernelTester() 11163 .mr(3) 11164 .nr(8) 11165 .kr(1) 11166 .sr(1) 11167 .m(m) 11168 .n(n) 11169 .k(k) 11170 .cm_stride(11) 11171 .iterations(1) 11172 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11173 } 11174 } 11175 } 11176 } 11177 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,a_offset)11178 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, a_offset) { 11179 TEST_REQUIRES_ARM_NEON; 11180 for (size_t k = 1; k <= 40; k += 9) { 11181 GemmMicrokernelTester() 11182 .mr(3) 11183 .nr(8) 11184 .kr(1) 11185 .sr(1) 11186 .m(3) 11187 .n(8) 11188 .k(k) 11189 .ks(3) 11190 .a_offset(127) 11191 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11192 } 11193 } 11194 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,zero)11195 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, zero) { 11196 TEST_REQUIRES_ARM_NEON; 11197 for (size_t k = 1; k <= 40; k += 9) { 11198 for (uint32_t mz = 0; mz < 3; mz++) { 11199 GemmMicrokernelTester() 11200 .mr(3) 11201 .nr(8) 11202 .kr(1) 11203 .sr(1) 11204 .m(3) 11205 .n(8) 11206 .k(k) 11207 .ks(3) 11208 .a_offset(127) 11209 .zero_index(mz) 11210 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11211 } 11212 } 11213 } 11214 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,qmin)11215 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, qmin) { 11216 TEST_REQUIRES_ARM_NEON; 11217 GemmMicrokernelTester() 11218 .mr(3) 11219 .nr(8) 11220 .kr(1) 11221 .sr(1) 11222 .m(3) 11223 .n(8) 11224 .k(8) 11225 .qmin(128) 11226 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11227 } 11228 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,qmax)11229 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, qmax) { 11230 TEST_REQUIRES_ARM_NEON; 11231 GemmMicrokernelTester() 11232 .mr(3) 11233 .nr(8) 11234 .kr(1) 11235 .sr(1) 11236 .m(3) 11237 .n(8) 11238 .k(8) 11239 .qmax(128) 11240 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11241 } 11242 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM,strided_cm)11243 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE_PRFM, strided_cm) { 11244 TEST_REQUIRES_ARM_NEON; 11245 GemmMicrokernelTester() 11246 .mr(3) 11247 .nr(8) 11248 .kr(1) 11249 .sr(1) 11250 .m(3) 11251 .n(8) 11252 .k(8) 11253 .cm_stride(11) 11254 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11255 } 11256 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 11257 11258 11259 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,k_eq_16)11260 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16) { 11261 TEST_REQUIRES_ARM_NEON; 11262 GemmMicrokernelTester() 11263 .mr(3) 11264 .nr(8) 11265 .kr(2) 11266 .sr(1) 11267 .m(3) 11268 .n(8) 11269 .k(16) 11270 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11271 } 11272 
// strided_cn: m = 3, n = 8, k = 16, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(8)
    .kr(2)
    .sr(1)
    .m(3)
    .n(8)
    .k(16)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k_eq_16_subtile: k = 16, every n in [1, 8] and m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k_eq_16_subtile_m: k = 16, n = 8, every m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_eq_16_subtile_n: k = 16, m = 3, every n in [1, 8]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(3)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_16: m = 3, n = 8, k swept over [1, 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_lt_16_subtile: k in [1, 16), every n in [1, 8] and m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_gt_16: m = 3, n = 8, k swept over [17, 32).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_gt_16_subtile: k in [17, 32), every n in [1, 8] and m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_div_16: m = 3, n = 8, k = 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_div_16_subtile: k = 32, 48, ..., 160, every n in [1, 8] and m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_gt_8: m = 3, n in [9, 16), k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_8_strided_cn: m = 3, n in [9, 16), k = 1, 18, 35, 52, 69, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_8_subtile: n in [9, 16), k = 1, 18, ..., 69, every m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_div_8: m = 3, n = 16 and 24, k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_8_strided_cn: same sweep as n_div_8, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_8_subtile: n = 16 and 24, k = 1, 18, ..., 69, every m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// small_kernel: m = 3, n = 8, k = 1, 18, 35, 52, 69, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// small_kernel_subtile: ks(3), k = 1, 18, ..., 69, every n in [1, 8] and m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
11588 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_gt_8_small_kernel)11589 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) { 11590 TEST_REQUIRES_ARM_NEON; 11591 for (uint32_t n = 9; n < 16; n++) { 11592 for (size_t k = 1; k <= 80; k += 17) { 11593 GemmMicrokernelTester() 11594 .mr(3) 11595 .nr(8) 11596 .kr(2) 11597 .sr(1) 11598 .m(3) 11599 .n(n) 11600 .k(k) 11601 .ks(3) 11602 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11603 } 11604 } 11605 } 11606 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,n_div_8_small_kernel)11607 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) { 11608 TEST_REQUIRES_ARM_NEON; 11609 for (uint32_t n = 16; n <= 24; n += 8) { 11610 for (size_t k = 1; k <= 80; k += 17) { 11611 GemmMicrokernelTester() 11612 .mr(3) 11613 .nr(8) 11614 .kr(2) 11615 .sr(1) 11616 .m(3) 11617 .n(n) 11618 .k(k) 11619 .ks(3) 11620 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11621 } 11622 } 11623 } 11624 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,strided_cm_subtile)11625 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cm_subtile) { 11626 TEST_REQUIRES_ARM_NEON; 11627 for (size_t k = 1; k <= 80; k += 17) { 11628 for (uint32_t n = 1; n <= 8; n++) { 11629 for (uint32_t m = 1; m <= 3; m++) { 11630 GemmMicrokernelTester() 11631 .mr(3) 11632 .nr(8) 11633 .kr(2) 11634 .sr(1) 11635 .m(m) 11636 .n(n) 11637 .k(k) 11638 .cm_stride(11) 11639 .iterations(1) 11640 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11641 } 11642 } 11643 } 11644 } 11645 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,a_offset)11646 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, a_offset) { 11647 TEST_REQUIRES_ARM_NEON; 11648 for (size_t k = 1; k <= 80; k += 17) { 
11649 GemmMicrokernelTester() 11650 .mr(3) 11651 .nr(8) 11652 .kr(2) 11653 .sr(1) 11654 .m(3) 11655 .n(8) 11656 .k(k) 11657 .ks(3) 11658 .a_offset(251) 11659 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11660 } 11661 } 11662 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,zero)11663 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, zero) { 11664 TEST_REQUIRES_ARM_NEON; 11665 for (size_t k = 1; k <= 80; k += 17) { 11666 for (uint32_t mz = 0; mz < 3; mz++) { 11667 GemmMicrokernelTester() 11668 .mr(3) 11669 .nr(8) 11670 .kr(2) 11671 .sr(1) 11672 .m(3) 11673 .n(8) 11674 .k(k) 11675 .ks(3) 11676 .a_offset(251) 11677 .zero_index(mz) 11678 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11679 } 11680 } 11681 } 11682 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,qmin)11683 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, qmin) { 11684 TEST_REQUIRES_ARM_NEON; 11685 GemmMicrokernelTester() 11686 .mr(3) 11687 .nr(8) 11688 .kr(2) 11689 .sr(1) 11690 .m(3) 11691 .n(8) 11692 .k(16) 11693 .qmin(128) 11694 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11695 } 11696 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,qmax)11697 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, qmax) { 11698 TEST_REQUIRES_ARM_NEON; 11699 GemmMicrokernelTester() 11700 .mr(3) 11701 .nr(8) 11702 .kr(2) 11703 .sr(1) 11704 .m(3) 11705 .n(8) 11706 .k(16) 11707 .qmax(128) 11708 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11709 } 11710 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R,strided_cm)11711 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_LD1R, strided_cm) { 11712 TEST_REQUIRES_ARM_NEON; 11713 GemmMicrokernelTester() 11714 .mr(3) 11715 .nr(8) 
11716 .kr(2) 11717 .sr(1) 11718 .m(3) 11719 .n(8) 11720 .k(16) 11721 .cm_stride(11) 11722 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11723 } 11724 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 11725 11726 11727 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,k_eq_8)11728 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8) { 11729 TEST_REQUIRES_ARM_NEON; 11730 GemmMicrokernelTester() 11731 .mr(3) 11732 .nr(8) 11733 .kr(2) 11734 .sr(1) 11735 .m(3) 11736 .n(8) 11737 .k(8) 11738 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11739 } 11740 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,strided_cn)11741 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cn) { 11742 TEST_REQUIRES_ARM_NEON; 11743 GemmMicrokernelTester() 11744 .mr(3) 11745 .nr(8) 11746 .kr(2) 11747 .sr(1) 11748 .m(3) 11749 .n(8) 11750 .k(8) 11751 .cn_stride(11) 11752 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11753 } 11754 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,k_eq_8_subtile)11755 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile) { 11756 TEST_REQUIRES_ARM_NEON; 11757 for (uint32_t n = 1; n <= 8; n++) { 11758 for (uint32_t m = 1; m <= 3; m++) { 11759 GemmMicrokernelTester() 11760 .mr(3) 11761 .nr(8) 11762 .kr(2) 11763 .sr(1) 11764 .m(m) 11765 .n(n) 11766 .k(8) 11767 .iterations(1) 11768 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11769 } 11770 } 11771 } 11772 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,k_eq_8_subtile_m)11773 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) { 11774 TEST_REQUIRES_ARM_NEON; 11775 for (uint32_t m = 1; m <= 3; m++) { 
11776 GemmMicrokernelTester() 11777 .mr(3) 11778 .nr(8) 11779 .kr(2) 11780 .sr(1) 11781 .m(m) 11782 .n(8) 11783 .k(8) 11784 .iterations(1) 11785 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11786 } 11787 } 11788 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,k_eq_8_subtile_n)11789 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) { 11790 TEST_REQUIRES_ARM_NEON; 11791 for (uint32_t n = 1; n <= 8; n++) { 11792 GemmMicrokernelTester() 11793 .mr(3) 11794 .nr(8) 11795 .kr(2) 11796 .sr(1) 11797 .m(3) 11798 .n(n) 11799 .k(8) 11800 .iterations(1) 11801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11802 } 11803 } 11804 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,k_lt_8)11805 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_lt_8) { 11806 TEST_REQUIRES_ARM_NEON; 11807 for (size_t k = 1; k < 8; k++) { 11808 GemmMicrokernelTester() 11809 .mr(3) 11810 .nr(8) 11811 .kr(2) 11812 .sr(1) 11813 .m(3) 11814 .n(8) 11815 .k(k) 11816 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11817 } 11818 } 11819 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,k_lt_8_subtile)11820 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_lt_8_subtile) { 11821 TEST_REQUIRES_ARM_NEON; 11822 for (size_t k = 1; k < 8; k++) { 11823 for (uint32_t n = 1; n <= 8; n++) { 11824 for (uint32_t m = 1; m <= 3; m++) { 11825 GemmMicrokernelTester() 11826 .mr(3) 11827 .nr(8) 11828 .kr(2) 11829 .sr(1) 11830 .m(m) 11831 .n(n) 11832 .k(k) 11833 .iterations(1) 11834 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 11835 } 11836 } 11837 } 11838 } 11839 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,k_gt_8)11840 
// k_gt_8: m = 3, n = 8, k swept over [9, 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_gt_8_subtile: k in [9, 16), every n in [1, 8] and m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// k_div_8: m = 3, n = 8, k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k_div_8_subtile: k = 16, 24, ..., 80, every n in [1, 8] and m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_gt_8: m = 3, n in [9, 16), k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_8_strided_cn: same sweep as n_gt_8, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_gt_8_subtile: n in [9, 16), k = 1, 10, ..., 37, every m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_div_8: m = 3, n = 16 and 24, k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_8_strided_cn: same sweep as n_div_8, with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_8_subtile: n = 16 and 24, k = 1, 10, ..., 37, every m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// small_kernel: m = 3, n = 8, k = 1, 10, 19, 28, 37, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(2)
      .sr(1)
      .m(3)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// small_kernel_subtile: ks(3), k = 1, 10, ..., 37, every n in [1, 8] and m in [1, 3]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n_gt_8_small_kernel: ks(3), m = 3, n in [9, 16), k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n_div_8_small_kernel: ks(3), m = 3, n = 16 and 24, k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(2)
        .sr(1)
        .m(3)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,strided_cm_subtile)12093 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cm_subtile) { 12094 TEST_REQUIRES_ARM_NEON; 12095 for (size_t k = 1; k <= 40; k += 9) { 12096 for (uint32_t n = 1; n <= 8; n++) { 12097 for (uint32_t m = 1; m <= 3; m++) { 12098 GemmMicrokernelTester() 12099 .mr(3) 12100 .nr(8) 12101 .kr(2) 12102 .sr(1) 12103 .m(m) 12104 .n(n) 12105 .k(k) 12106 .cm_stride(11) 12107 .iterations(1) 12108 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12109 } 12110 } 12111 } 12112 } 12113 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,a_offset)12114 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, a_offset) { 12115 TEST_REQUIRES_ARM_NEON; 12116 for (size_t k = 1; k <= 40; k += 9) { 12117 GemmMicrokernelTester() 12118 .mr(3) 12119 .nr(8) 12120 .kr(2) 12121 .sr(1) 12122 .m(3) 12123 .n(8) 12124 .k(k) 12125 .ks(3) 12126 .a_offset(127) 12127 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12128 } 12129 } 12130 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,zero)12131 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, zero) { 12132 TEST_REQUIRES_ARM_NEON; 12133 for (size_t k = 1; k <= 40; k += 9) { 12134 for (uint32_t mz = 0; mz < 3; mz++) { 12135 GemmMicrokernelTester() 12136 .mr(3) 12137 .nr(8) 12138 .kr(2) 12139 .sr(1) 12140 .m(3) 12141 .n(8) 12142 .k(k) 12143 .ks(3) 12144 .a_offset(127) 12145 .zero_index(mz) 12146 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12147 } 12148 } 12149 } 12150 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,qmin)12151 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, qmin) { 12152 TEST_REQUIRES_ARM_NEON; 12153 GemmMicrokernelTester() 12154 .mr(3) 12155 .nr(8) 12156 .kr(2) 12157 .sr(1) 12158 .m(3) 12159 
.n(8) 12160 .k(8) 12161 .qmin(128) 12162 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12163 } 12164 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,qmax)12165 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, qmax) { 12166 TEST_REQUIRES_ARM_NEON; 12167 GemmMicrokernelTester() 12168 .mr(3) 12169 .nr(8) 12170 .kr(2) 12171 .sr(1) 12172 .m(3) 12173 .n(8) 12174 .k(8) 12175 .qmax(128) 12176 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12177 } 12178 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R,strided_cm)12179 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_LD1R, strided_cm) { 12180 TEST_REQUIRES_ARM_NEON; 12181 GemmMicrokernelTester() 12182 .mr(3) 12183 .nr(8) 12184 .kr(2) 12185 .sr(1) 12186 .m(3) 12187 .n(8) 12188 .k(8) 12189 .cm_stride(11) 12190 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12191 } 12192 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 12193 12194 12195 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_eq_8)12196 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_eq_8) { 12197 TEST_REQUIRES_ARM_NEON; 12198 GemmMicrokernelTester() 12199 .mr(3) 12200 .nr(8) 12201 .kr(2) 12202 .sr(4) 12203 .m(3) 12204 .n(8) 12205 .k(8) 12206 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12207 } 12208 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,strided_cn)12209 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, strided_cn) { 12210 TEST_REQUIRES_ARM_NEON; 12211 GemmMicrokernelTester() 12212 .mr(3) 12213 .nr(8) 12214 .kr(2) 12215 .sr(4) 12216 .m(3) 12217 .n(8) 12218 .k(8) 12219 .cn_stride(11) 12220 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12221 } 12222 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_eq_8_subtile)12223 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_eq_8_subtile) { 12224 TEST_REQUIRES_ARM_NEON; 12225 for (uint32_t n = 1; n <= 8; n++) { 12226 for (uint32_t m = 1; m <= 3; m++) { 12227 GemmMicrokernelTester() 12228 .mr(3) 12229 .nr(8) 12230 .kr(2) 12231 .sr(4) 12232 .m(m) 12233 .n(n) 12234 .k(8) 12235 .iterations(1) 12236 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12237 } 12238 } 12239 } 12240 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_eq_8_subtile_m)12241 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_eq_8_subtile_m) { 12242 TEST_REQUIRES_ARM_NEON; 12243 for (uint32_t m = 1; m <= 3; m++) { 12244 GemmMicrokernelTester() 12245 .mr(3) 12246 .nr(8) 12247 .kr(2) 12248 .sr(4) 12249 .m(m) 12250 .n(8) 12251 .k(8) 12252 .iterations(1) 12253 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12254 } 12255 } 12256 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_eq_8_subtile_n)12257 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_eq_8_subtile_n) { 12258 TEST_REQUIRES_ARM_NEON; 12259 for (uint32_t n = 1; n <= 8; n++) { 12260 GemmMicrokernelTester() 12261 .mr(3) 12262 .nr(8) 12263 .kr(2) 12264 .sr(4) 12265 .m(3) 12266 .n(n) 12267 .k(8) 12268 .iterations(1) 12269 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12270 } 12271 } 12272 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_lt_8)12273 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_lt_8) { 12274 TEST_REQUIRES_ARM_NEON; 12275 for (size_t k = 1; k < 8; k++) { 12276 GemmMicrokernelTester() 12277 .mr(3) 12278 .nr(8) 12279 .kr(2) 12280 .sr(4) 12281 .m(3) 12282 .n(8) 12283 .k(k) 12284 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12285 } 12286 } 12287 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_lt_8_subtile)12288 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_lt_8_subtile) { 12289 TEST_REQUIRES_ARM_NEON; 12290 for (size_t k = 1; k < 8; k++) { 12291 for (uint32_t n = 1; n <= 8; n++) { 12292 for (uint32_t m = 1; m <= 3; m++) { 12293 GemmMicrokernelTester() 12294 .mr(3) 12295 .nr(8) 12296 .kr(2) 12297 .sr(4) 12298 .m(m) 12299 .n(n) 12300 .k(k) 12301 .iterations(1) 12302 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12303 } 12304 } 12305 } 12306 } 12307 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_gt_8)12308 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_gt_8) { 12309 TEST_REQUIRES_ARM_NEON; 12310 for (size_t k = 9; k < 16; k++) { 12311 GemmMicrokernelTester() 12312 .mr(3) 12313 .nr(8) 12314 .kr(2) 12315 .sr(4) 12316 .m(3) 12317 .n(8) 12318 .k(k) 12319 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12320 } 12321 } 12322 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_gt_8_subtile)12323 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_gt_8_subtile) { 12324 TEST_REQUIRES_ARM_NEON; 12325 for (size_t k = 9; k < 16; k++) { 12326 for (uint32_t n = 1; n <= 8; n++) { 12327 for (uint32_t m = 1; m <= 3; m++) { 12328 GemmMicrokernelTester() 12329 .mr(3) 12330 .nr(8) 12331 .kr(2) 12332 .sr(4) 12333 .m(m) 12334 .n(n) 12335 .k(k) 12336 .iterations(1) 12337 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12338 } 12339 } 12340 } 12341 } 12342 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_div_8)12343 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_div_8) { 12344 TEST_REQUIRES_ARM_NEON; 12345 for (size_t k = 
16; k <= 80; k += 8) { 12346 GemmMicrokernelTester() 12347 .mr(3) 12348 .nr(8) 12349 .kr(2) 12350 .sr(4) 12351 .m(3) 12352 .n(8) 12353 .k(k) 12354 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12355 } 12356 } 12357 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,k_div_8_subtile)12358 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, k_div_8_subtile) { 12359 TEST_REQUIRES_ARM_NEON; 12360 for (size_t k = 16; k <= 80; k += 8) { 12361 for (uint32_t n = 1; n <= 8; n++) { 12362 for (uint32_t m = 1; m <= 3; m++) { 12363 GemmMicrokernelTester() 12364 .mr(3) 12365 .nr(8) 12366 .kr(2) 12367 .sr(4) 12368 .m(m) 12369 .n(n) 12370 .k(k) 12371 .iterations(1) 12372 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12373 } 12374 } 12375 } 12376 } 12377 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,n_gt_8)12378 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_gt_8) { 12379 TEST_REQUIRES_ARM_NEON; 12380 for (uint32_t n = 9; n < 16; n++) { 12381 for (size_t k = 1; k <= 40; k += 9) { 12382 GemmMicrokernelTester() 12383 .mr(3) 12384 .nr(8) 12385 .kr(2) 12386 .sr(4) 12387 .m(3) 12388 .n(n) 12389 .k(k) 12390 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12391 } 12392 } 12393 } 12394 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,n_gt_8_strided_cn)12395 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_gt_8_strided_cn) { 12396 TEST_REQUIRES_ARM_NEON; 12397 for (uint32_t n = 9; n < 16; n++) { 12398 for (size_t k = 1; k <= 40; k += 9) { 12399 GemmMicrokernelTester() 12400 .mr(3) 12401 .nr(8) 12402 .kr(2) 12403 .sr(4) 12404 .m(3) 12405 .n(n) 12406 .k(k) 12407 .cn_stride(11) 12408 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12409 } 12410 } 12411 } 12412 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,n_gt_8_subtile)12413 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_gt_8_subtile) { 12414 TEST_REQUIRES_ARM_NEON; 12415 for (uint32_t n = 9; n < 16; n++) { 12416 for (size_t k = 1; k <= 40; k += 9) { 12417 for (uint32_t m = 1; m <= 3; m++) { 12418 GemmMicrokernelTester() 12419 .mr(3) 12420 .nr(8) 12421 .kr(2) 12422 .sr(4) 12423 .m(m) 12424 .n(n) 12425 .k(k) 12426 .iterations(1) 12427 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12428 } 12429 } 12430 } 12431 } 12432 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,n_div_8)12433 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_div_8) { 12434 TEST_REQUIRES_ARM_NEON; 12435 for (uint32_t n = 16; n <= 24; n += 8) { 12436 for (size_t k = 1; k <= 40; k += 9) { 12437 GemmMicrokernelTester() 12438 .mr(3) 12439 .nr(8) 12440 .kr(2) 12441 .sr(4) 12442 .m(3) 12443 .n(n) 12444 .k(k) 12445 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12446 } 12447 } 12448 } 12449 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,n_div_8_strided_cn)12450 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_div_8_strided_cn) { 12451 TEST_REQUIRES_ARM_NEON; 12452 for (uint32_t n = 16; n <= 24; n += 8) { 12453 for (size_t k = 1; k <= 40; k += 9) { 12454 GemmMicrokernelTester() 12455 .mr(3) 12456 .nr(8) 12457 .kr(2) 12458 .sr(4) 12459 .m(3) 12460 .n(n) 12461 .k(k) 12462 .cn_stride(11) 12463 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12464 } 12465 } 12466 } 12467 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,n_div_8_subtile)12468 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_div_8_subtile) { 12469 TEST_REQUIRES_ARM_NEON; 12470 for (uint32_t n = 16; n <= 24; n += 8) { 12471 for (size_t k = 1; k <= 40; k += 9) { 12472 for (uint32_t m = 1; m <= 3; 
m++) { 12473 GemmMicrokernelTester() 12474 .mr(3) 12475 .nr(8) 12476 .kr(2) 12477 .sr(4) 12478 .m(m) 12479 .n(n) 12480 .k(k) 12481 .iterations(1) 12482 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12483 } 12484 } 12485 } 12486 } 12487 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,small_kernel)12488 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, small_kernel) { 12489 TEST_REQUIRES_ARM_NEON; 12490 for (size_t k = 1; k <= 40; k += 9) { 12491 GemmMicrokernelTester() 12492 .mr(3) 12493 .nr(8) 12494 .kr(2) 12495 .sr(4) 12496 .m(3) 12497 .n(8) 12498 .k(k) 12499 .ks(3) 12500 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12501 } 12502 } 12503 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,small_kernel_subtile)12504 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, small_kernel_subtile) { 12505 TEST_REQUIRES_ARM_NEON; 12506 for (size_t k = 1; k <= 40; k += 9) { 12507 for (uint32_t n = 1; n <= 8; n++) { 12508 for (uint32_t m = 1; m <= 3; m++) { 12509 GemmMicrokernelTester() 12510 .mr(3) 12511 .nr(8) 12512 .kr(2) 12513 .sr(4) 12514 .m(m) 12515 .n(n) 12516 .k(k) 12517 .ks(3) 12518 .iterations(1) 12519 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12520 } 12521 } 12522 } 12523 } 12524 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,n_gt_8_small_kernel)12525 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_gt_8_small_kernel) { 12526 TEST_REQUIRES_ARM_NEON; 12527 for (uint32_t n = 9; n < 16; n++) { 12528 for (size_t k = 1; k <= 40; k += 9) { 12529 GemmMicrokernelTester() 12530 .mr(3) 12531 .nr(8) 12532 .kr(2) 12533 .sr(4) 12534 .m(3) 12535 .n(n) 12536 .k(k) 12537 .ks(3) 12538 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12539 } 12540 } 12541 
} 12542 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,n_div_8_small_kernel)12543 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, n_div_8_small_kernel) { 12544 TEST_REQUIRES_ARM_NEON; 12545 for (uint32_t n = 16; n <= 24; n += 8) { 12546 for (size_t k = 1; k <= 40; k += 9) { 12547 GemmMicrokernelTester() 12548 .mr(3) 12549 .nr(8) 12550 .kr(2) 12551 .sr(4) 12552 .m(3) 12553 .n(n) 12554 .k(k) 12555 .ks(3) 12556 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12557 } 12558 } 12559 } 12560 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,strided_cm_subtile)12561 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, strided_cm_subtile) { 12562 TEST_REQUIRES_ARM_NEON; 12563 for (size_t k = 1; k <= 40; k += 9) { 12564 for (uint32_t n = 1; n <= 8; n++) { 12565 for (uint32_t m = 1; m <= 3; m++) { 12566 GemmMicrokernelTester() 12567 .mr(3) 12568 .nr(8) 12569 .kr(2) 12570 .sr(4) 12571 .m(m) 12572 .n(n) 12573 .k(k) 12574 .cm_stride(11) 12575 .iterations(1) 12576 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12577 } 12578 } 12579 } 12580 } 12581 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,a_offset)12582 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, a_offset) { 12583 TEST_REQUIRES_ARM_NEON; 12584 for (size_t k = 1; k <= 40; k += 9) { 12585 GemmMicrokernelTester() 12586 .mr(3) 12587 .nr(8) 12588 .kr(2) 12589 .sr(4) 12590 .m(3) 12591 .n(8) 12592 .k(k) 12593 .ks(3) 12594 .a_offset(127) 12595 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12596 } 12597 } 12598 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,zero)12599 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, zero) { 12600 TEST_REQUIRES_ARM_NEON; 12601 for (size_t k = 1; k <= 40; k += 9) { 12602 for (uint32_t mz = 0; mz < 3; mz++) { 12603 GemmMicrokernelTester() 12604 .mr(3) 12605 
.nr(8) 12606 .kr(2) 12607 .sr(4) 12608 .m(3) 12609 .n(8) 12610 .k(k) 12611 .ks(3) 12612 .a_offset(127) 12613 .zero_index(mz) 12614 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12615 } 12616 } 12617 } 12618 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,qmin)12619 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, qmin) { 12620 TEST_REQUIRES_ARM_NEON; 12621 GemmMicrokernelTester() 12622 .mr(3) 12623 .nr(8) 12624 .kr(2) 12625 .sr(4) 12626 .m(3) 12627 .n(8) 12628 .k(8) 12629 .qmin(128) 12630 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12631 } 12632 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,qmax)12633 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, qmax) { 12634 TEST_REQUIRES_ARM_NEON; 12635 GemmMicrokernelTester() 12636 .mr(3) 12637 .nr(8) 12638 .kr(2) 12639 .sr(4) 12640 .m(3) 12641 .n(8) 12642 .k(8) 12643 .qmax(128) 12644 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12645 } 12646 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL,strided_cm)12647 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MULL, strided_cm) { 12648 TEST_REQUIRES_ARM_NEON; 12649 GemmMicrokernelTester() 12650 .mr(3) 12651 .nr(8) 12652 .kr(2) 12653 .sr(4) 12654 .m(3) 12655 .n(8) 12656 .k(8) 12657 .cm_stride(11) 12658 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12659 } 12660 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 12661 12662 12663 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_eq_16)12664 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_eq_16) { 12665 TEST_REQUIRES_ARM_NEON; 12666 GemmMicrokernelTester() 12667 .mr(3) 12668 .nr(8) 12669 .kr(4) 12670 .sr(1) 12671 .m(3) 12672 .n(8) 12673 .k(16) 12674 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12675 } 12676 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,strided_cn)12677 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, strided_cn) { 12678 TEST_REQUIRES_ARM_NEON; 12679 GemmMicrokernelTester() 12680 .mr(3) 12681 .nr(8) 12682 .kr(4) 12683 .sr(1) 12684 .m(3) 12685 .n(8) 12686 .k(16) 12687 .cn_stride(11) 12688 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12689 } 12690 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_eq_16_subtile)12691 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) { 12692 TEST_REQUIRES_ARM_NEON; 12693 for (uint32_t n = 1; n <= 8; n++) { 12694 for (uint32_t m = 1; m <= 3; m++) { 12695 GemmMicrokernelTester() 12696 .mr(3) 12697 .nr(8) 12698 .kr(4) 12699 .sr(1) 12700 .m(m) 12701 .n(n) 12702 .k(16) 12703 .iterations(1) 12704 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12705 } 12706 } 12707 } 12708 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_eq_16_subtile_m)12709 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 12710 TEST_REQUIRES_ARM_NEON; 12711 for (uint32_t m = 1; m <= 3; m++) { 12712 GemmMicrokernelTester() 12713 .mr(3) 12714 .nr(8) 12715 .kr(4) 12716 .sr(1) 12717 .m(m) 12718 .n(8) 12719 .k(16) 12720 .iterations(1) 12721 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12722 } 12723 } 12724 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_eq_16_subtile_n)12725 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 12726 TEST_REQUIRES_ARM_NEON; 12727 for (uint32_t n = 1; n <= 8; n++) { 12728 GemmMicrokernelTester() 12729 .mr(3) 12730 .nr(8) 12731 .kr(4) 12732 .sr(1) 12733 
.m(3) 12734 .n(n) 12735 .k(16) 12736 .iterations(1) 12737 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12738 } 12739 } 12740 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_lt_16)12741 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_lt_16) { 12742 TEST_REQUIRES_ARM_NEON; 12743 for (size_t k = 1; k < 16; k++) { 12744 GemmMicrokernelTester() 12745 .mr(3) 12746 .nr(8) 12747 .kr(4) 12748 .sr(1) 12749 .m(3) 12750 .n(8) 12751 .k(k) 12752 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12753 } 12754 } 12755 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_lt_16_subtile)12756 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) { 12757 TEST_REQUIRES_ARM_NEON; 12758 for (size_t k = 1; k < 16; k++) { 12759 for (uint32_t n = 1; n <= 8; n++) { 12760 for (uint32_t m = 1; m <= 3; m++) { 12761 GemmMicrokernelTester() 12762 .mr(3) 12763 .nr(8) 12764 .kr(4) 12765 .sr(1) 12766 .m(m) 12767 .n(n) 12768 .k(k) 12769 .iterations(1) 12770 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12771 } 12772 } 12773 } 12774 } 12775 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_gt_16)12776 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_gt_16) { 12777 TEST_REQUIRES_ARM_NEON; 12778 for (size_t k = 17; k < 32; k++) { 12779 GemmMicrokernelTester() 12780 .mr(3) 12781 .nr(8) 12782 .kr(4) 12783 .sr(1) 12784 .m(3) 12785 .n(8) 12786 .k(k) 12787 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12788 } 12789 } 12790 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_gt_16_subtile)12791 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) { 12792 TEST_REQUIRES_ARM_NEON; 12793 for (size_t k = 17; k < 32; k++) { 
12794 for (uint32_t n = 1; n <= 8; n++) { 12795 for (uint32_t m = 1; m <= 3; m++) { 12796 GemmMicrokernelTester() 12797 .mr(3) 12798 .nr(8) 12799 .kr(4) 12800 .sr(1) 12801 .m(m) 12802 .n(n) 12803 .k(k) 12804 .iterations(1) 12805 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12806 } 12807 } 12808 } 12809 } 12810 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_div_16)12811 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_div_16) { 12812 TEST_REQUIRES_ARM_NEON; 12813 for (size_t k = 32; k <= 160; k += 16) { 12814 GemmMicrokernelTester() 12815 .mr(3) 12816 .nr(8) 12817 .kr(4) 12818 .sr(1) 12819 .m(3) 12820 .n(8) 12821 .k(k) 12822 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12823 } 12824 } 12825 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,k_div_16_subtile)12826 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, k_div_16_subtile) { 12827 TEST_REQUIRES_ARM_NEON; 12828 for (size_t k = 32; k <= 160; k += 16) { 12829 for (uint32_t n = 1; n <= 8; n++) { 12830 for (uint32_t m = 1; m <= 3; m++) { 12831 GemmMicrokernelTester() 12832 .mr(3) 12833 .nr(8) 12834 .kr(4) 12835 .sr(1) 12836 .m(m) 12837 .n(n) 12838 .k(k) 12839 .iterations(1) 12840 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12841 } 12842 } 12843 } 12844 } 12845 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,n_gt_8)12846 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_gt_8) { 12847 TEST_REQUIRES_ARM_NEON; 12848 for (uint32_t n = 9; n < 16; n++) { 12849 for (size_t k = 1; k <= 80; k += 17) { 12850 GemmMicrokernelTester() 12851 .mr(3) 12852 .nr(8) 12853 .kr(4) 12854 .sr(1) 12855 .m(3) 12856 .n(n) 12857 .k(k) 12858 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 12859 } 12860 } 12861 } 12862 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,n_gt_8_strided_cn)12863 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) { 12864 TEST_REQUIRES_ARM_NEON; 12865 for (uint32_t n = 9; n < 16; n++) { 12866 for (size_t k = 1; k <= 80; k += 17) { 12867 GemmMicrokernelTester() 12868 .mr(3) 12869 .nr(8) 12870 .kr(4) 12871 .sr(1) 12872 .m(3) 12873 .n(n) 12874 .k(k) 12875 .cn_stride(11) 12876 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12877 } 12878 } 12879 } 12880 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,n_gt_8_subtile)12881 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) { 12882 TEST_REQUIRES_ARM_NEON; 12883 for (uint32_t n = 9; n < 16; n++) { 12884 for (size_t k = 1; k <= 80; k += 17) { 12885 for (uint32_t m = 1; m <= 3; m++) { 12886 GemmMicrokernelTester() 12887 .mr(3) 12888 .nr(8) 12889 .kr(4) 12890 .sr(1) 12891 .m(m) 12892 .n(n) 12893 .k(k) 12894 .iterations(1) 12895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12896 } 12897 } 12898 } 12899 } 12900 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,n_div_8)12901 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_div_8) { 12902 TEST_REQUIRES_ARM_NEON; 12903 for (uint32_t n = 16; n <= 24; n += 8) { 12904 for (size_t k = 1; k <= 80; k += 17) { 12905 GemmMicrokernelTester() 12906 .mr(3) 12907 .nr(8) 12908 .kr(4) 12909 .sr(1) 12910 .m(3) 12911 .n(n) 12912 .k(k) 12913 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12914 } 12915 } 12916 } 12917 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,n_div_8_strided_cn)12918 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) { 12919 TEST_REQUIRES_ARM_NEON; 12920 for (uint32_t n = 16; n <= 24; 
n += 8) { 12921 for (size_t k = 1; k <= 80; k += 17) { 12922 GemmMicrokernelTester() 12923 .mr(3) 12924 .nr(8) 12925 .kr(4) 12926 .sr(1) 12927 .m(3) 12928 .n(n) 12929 .k(k) 12930 .cn_stride(11) 12931 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12932 } 12933 } 12934 } 12935 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,n_div_8_subtile)12936 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_div_8_subtile) { 12937 TEST_REQUIRES_ARM_NEON; 12938 for (uint32_t n = 16; n <= 24; n += 8) { 12939 for (size_t k = 1; k <= 80; k += 17) { 12940 for (uint32_t m = 1; m <= 3; m++) { 12941 GemmMicrokernelTester() 12942 .mr(3) 12943 .nr(8) 12944 .kr(4) 12945 .sr(1) 12946 .m(m) 12947 .n(n) 12948 .k(k) 12949 .iterations(1) 12950 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12951 } 12952 } 12953 } 12954 } 12955 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,small_kernel)12956 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, small_kernel) { 12957 TEST_REQUIRES_ARM_NEON; 12958 for (size_t k = 1; k <= 80; k += 17) { 12959 GemmMicrokernelTester() 12960 .mr(3) 12961 .nr(8) 12962 .kr(4) 12963 .sr(1) 12964 .m(3) 12965 .n(8) 12966 .k(k) 12967 .ks(3) 12968 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12969 } 12970 } 12971 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,small_kernel_subtile)12972 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, small_kernel_subtile) { 12973 TEST_REQUIRES_ARM_NEON; 12974 for (size_t k = 1; k <= 80; k += 17) { 12975 for (uint32_t n = 1; n <= 8; n++) { 12976 for (uint32_t m = 1; m <= 3; m++) { 12977 GemmMicrokernelTester() 12978 .mr(3) 12979 .nr(8) 12980 .kr(4) 12981 .sr(1) 12982 .m(m) 12983 .n(n) 12984 .k(k) 12985 .ks(3) 12986 .iterations(1) 12987 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 12988 } 12989 } 12990 } 12991 } 12992 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,n_gt_8_small_kernel)12993 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) { 12994 TEST_REQUIRES_ARM_NEON; 12995 for (uint32_t n = 9; n < 16; n++) { 12996 for (size_t k = 1; k <= 80; k += 17) { 12997 GemmMicrokernelTester() 12998 .mr(3) 12999 .nr(8) 13000 .kr(4) 13001 .sr(1) 13002 .m(3) 13003 .n(n) 13004 .k(k) 13005 .ks(3) 13006 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13007 } 13008 } 13009 } 13010 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,n_div_8_small_kernel)13011 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) { 13012 TEST_REQUIRES_ARM_NEON; 13013 for (uint32_t n = 16; n <= 24; n += 8) { 13014 for (size_t k = 1; k <= 80; k += 17) { 13015 GemmMicrokernelTester() 13016 .mr(3) 13017 .nr(8) 13018 .kr(4) 13019 .sr(1) 13020 .m(3) 13021 .n(n) 13022 .k(k) 13023 .ks(3) 13024 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13025 } 13026 } 13027 } 13028 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,strided_cm_subtile)13029 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, strided_cm_subtile) { 13030 TEST_REQUIRES_ARM_NEON; 13031 for (size_t k = 1; k <= 80; k += 17) { 13032 for (uint32_t n = 1; n <= 8; n++) { 13033 for (uint32_t m = 1; m <= 3; m++) { 13034 GemmMicrokernelTester() 13035 .mr(3) 13036 .nr(8) 13037 .kr(4) 13038 .sr(1) 13039 .m(m) 13040 .n(n) 13041 .k(k) 13042 .cm_stride(11) 13043 .iterations(1) 13044 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13045 } 13046 } 13047 } 13048 } 13049 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  // ks = 3 and a_offset = 251; k takes 1, 18, 35, 52, 69.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    GemmMicrokernelTester()
      .mr(3).nr(8).kr(4).sr(1)
      .m(3).n(8).k(kc)
      .ks(3)
      .a_offset(251)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  // As a_offset, additionally setting zero_index to each of 0, 1, 2.
  for (size_t kc = 1; kc <= 80; kc += 17) {
    for (uint32_t zi = 0; zi < 3; ++zi) {
      GemmMicrokernelTester()
        .mr(3).nr(8).kr(4).sr(1)
        .m(3).n(8).k(kc)
        .ks(3)
        .a_offset(251)
        .zero_index(zi)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  // Single m = 3, n = 8, k = 16 case with qmin = 128.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  // Single m = 3, n = 8, k = 16 case with qmax = 128.
  GemmMicrokernelTester()
    .mr(3).nr(8).kr(4).sr(1)
    .m(3).n(8).k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r,
          xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R,strided_cm)13115 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_LD1R, strided_cm) { 13116 TEST_REQUIRES_ARM_NEON; 13117 GemmMicrokernelTester() 13118 .mr(3) 13119 .nr(8) 13120 .kr(4) 13121 .sr(1) 13122 .m(3) 13123 .n(8) 13124 .k(16) 13125 .cm_stride(11) 13126 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13127 } 13128 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 13129 13130 13131 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_eq_8)13132 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_eq_8) { 13133 TEST_REQUIRES_ARM_NEON; 13134 GemmMicrokernelTester() 13135 .mr(3) 13136 .nr(8) 13137 .kr(4) 13138 .sr(1) 13139 .m(3) 13140 .n(8) 13141 .k(8) 13142 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13143 } 13144 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,strided_cn)13145 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, strided_cn) { 13146 TEST_REQUIRES_ARM_NEON; 13147 GemmMicrokernelTester() 13148 .mr(3) 13149 .nr(8) 13150 .kr(4) 13151 .sr(1) 13152 .m(3) 13153 .n(8) 13154 .k(8) 13155 .cn_stride(11) 13156 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13157 } 13158 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_eq_8_subtile)13159 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_eq_8_subtile) { 13160 TEST_REQUIRES_ARM_NEON; 13161 for (uint32_t n = 1; n <= 8; n++) { 13162 for (uint32_t m = 1; m <= 3; m++) { 13163 GemmMicrokernelTester() 13164 .mr(3) 13165 .nr(8) 13166 .kr(4) 13167 .sr(1) 13168 .m(m) 13169 .n(n) 13170 .k(8) 13171 .iterations(1) 13172 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13173 } 13174 } 13175 } 13176 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_eq_8_subtile_m)13177 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) { 13178 TEST_REQUIRES_ARM_NEON; 13179 for (uint32_t m = 1; m <= 3; m++) { 13180 GemmMicrokernelTester() 13181 .mr(3) 13182 .nr(8) 13183 .kr(4) 13184 .sr(1) 13185 .m(m) 13186 .n(8) 13187 .k(8) 13188 .iterations(1) 13189 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13190 } 13191 } 13192 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_eq_8_subtile_n)13193 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) { 13194 TEST_REQUIRES_ARM_NEON; 13195 for (uint32_t n = 1; n <= 8; n++) { 13196 GemmMicrokernelTester() 13197 .mr(3) 13198 .nr(8) 13199 .kr(4) 13200 .sr(1) 13201 .m(3) 13202 .n(n) 13203 .k(8) 13204 .iterations(1) 13205 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13206 } 13207 } 13208 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_lt_8)13209 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_lt_8) { 13210 TEST_REQUIRES_ARM_NEON; 13211 for (size_t k = 1; k < 8; k++) { 13212 GemmMicrokernelTester() 13213 .mr(3) 13214 .nr(8) 13215 .kr(4) 13216 .sr(1) 13217 .m(3) 13218 .n(8) 13219 .k(k) 13220 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13221 } 13222 } 13223 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_lt_8_subtile)13224 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_lt_8_subtile) { 13225 TEST_REQUIRES_ARM_NEON; 13226 for (size_t k = 1; k < 8; k++) { 13227 for (uint32_t n = 1; n <= 8; n++) { 13228 for (uint32_t m = 1; m <= 3; m++) { 13229 GemmMicrokernelTester() 13230 .mr(3) 13231 .nr(8) 13232 .kr(4) 13233 .sr(1) 13234 .m(m) 13235 .n(n) 13236 .k(k) 13237 .iterations(1) 13238 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13239 } 13240 } 13241 } 13242 } 13243 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_gt_8)13244 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_gt_8) { 13245 TEST_REQUIRES_ARM_NEON; 13246 for (size_t k = 9; k < 16; k++) { 13247 GemmMicrokernelTester() 13248 .mr(3) 13249 .nr(8) 13250 .kr(4) 13251 .sr(1) 13252 .m(3) 13253 .n(8) 13254 .k(k) 13255 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13256 } 13257 } 13258 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_gt_8_subtile)13259 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_gt_8_subtile) { 13260 TEST_REQUIRES_ARM_NEON; 13261 for (size_t k = 9; k < 16; k++) { 13262 for (uint32_t n = 1; n <= 8; n++) { 13263 for (uint32_t m = 1; m <= 3; m++) { 13264 GemmMicrokernelTester() 13265 .mr(3) 13266 .nr(8) 13267 .kr(4) 13268 .sr(1) 13269 .m(m) 13270 .n(n) 13271 .k(k) 13272 .iterations(1) 13273 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13274 } 13275 } 13276 } 13277 } 13278 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_div_8)13279 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_div_8) { 13280 TEST_REQUIRES_ARM_NEON; 13281 for (size_t k = 16; k <= 80; k += 8) { 13282 GemmMicrokernelTester() 13283 .mr(3) 13284 .nr(8) 13285 .kr(4) 13286 .sr(1) 13287 .m(3) 13288 .n(8) 13289 .k(k) 13290 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13291 } 13292 } 13293 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,k_div_8_subtile)13294 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, k_div_8_subtile) { 13295 TEST_REQUIRES_ARM_NEON; 13296 for (size_t k = 16; k <= 80; k += 8) { 13297 for (uint32_t n = 1; n <= 8; n++) 
{ 13298 for (uint32_t m = 1; m <= 3; m++) { 13299 GemmMicrokernelTester() 13300 .mr(3) 13301 .nr(8) 13302 .kr(4) 13303 .sr(1) 13304 .m(m) 13305 .n(n) 13306 .k(k) 13307 .iterations(1) 13308 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13309 } 13310 } 13311 } 13312 } 13313 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,n_gt_8)13314 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_gt_8) { 13315 TEST_REQUIRES_ARM_NEON; 13316 for (uint32_t n = 9; n < 16; n++) { 13317 for (size_t k = 1; k <= 40; k += 9) { 13318 GemmMicrokernelTester() 13319 .mr(3) 13320 .nr(8) 13321 .kr(4) 13322 .sr(1) 13323 .m(3) 13324 .n(n) 13325 .k(k) 13326 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13327 } 13328 } 13329 } 13330 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,n_gt_8_strided_cn)13331 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) { 13332 TEST_REQUIRES_ARM_NEON; 13333 for (uint32_t n = 9; n < 16; n++) { 13334 for (size_t k = 1; k <= 40; k += 9) { 13335 GemmMicrokernelTester() 13336 .mr(3) 13337 .nr(8) 13338 .kr(4) 13339 .sr(1) 13340 .m(3) 13341 .n(n) 13342 .k(k) 13343 .cn_stride(11) 13344 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13345 } 13346 } 13347 } 13348 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,n_gt_8_subtile)13349 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_gt_8_subtile) { 13350 TEST_REQUIRES_ARM_NEON; 13351 for (uint32_t n = 9; n < 16; n++) { 13352 for (size_t k = 1; k <= 40; k += 9) { 13353 for (uint32_t m = 1; m <= 3; m++) { 13354 GemmMicrokernelTester() 13355 .mr(3) 13356 .nr(8) 13357 .kr(4) 13358 .sr(1) 13359 .m(m) 13360 .n(n) 13361 .k(k) 13362 .iterations(1) 13363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13364 } 13365 } 13366 } 13367 } 13368 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,n_div_8)13369 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_div_8) { 13370 TEST_REQUIRES_ARM_NEON; 13371 for (uint32_t n = 16; n <= 24; n += 8) { 13372 for (size_t k = 1; k <= 40; k += 9) { 13373 GemmMicrokernelTester() 13374 .mr(3) 13375 .nr(8) 13376 .kr(4) 13377 .sr(1) 13378 .m(3) 13379 .n(n) 13380 .k(k) 13381 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13382 } 13383 } 13384 } 13385 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,n_div_8_strided_cn)13386 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) { 13387 TEST_REQUIRES_ARM_NEON; 13388 for (uint32_t n = 16; n <= 24; n += 8) { 13389 for (size_t k = 1; k <= 40; k += 9) { 13390 GemmMicrokernelTester() 13391 .mr(3) 13392 .nr(8) 13393 .kr(4) 13394 .sr(1) 13395 .m(3) 13396 .n(n) 13397 .k(k) 13398 .cn_stride(11) 13399 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13400 } 13401 } 13402 } 13403 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,n_div_8_subtile)13404 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_div_8_subtile) { 13405 TEST_REQUIRES_ARM_NEON; 13406 for (uint32_t n = 16; n <= 24; n += 8) { 13407 for (size_t k = 1; k <= 40; k += 9) { 13408 for (uint32_t m = 1; m <= 3; m++) { 13409 GemmMicrokernelTester() 13410 .mr(3) 13411 .nr(8) 13412 .kr(4) 13413 .sr(1) 13414 .m(m) 13415 .n(n) 13416 .k(k) 13417 .iterations(1) 13418 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13419 } 13420 } 13421 } 13422 } 13423 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,small_kernel)13424 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, small_kernel) { 13425 
TEST_REQUIRES_ARM_NEON; 13426 for (size_t k = 1; k <= 40; k += 9) { 13427 GemmMicrokernelTester() 13428 .mr(3) 13429 .nr(8) 13430 .kr(4) 13431 .sr(1) 13432 .m(3) 13433 .n(8) 13434 .k(k) 13435 .ks(3) 13436 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13437 } 13438 } 13439 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,small_kernel_subtile)13440 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, small_kernel_subtile) { 13441 TEST_REQUIRES_ARM_NEON; 13442 for (size_t k = 1; k <= 40; k += 9) { 13443 for (uint32_t n = 1; n <= 8; n++) { 13444 for (uint32_t m = 1; m <= 3; m++) { 13445 GemmMicrokernelTester() 13446 .mr(3) 13447 .nr(8) 13448 .kr(4) 13449 .sr(1) 13450 .m(m) 13451 .n(n) 13452 .k(k) 13453 .ks(3) 13454 .iterations(1) 13455 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13456 } 13457 } 13458 } 13459 } 13460 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,n_gt_8_small_kernel)13461 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) { 13462 TEST_REQUIRES_ARM_NEON; 13463 for (uint32_t n = 9; n < 16; n++) { 13464 for (size_t k = 1; k <= 40; k += 9) { 13465 GemmMicrokernelTester() 13466 .mr(3) 13467 .nr(8) 13468 .kr(4) 13469 .sr(1) 13470 .m(3) 13471 .n(n) 13472 .k(k) 13473 .ks(3) 13474 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13475 } 13476 } 13477 } 13478 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,n_div_8_small_kernel)13479 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) { 13480 TEST_REQUIRES_ARM_NEON; 13481 for (uint32_t n = 16; n <= 24; n += 8) { 13482 for (size_t k = 1; k <= 40; k += 9) { 13483 GemmMicrokernelTester() 13484 .mr(3) 13485 .nr(8) 13486 .kr(4) 13487 .sr(1) 13488 .m(3) 13489 .n(n) 13490 .k(k) 13491 .ks(3) 13492 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13493 } 13494 } 13495 } 13496 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,strided_cm_subtile)13497 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, strided_cm_subtile) { 13498 TEST_REQUIRES_ARM_NEON; 13499 for (size_t k = 1; k <= 40; k += 9) { 13500 for (uint32_t n = 1; n <= 8; n++) { 13501 for (uint32_t m = 1; m <= 3; m++) { 13502 GemmMicrokernelTester() 13503 .mr(3) 13504 .nr(8) 13505 .kr(4) 13506 .sr(1) 13507 .m(m) 13508 .n(n) 13509 .k(k) 13510 .cm_stride(11) 13511 .iterations(1) 13512 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13513 } 13514 } 13515 } 13516 } 13517 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,a_offset)13518 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, a_offset) { 13519 TEST_REQUIRES_ARM_NEON; 13520 for (size_t k = 1; k <= 40; k += 9) { 13521 GemmMicrokernelTester() 13522 .mr(3) 13523 .nr(8) 13524 .kr(4) 13525 .sr(1) 13526 .m(3) 13527 .n(8) 13528 .k(k) 13529 .ks(3) 13530 .a_offset(127) 13531 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13532 } 13533 } 13534 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,zero)13535 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, zero) { 13536 TEST_REQUIRES_ARM_NEON; 13537 for (size_t k = 1; k <= 40; k += 9) { 13538 for (uint32_t mz = 0; mz < 3; mz++) { 13539 GemmMicrokernelTester() 13540 .mr(3) 13541 .nr(8) 13542 .kr(4) 13543 .sr(1) 13544 .m(3) 13545 .n(8) 13546 .k(k) 13547 .ks(3) 13548 .a_offset(127) 13549 .zero_index(mz) 13550 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13551 } 13552 } 13553 } 13554 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,qmin)13555 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, qmin) { 13556 TEST_REQUIRES_ARM_NEON; 13557 GemmMicrokernelTester() 13558 .mr(3) 13559 .nr(8) 13560 .kr(4) 13561 .sr(1) 13562 .m(3) 13563 .n(8) 13564 .k(8) 13565 .qmin(128) 13566 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13567 } 13568 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,qmax)13569 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, qmax) { 13570 TEST_REQUIRES_ARM_NEON; 13571 GemmMicrokernelTester() 13572 .mr(3) 13573 .nr(8) 13574 .kr(4) 13575 .sr(1) 13576 .m(3) 13577 .n(8) 13578 .k(8) 13579 .qmax(128) 13580 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13581 } 13582 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R,strided_cm)13583 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD1R, strided_cm) { 13584 TEST_REQUIRES_ARM_NEON; 13585 GemmMicrokernelTester() 13586 .mr(3) 13587 .nr(8) 13588 .kr(4) 13589 .sr(1) 13590 .m(3) 13591 .n(8) 13592 .k(8) 13593 .cm_stride(11) 13594 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13595 } 13596 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 13597 13598 13599 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_eq_16)13600 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16) { 13601 TEST_REQUIRES_ARM_NEON; 13602 GemmMicrokernelTester() 13603 .mr(3) 13604 .nr(8) 13605 .kr(4) 13606 .sr(2) 13607 .m(3) 13608 .n(8) 13609 .k(16) 13610 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13611 } 13612 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,strided_cn)13613 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cn) { 13614 TEST_REQUIRES_ARM_NEON; 13615 GemmMicrokernelTester() 13616 .mr(3) 13617 
.nr(8) 13618 .kr(4) 13619 .sr(2) 13620 .m(3) 13621 .n(8) 13622 .k(16) 13623 .cn_stride(11) 13624 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13625 } 13626 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_eq_16_subtile)13627 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile) { 13628 TEST_REQUIRES_ARM_NEON; 13629 for (uint32_t n = 1; n <= 8; n++) { 13630 for (uint32_t m = 1; m <= 3; m++) { 13631 GemmMicrokernelTester() 13632 .mr(3) 13633 .nr(8) 13634 .kr(4) 13635 .sr(2) 13636 .m(m) 13637 .n(n) 13638 .k(16) 13639 .iterations(1) 13640 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13641 } 13642 } 13643 } 13644 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_eq_16_subtile_m)13645 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile_m) { 13646 TEST_REQUIRES_ARM_NEON; 13647 for (uint32_t m = 1; m <= 3; m++) { 13648 GemmMicrokernelTester() 13649 .mr(3) 13650 .nr(8) 13651 .kr(4) 13652 .sr(2) 13653 .m(m) 13654 .n(8) 13655 .k(16) 13656 .iterations(1) 13657 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13658 } 13659 } 13660 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_eq_16_subtile_n)13661 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile_n) { 13662 TEST_REQUIRES_ARM_NEON; 13663 for (uint32_t n = 1; n <= 8; n++) { 13664 GemmMicrokernelTester() 13665 .mr(3) 13666 .nr(8) 13667 .kr(4) 13668 .sr(2) 13669 .m(3) 13670 .n(n) 13671 .k(16) 13672 .iterations(1) 13673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13674 } 13675 } 13676 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_lt_16)13677 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_lt_16) { 13678 TEST_REQUIRES_ARM_NEON; 13679 for (size_t k = 1; 
k < 16; k++) { 13680 GemmMicrokernelTester() 13681 .mr(3) 13682 .nr(8) 13683 .kr(4) 13684 .sr(2) 13685 .m(3) 13686 .n(8) 13687 .k(k) 13688 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13689 } 13690 } 13691 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_lt_16_subtile)13692 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_lt_16_subtile) { 13693 TEST_REQUIRES_ARM_NEON; 13694 for (size_t k = 1; k < 16; k++) { 13695 for (uint32_t n = 1; n <= 8; n++) { 13696 for (uint32_t m = 1; m <= 3; m++) { 13697 GemmMicrokernelTester() 13698 .mr(3) 13699 .nr(8) 13700 .kr(4) 13701 .sr(2) 13702 .m(m) 13703 .n(n) 13704 .k(k) 13705 .iterations(1) 13706 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13707 } 13708 } 13709 } 13710 } 13711 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_gt_16)13712 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_gt_16) { 13713 TEST_REQUIRES_ARM_NEON; 13714 for (size_t k = 17; k < 32; k++) { 13715 GemmMicrokernelTester() 13716 .mr(3) 13717 .nr(8) 13718 .kr(4) 13719 .sr(2) 13720 .m(3) 13721 .n(8) 13722 .k(k) 13723 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13724 } 13725 } 13726 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_gt_16_subtile)13727 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_gt_16_subtile) { 13728 TEST_REQUIRES_ARM_NEON; 13729 for (size_t k = 17; k < 32; k++) { 13730 for (uint32_t n = 1; n <= 8; n++) { 13731 for (uint32_t m = 1; m <= 3; m++) { 13732 GemmMicrokernelTester() 13733 .mr(3) 13734 .nr(8) 13735 .kr(4) 13736 .sr(2) 13737 .m(m) 13738 .n(n) 13739 .k(k) 13740 .iterations(1) 13741 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13742 } 13743 } 13744 } 13745 } 13746 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_div_16)13747 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_div_16) { 13748 TEST_REQUIRES_ARM_NEON; 13749 for (size_t k = 32; k <= 160; k += 16) { 13750 GemmMicrokernelTester() 13751 .mr(3) 13752 .nr(8) 13753 .kr(4) 13754 .sr(2) 13755 .m(3) 13756 .n(8) 13757 .k(k) 13758 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13759 } 13760 } 13761 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,k_div_16_subtile)13762 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_div_16_subtile) { 13763 TEST_REQUIRES_ARM_NEON; 13764 for (size_t k = 32; k <= 160; k += 16) { 13765 for (uint32_t n = 1; n <= 8; n++) { 13766 for (uint32_t m = 1; m <= 3; m++) { 13767 GemmMicrokernelTester() 13768 .mr(3) 13769 .nr(8) 13770 .kr(4) 13771 .sr(2) 13772 .m(m) 13773 .n(n) 13774 .k(k) 13775 .iterations(1) 13776 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13777 } 13778 } 13779 } 13780 } 13781 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,n_gt_8)13782 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8) { 13783 TEST_REQUIRES_ARM_NEON; 13784 for (uint32_t n = 9; n < 16; n++) { 13785 for (size_t k = 1; k <= 80; k += 17) { 13786 GemmMicrokernelTester() 13787 .mr(3) 13788 .nr(8) 13789 .kr(4) 13790 .sr(2) 13791 .m(3) 13792 .n(n) 13793 .k(k) 13794 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13795 } 13796 } 13797 } 13798 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,n_gt_8_strided_cn)13799 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_strided_cn) { 13800 TEST_REQUIRES_ARM_NEON; 13801 for (uint32_t n = 9; n < 16; n++) { 13802 for (size_t k = 1; k <= 80; k += 17) { 13803 GemmMicrokernelTester() 13804 .mr(3) 13805 .nr(8) 13806 .kr(4) 13807 .sr(2) 13808 .m(3) 13809 .n(n) 13810 .k(k) 13811 
.cn_stride(11) 13812 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13813 } 13814 } 13815 } 13816 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,n_gt_8_subtile)13817 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_subtile) { 13818 TEST_REQUIRES_ARM_NEON; 13819 for (uint32_t n = 9; n < 16; n++) { 13820 for (size_t k = 1; k <= 80; k += 17) { 13821 for (uint32_t m = 1; m <= 3; m++) { 13822 GemmMicrokernelTester() 13823 .mr(3) 13824 .nr(8) 13825 .kr(4) 13826 .sr(2) 13827 .m(m) 13828 .n(n) 13829 .k(k) 13830 .iterations(1) 13831 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13832 } 13833 } 13834 } 13835 } 13836 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,n_div_8)13837 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8) { 13838 TEST_REQUIRES_ARM_NEON; 13839 for (uint32_t n = 16; n <= 24; n += 8) { 13840 for (size_t k = 1; k <= 80; k += 17) { 13841 GemmMicrokernelTester() 13842 .mr(3) 13843 .nr(8) 13844 .kr(4) 13845 .sr(2) 13846 .m(3) 13847 .n(n) 13848 .k(k) 13849 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13850 } 13851 } 13852 } 13853 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,n_div_8_strided_cn)13854 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_strided_cn) { 13855 TEST_REQUIRES_ARM_NEON; 13856 for (uint32_t n = 16; n <= 24; n += 8) { 13857 for (size_t k = 1; k <= 80; k += 17) { 13858 GemmMicrokernelTester() 13859 .mr(3) 13860 .nr(8) 13861 .kr(4) 13862 .sr(2) 13863 .m(3) 13864 .n(n) 13865 .k(k) 13866 .cn_stride(11) 13867 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13868 } 13869 } 13870 } 13871 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,n_div_8_subtile)13872 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_subtile) { 13873 TEST_REQUIRES_ARM_NEON; 13874 for (uint32_t n = 16; n <= 24; n += 8) { 13875 for (size_t k = 1; k <= 80; k += 17) { 13876 for (uint32_t m = 1; m <= 3; m++) { 13877 GemmMicrokernelTester() 13878 .mr(3) 13879 .nr(8) 13880 .kr(4) 13881 .sr(2) 13882 .m(m) 13883 .n(n) 13884 .k(k) 13885 .iterations(1) 13886 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13887 } 13888 } 13889 } 13890 } 13891 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,small_kernel)13892 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, small_kernel) { 13893 TEST_REQUIRES_ARM_NEON; 13894 for (size_t k = 1; k <= 80; k += 17) { 13895 GemmMicrokernelTester() 13896 .mr(3) 13897 .nr(8) 13898 .kr(4) 13899 .sr(2) 13900 .m(3) 13901 .n(8) 13902 .k(k) 13903 .ks(3) 13904 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13905 } 13906 } 13907 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,small_kernel_subtile)13908 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, small_kernel_subtile) { 13909 TEST_REQUIRES_ARM_NEON; 13910 for (size_t k = 1; k <= 80; k += 17) { 13911 for (uint32_t n = 1; n <= 8; n++) { 13912 for (uint32_t m = 1; m <= 3; m++) { 13913 GemmMicrokernelTester() 13914 .mr(3) 13915 .nr(8) 13916 .kr(4) 13917 .sr(2) 13918 .m(m) 13919 .n(n) 13920 .k(k) 13921 .ks(3) 13922 .iterations(1) 13923 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13924 } 13925 } 13926 } 13927 } 13928 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,n_gt_8_small_kernel)13929 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_small_kernel) { 13930 TEST_REQUIRES_ARM_NEON; 13931 for (uint32_t n = 9; n < 16; n++) { 13932 for (size_t k = 1; k <= 80; k += 17) { 13933 GemmMicrokernelTester() 13934 .mr(3) 13935 .nr(8) 13936 
.kr(4) 13937 .sr(2) 13938 .m(3) 13939 .n(n) 13940 .k(k) 13941 .ks(3) 13942 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13943 } 13944 } 13945 } 13946 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,n_div_8_small_kernel)13947 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_small_kernel) { 13948 TEST_REQUIRES_ARM_NEON; 13949 for (uint32_t n = 16; n <= 24; n += 8) { 13950 for (size_t k = 1; k <= 80; k += 17) { 13951 GemmMicrokernelTester() 13952 .mr(3) 13953 .nr(8) 13954 .kr(4) 13955 .sr(2) 13956 .m(3) 13957 .n(n) 13958 .k(k) 13959 .ks(3) 13960 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13961 } 13962 } 13963 } 13964 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,strided_cm_subtile)13965 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cm_subtile) { 13966 TEST_REQUIRES_ARM_NEON; 13967 for (size_t k = 1; k <= 80; k += 17) { 13968 for (uint32_t n = 1; n <= 8; n++) { 13969 for (uint32_t m = 1; m <= 3; m++) { 13970 GemmMicrokernelTester() 13971 .mr(3) 13972 .nr(8) 13973 .kr(4) 13974 .sr(2) 13975 .m(m) 13976 .n(n) 13977 .k(k) 13978 .cm_stride(11) 13979 .iterations(1) 13980 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 13981 } 13982 } 13983 } 13984 } 13985 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,a_offset)13986 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, a_offset) { 13987 TEST_REQUIRES_ARM_NEON; 13988 for (size_t k = 1; k <= 80; k += 17) { 13989 GemmMicrokernelTester() 13990 .mr(3) 13991 .nr(8) 13992 .kr(4) 13993 .sr(2) 13994 .m(3) 13995 .n(8) 13996 .k(k) 13997 .ks(3) 13998 .a_offset(251) 13999 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14000 } 14001 } 14002 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,zero)14003 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, zero) { 14004 TEST_REQUIRES_ARM_NEON; 14005 for (size_t k = 1; k <= 80; k += 17) { 14006 for (uint32_t mz = 0; mz < 3; mz++) { 14007 GemmMicrokernelTester() 14008 .mr(3) 14009 .nr(8) 14010 .kr(4) 14011 .sr(2) 14012 .m(3) 14013 .n(8) 14014 .k(k) 14015 .ks(3) 14016 .a_offset(251) 14017 .zero_index(mz) 14018 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14019 } 14020 } 14021 } 14022 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,qmin)14023 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, qmin) { 14024 TEST_REQUIRES_ARM_NEON; 14025 GemmMicrokernelTester() 14026 .mr(3) 14027 .nr(8) 14028 .kr(4) 14029 .sr(2) 14030 .m(3) 14031 .n(8) 14032 .k(16) 14033 .qmin(128) 14034 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14035 } 14036 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,qmax)14037 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, qmax) { 14038 TEST_REQUIRES_ARM_NEON; 14039 GemmMicrokernelTester() 14040 .mr(3) 14041 .nr(8) 14042 .kr(4) 14043 .sr(2) 14044 .m(3) 14045 .n(8) 14046 .k(16) 14047 .qmax(128) 14048 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14049 } 14050 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL,strided_cm)14051 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cm) { 14052 TEST_REQUIRES_ARM_NEON; 14053 GemmMicrokernelTester() 14054 .mr(3) 14055 .nr(8) 14056 .kr(4) 14057 .sr(2) 14058 .m(3) 14059 .n(8) 14060 .k(16) 14061 .cm_stride(11) 14062 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14063 } 14064 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 14065 14066 14067 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_eq_8)14068 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_eq_8) { 14069 TEST_REQUIRES_ARM_NEON; 14070 GemmMicrokernelTester() 14071 .mr(3) 14072 .nr(16) 14073 .kr(1) 14074 .sr(1) 14075 .m(3) 14076 .n(16) 14077 .k(8) 14078 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14079 } 14080 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,strided_cn)14081 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, strided_cn) { 14082 TEST_REQUIRES_ARM_NEON; 14083 GemmMicrokernelTester() 14084 .mr(3) 14085 .nr(16) 14086 .kr(1) 14087 .sr(1) 14088 .m(3) 14089 .n(16) 14090 .k(8) 14091 .cn_stride(19) 14092 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14093 } 14094 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_eq_8_subtile)14095 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) { 14096 TEST_REQUIRES_ARM_NEON; 14097 for (uint32_t n = 1; n <= 16; n++) { 14098 for (uint32_t m = 1; m <= 3; m++) { 14099 GemmMicrokernelTester() 14100 .mr(3) 14101 .nr(16) 14102 .kr(1) 14103 .sr(1) 14104 .m(m) 14105 .n(n) 14106 .k(8) 14107 .iterations(1) 14108 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14109 } 14110 } 14111 } 14112 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_eq_8_subtile_m)14113 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) { 14114 TEST_REQUIRES_ARM_NEON; 14115 for (uint32_t m = 1; m <= 3; m++) { 14116 GemmMicrokernelTester() 14117 .mr(3) 14118 .nr(16) 14119 .kr(1) 14120 .sr(1) 14121 .m(m) 14122 .n(16) 14123 .k(8) 14124 .iterations(1) 14125 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14126 } 
14127 } 14128 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_eq_8_subtile_n)14129 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) { 14130 TEST_REQUIRES_ARM_NEON; 14131 for (uint32_t n = 1; n <= 16; n++) { 14132 GemmMicrokernelTester() 14133 .mr(3) 14134 .nr(16) 14135 .kr(1) 14136 .sr(1) 14137 .m(3) 14138 .n(n) 14139 .k(8) 14140 .iterations(1) 14141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14142 } 14143 } 14144 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_lt_8)14145 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_lt_8) { 14146 TEST_REQUIRES_ARM_NEON; 14147 for (size_t k = 1; k < 8; k++) { 14148 GemmMicrokernelTester() 14149 .mr(3) 14150 .nr(16) 14151 .kr(1) 14152 .sr(1) 14153 .m(3) 14154 .n(16) 14155 .k(k) 14156 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14157 } 14158 } 14159 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_lt_8_subtile)14160 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) { 14161 TEST_REQUIRES_ARM_NEON; 14162 for (size_t k = 1; k < 8; k++) { 14163 for (uint32_t n = 1; n <= 16; n++) { 14164 for (uint32_t m = 1; m <= 3; m++) { 14165 GemmMicrokernelTester() 14166 .mr(3) 14167 .nr(16) 14168 .kr(1) 14169 .sr(1) 14170 .m(m) 14171 .n(n) 14172 .k(k) 14173 .iterations(1) 14174 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14175 } 14176 } 14177 } 14178 } 14179 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_gt_8)14180 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_gt_8) { 14181 TEST_REQUIRES_ARM_NEON; 14182 for (size_t k = 9; k < 16; k++) { 14183 GemmMicrokernelTester() 14184 .mr(3) 14185 .nr(16) 14186 .kr(1) 14187 .sr(1) 14188 .m(3) 14189 .n(16) 14190 .k(k) 14191 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14192 } 14193 } 14194 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_gt_8_subtile)14195 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) { 14196 TEST_REQUIRES_ARM_NEON; 14197 for (size_t k = 9; k < 16; k++) { 14198 for (uint32_t n = 1; n <= 16; n++) { 14199 for (uint32_t m = 1; m <= 3; m++) { 14200 GemmMicrokernelTester() 14201 .mr(3) 14202 .nr(16) 14203 .kr(1) 14204 .sr(1) 14205 .m(m) 14206 .n(n) 14207 .k(k) 14208 .iterations(1) 14209 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14210 } 14211 } 14212 } 14213 } 14214 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_div_8)14215 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_div_8) { 14216 TEST_REQUIRES_ARM_NEON; 14217 for (size_t k = 16; k <= 80; k += 8) { 14218 GemmMicrokernelTester() 14219 .mr(3) 14220 .nr(16) 14221 .kr(1) 14222 .sr(1) 14223 .m(3) 14224 .n(16) 14225 .k(k) 14226 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14227 } 14228 } 14229 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,k_div_8_subtile)14230 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) { 14231 TEST_REQUIRES_ARM_NEON; 14232 for (size_t k = 16; k <= 80; k += 8) { 14233 for (uint32_t n = 1; n <= 16; n++) { 14234 for (uint32_t m = 1; m <= 3; m++) { 14235 GemmMicrokernelTester() 14236 .mr(3) 14237 .nr(16) 14238 .kr(1) 14239 .sr(1) 14240 .m(m) 14241 .n(n) 14242 .k(k) 14243 .iterations(1) 14244 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14245 } 14246 } 14247 } 14248 } 14249 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,n_gt_16)14250 
// m=3; n swept over 17..31, k over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n (17..31) and k (1..40 step 9) sweep as n_gt_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n swept over 17..31, k over 1..40 in steps of 9, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3; n takes 32 and 48, k swept over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n (32, 48) and k (1..40 step 9) sweep as n_div_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(1).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n takes 32 and 48, k swept over 1..40 in steps of 9, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3, n=16 with ks(3); k swept over 1..40 in steps of 9.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(1).sr(1)
      .m(3).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,small_kernel_subtile)14376 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) { 14377 TEST_REQUIRES_ARM_NEON; 14378 for (size_t k = 1; k <= 40; k += 9) { 14379 for (uint32_t n = 1; n <= 16; n++) { 14380 for (uint32_t m = 1; m <= 3; m++) { 14381 GemmMicrokernelTester() 14382 .mr(3) 14383 .nr(16) 14384 .kr(1) 14385 .sr(1) 14386 .m(m) 14387 .n(n) 14388 .k(k) 14389 .ks(3) 14390 .iterations(1) 14391 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14392 } 14393 } 14394 } 14395 } 14396 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,n_gt_16_small_kernel)14397 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) { 14398 TEST_REQUIRES_ARM_NEON; 14399 for (uint32_t n = 17; n < 32; n++) { 14400 for (size_t k = 1; k <= 40; k += 9) { 14401 GemmMicrokernelTester() 14402 .mr(3) 14403 .nr(16) 14404 .kr(1) 14405 .sr(1) 14406 .m(3) 14407 .n(n) 14408 .k(k) 14409 .ks(3) 14410 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14411 } 14412 } 14413 } 14414 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,n_div_16_small_kernel)14415 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) { 14416 TEST_REQUIRES_ARM_NEON; 14417 for (uint32_t n = 32; n <= 48; n += 16) { 14418 for (size_t k = 1; k <= 40; k += 9) { 14419 GemmMicrokernelTester() 14420 .mr(3) 14421 .nr(16) 14422 .kr(1) 14423 .sr(1) 14424 .m(3) 14425 .n(n) 14426 .k(k) 14427 .ks(3) 14428 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14429 } 14430 } 14431 } 14432 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,strided_cm_subtile)14433 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) { 14434 
TEST_REQUIRES_ARM_NEON; 14435 for (size_t k = 1; k <= 40; k += 9) { 14436 for (uint32_t n = 1; n <= 16; n++) { 14437 for (uint32_t m = 1; m <= 3; m++) { 14438 GemmMicrokernelTester() 14439 .mr(3) 14440 .nr(16) 14441 .kr(1) 14442 .sr(1) 14443 .m(m) 14444 .n(n) 14445 .k(k) 14446 .cm_stride(19) 14447 .iterations(1) 14448 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14449 } 14450 } 14451 } 14452 } 14453 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,a_offset)14454 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, a_offset) { 14455 TEST_REQUIRES_ARM_NEON; 14456 for (size_t k = 1; k <= 40; k += 9) { 14457 GemmMicrokernelTester() 14458 .mr(3) 14459 .nr(16) 14460 .kr(1) 14461 .sr(1) 14462 .m(3) 14463 .n(16) 14464 .k(k) 14465 .ks(3) 14466 .a_offset(127) 14467 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14468 } 14469 } 14470 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,zero)14471 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, zero) { 14472 TEST_REQUIRES_ARM_NEON; 14473 for (size_t k = 1; k <= 40; k += 9) { 14474 for (uint32_t mz = 0; mz < 3; mz++) { 14475 GemmMicrokernelTester() 14476 .mr(3) 14477 .nr(16) 14478 .kr(1) 14479 .sr(1) 14480 .m(3) 14481 .n(16) 14482 .k(k) 14483 .ks(3) 14484 .a_offset(127) 14485 .zero_index(mz) 14486 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14487 } 14488 } 14489 } 14490 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,qmin)14491 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, qmin) { 14492 TEST_REQUIRES_ARM_NEON; 14493 GemmMicrokernelTester() 14494 .mr(3) 14495 .nr(16) 14496 .kr(1) 14497 .sr(1) 14498 .m(3) 14499 .n(16) 14500 .k(8) 14501 .qmin(128) 14502 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14503 } 14504 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,qmax)14505 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, qmax) { 14506 TEST_REQUIRES_ARM_NEON; 14507 GemmMicrokernelTester() 14508 .mr(3) 14509 .nr(16) 14510 .kr(1) 14511 .sr(1) 14512 .m(3) 14513 .n(16) 14514 .k(8) 14515 .qmax(128) 14516 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14517 } 14518 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP,strided_cm)14519 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MULL_ADDW_DUP, strided_cm) { 14520 TEST_REQUIRES_ARM_NEON; 14521 GemmMicrokernelTester() 14522 .mr(3) 14523 .nr(16) 14524 .kr(1) 14525 .sr(1) 14526 .m(3) 14527 .n(16) 14528 .k(8) 14529 .cm_stride(19) 14530 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14531 } 14532 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 14533 14534 14535 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP,k_eq_16)14536 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_eq_16) { 14537 TEST_REQUIRES_ARM_NEON; 14538 GemmMicrokernelTester() 14539 .mr(3) 14540 .nr(16) 14541 .kr(2) 14542 .sr(1) 14543 .m(3) 14544 .n(16) 14545 .k(16) 14546 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14547 } 14548 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP,strided_cn)14549 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, strided_cn) { 14550 TEST_REQUIRES_ARM_NEON; 14551 GemmMicrokernelTester() 14552 .mr(3) 14553 .nr(16) 14554 .kr(2) 14555 .sr(1) 14556 .m(3) 14557 .n(16) 14558 .k(16) 14559 .cn_stride(19) 14560 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14561 } 14562 
// k=16; n swept over 1..16, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(m).n(n).k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n=16, k=16; m swept over 1..3 with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(m).n(16).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m=3, k=16; n swept over 1..16 with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(n).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// m=3, n=16; k swept over 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 1..15, n over 1..16, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3, n=16; k swept over 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 17..31, n over 1..16, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3, n=16; k swept over 32..160 in steps of 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k swept over 32..160 in steps of 16, n over 1..16, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3; n swept over 17..31, k over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n (17..31) and k (1..80 step 17) sweep as n_gt_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n swept over 17..31, k over 1..80 in steps of 17, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3; n takes 32 and 48, k swept over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n (32, 48) and k (1..80 step 17) sweep as n_div_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n takes 32 and 48, k swept over 1..80 in steps of 17, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3, n=16 with ks(3); k swept over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks(3) with k swept over 1..80 in steps of 17, n over 1..16, m over 1..3, iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3 with ks(3); n swept over 17..31, k over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m=3 with ks(3); n takes 32 and 48, k swept over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride(19) with k swept over 1..80 in steps of 17, n over 1..16, m over 1..3, iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3, n=16 with ks(3) and a_offset(251); k swept over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .ks(3)
      .a_offset(251)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP,zero)14939 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, zero) { 14940 TEST_REQUIRES_ARM_NEON; 14941 for (size_t k = 1; k <= 80; k += 17) { 14942 for (uint32_t mz = 0; mz < 3; mz++) { 14943 GemmMicrokernelTester() 14944 .mr(3) 14945 .nr(16) 14946 .kr(2) 14947 .sr(1) 14948 .m(3) 14949 .n(16) 14950 .k(k) 14951 .ks(3) 14952 .a_offset(251) 14953 .zero_index(mz) 14954 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14955 } 14956 } 14957 } 14958 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP,qmin)14959 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, qmin) { 14960 TEST_REQUIRES_ARM_NEON; 14961 GemmMicrokernelTester() 14962 .mr(3) 14963 .nr(16) 14964 .kr(2) 14965 .sr(1) 14966 .m(3) 14967 .n(16) 14968 .k(16) 14969 .qmin(128) 14970 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14971 } 14972 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP,qmax)14973 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, qmax) { 14974 TEST_REQUIRES_ARM_NEON; 14975 GemmMicrokernelTester() 14976 .mr(3) 14977 .nr(16) 14978 .kr(2) 14979 .sr(1) 14980 .m(3) 14981 .n(16) 14982 .k(16) 14983 .qmax(128) 14984 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14985 } 14986 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP,strided_cm)14987 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_DUP, strided_cm) { 14988 TEST_REQUIRES_ARM_NEON; 14989 GemmMicrokernelTester() 14990 .mr(3) 14991 .nr(16) 14992 .kr(2) 14993 .sr(1) 14994 .m(3) 14995 .n(16) 14996 .k(16) 14997 .cm_stride(19) 14998 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 14999 } 15000 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 15001 15002 15003 #if 
XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16)15004 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16) { 15005 TEST_REQUIRES_ARM_NEON; 15006 GemmMicrokernelTester() 15007 .mr(3) 15008 .nr(16) 15009 .kr(2) 15010 .sr(1) 15011 .m(3) 15012 .n(16) 15013 .k(16) 15014 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15015 } 15016 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,strided_cn)15017 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cn) { 15018 TEST_REQUIRES_ARM_NEON; 15019 GemmMicrokernelTester() 15020 .mr(3) 15021 .nr(16) 15022 .kr(2) 15023 .sr(1) 15024 .m(3) 15025 .n(16) 15026 .k(16) 15027 .cn_stride(19) 15028 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15029 } 15030 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile)15031 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) { 15032 TEST_REQUIRES_ARM_NEON; 15033 for (uint32_t n = 1; n <= 16; n++) { 15034 for (uint32_t m = 1; m <= 3; m++) { 15035 GemmMicrokernelTester() 15036 .mr(3) 15037 .nr(16) 15038 .kr(2) 15039 .sr(1) 15040 .m(m) 15041 .n(n) 15042 .k(16) 15043 .iterations(1) 15044 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15045 } 15046 } 15047 } 15048 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_m)15049 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 15050 TEST_REQUIRES_ARM_NEON; 15051 for (uint32_t m = 1; m <= 3; m++) { 15052 GemmMicrokernelTester() 15053 .mr(3) 15054 .nr(16) 15055 .kr(2) 15056 .sr(1) 15057 .m(m) 15058 .n(16) 15059 .k(16) 15060 .iterations(1) 15061 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 15062 } 15063 } 15064 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_eq_16_subtile_n)15065 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 15066 TEST_REQUIRES_ARM_NEON; 15067 for (uint32_t n = 1; n <= 16; n++) { 15068 GemmMicrokernelTester() 15069 .mr(3) 15070 .nr(16) 15071 .kr(2) 15072 .sr(1) 15073 .m(3) 15074 .n(n) 15075 .k(16) 15076 .iterations(1) 15077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15078 } 15079 } 15080 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_lt_16)15081 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16) { 15082 TEST_REQUIRES_ARM_NEON; 15083 for (size_t k = 1; k < 16; k++) { 15084 GemmMicrokernelTester() 15085 .mr(3) 15086 .nr(16) 15087 .kr(2) 15088 .sr(1) 15089 .m(3) 15090 .n(16) 15091 .k(k) 15092 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15093 } 15094 } 15095 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_lt_16_subtile)15096 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) { 15097 TEST_REQUIRES_ARM_NEON; 15098 for (size_t k = 1; k < 16; k++) { 15099 for (uint32_t n = 1; n <= 16; n++) { 15100 for (uint32_t m = 1; m <= 3; m++) { 15101 GemmMicrokernelTester() 15102 .mr(3) 15103 .nr(16) 15104 .kr(2) 15105 .sr(1) 15106 .m(m) 15107 .n(n) 15108 .k(k) 15109 .iterations(1) 15110 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15111 } 15112 } 15113 } 15114 } 15115 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_gt_16)15116 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16) { 15117 TEST_REQUIRES_ARM_NEON; 15118 for (size_t k = 17; k < 32; k++) { 15119 GemmMicrokernelTester() 15120 .mr(3) 15121 .nr(16) 15122 .kr(2) 15123 .sr(1) 15124 .m(3) 15125 .n(16) 15126 .k(k) 
15127 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15128 } 15129 } 15130 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_gt_16_subtile)15131 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) { 15132 TEST_REQUIRES_ARM_NEON; 15133 for (size_t k = 17; k < 32; k++) { 15134 for (uint32_t n = 1; n <= 16; n++) { 15135 for (uint32_t m = 1; m <= 3; m++) { 15136 GemmMicrokernelTester() 15137 .mr(3) 15138 .nr(16) 15139 .kr(2) 15140 .sr(1) 15141 .m(m) 15142 .n(n) 15143 .k(k) 15144 .iterations(1) 15145 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15146 } 15147 } 15148 } 15149 } 15150 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_div_16)15151 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16) { 15152 TEST_REQUIRES_ARM_NEON; 15153 for (size_t k = 32; k <= 160; k += 16) { 15154 GemmMicrokernelTester() 15155 .mr(3) 15156 .nr(16) 15157 .kr(2) 15158 .sr(1) 15159 .m(3) 15160 .n(16) 15161 .k(k) 15162 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15163 } 15164 } 15165 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,k_div_16_subtile)15166 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, k_div_16_subtile) { 15167 TEST_REQUIRES_ARM_NEON; 15168 for (size_t k = 32; k <= 160; k += 16) { 15169 for (uint32_t n = 1; n <= 16; n++) { 15170 for (uint32_t m = 1; m <= 3; m++) { 15171 GemmMicrokernelTester() 15172 .mr(3) 15173 .nr(16) 15174 .kr(2) 15175 .sr(1) 15176 .m(m) 15177 .n(n) 15178 .k(k) 15179 .iterations(1) 15180 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15181 } 15182 } 15183 } 15184 } 15185 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_gt_16)15186 
// m=3; n swept over 17..31, k over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n (17..31) and k (1..80 step 17) sweep as n_gt_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n swept over 17..31, k over 1..80 in steps of 17, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3; n takes 32 and 48, k swept over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n (32, 48) and k (1..80 step 17) sweep as n_div_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n takes 32 and 48, k swept over 1..80 in steps of 17, m over 1..3, with iterations(1).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=3, n=16 with ks(3); k swept over 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,small_kernel_subtile)15312 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, small_kernel_subtile) { 15313 TEST_REQUIRES_ARM_NEON; 15314 for (size_t k = 1; k <= 80; k += 17) { 15315 for (uint32_t n = 1; n <= 16; n++) { 15316 for (uint32_t m = 1; m <= 3; m++) { 15317 GemmMicrokernelTester() 15318 .mr(3) 15319 .nr(16) 15320 .kr(2) 15321 .sr(1) 15322 .m(m) 15323 .n(n) 15324 .k(k) 15325 .ks(3) 15326 .iterations(1) 15327 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15328 } 15329 } 15330 } 15331 } 15332 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_gt_16_small_kernel)15333 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_gt_16_small_kernel) { 15334 TEST_REQUIRES_ARM_NEON; 15335 for (uint32_t n = 17; n < 32; n++) { 15336 for (size_t k = 1; k <= 80; k += 17) { 15337 GemmMicrokernelTester() 15338 .mr(3) 15339 .nr(16) 15340 .kr(2) 15341 .sr(1) 15342 .m(3) 15343 .n(n) 15344 .k(k) 15345 .ks(3) 15346 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15347 } 15348 } 15349 } 15350 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,n_div_16_small_kernel)15351 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, n_div_16_small_kernel) { 15352 TEST_REQUIRES_ARM_NEON; 15353 for (uint32_t n = 32; n <= 48; n += 16) { 15354 for (size_t k = 1; k <= 80; k += 17) { 15355 GemmMicrokernelTester() 15356 .mr(3) 15357 .nr(16) 15358 .kr(2) 15359 .sr(1) 15360 .m(3) 15361 .n(n) 15362 .k(k) 15363 .ks(3) 15364 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15365 } 15366 } 15367 } 15368 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,strided_cm_subtile)15369 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cm_subtile) { 15370 TEST_REQUIRES_ARM_NEON; 15371 for 
(size_t k = 1; k <= 80; k += 17) { 15372 for (uint32_t n = 1; n <= 16; n++) { 15373 for (uint32_t m = 1; m <= 3; m++) { 15374 GemmMicrokernelTester() 15375 .mr(3) 15376 .nr(16) 15377 .kr(2) 15378 .sr(1) 15379 .m(m) 15380 .n(n) 15381 .k(k) 15382 .cm_stride(19) 15383 .iterations(1) 15384 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15385 } 15386 } 15387 } 15388 } 15389 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,a_offset)15390 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, a_offset) { 15391 TEST_REQUIRES_ARM_NEON; 15392 for (size_t k = 1; k <= 80; k += 17) { 15393 GemmMicrokernelTester() 15394 .mr(3) 15395 .nr(16) 15396 .kr(2) 15397 .sr(1) 15398 .m(3) 15399 .n(16) 15400 .k(k) 15401 .ks(3) 15402 .a_offset(251) 15403 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15404 } 15405 } 15406 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,zero)15407 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, zero) { 15408 TEST_REQUIRES_ARM_NEON; 15409 for (size_t k = 1; k <= 80; k += 17) { 15410 for (uint32_t mz = 0; mz < 3; mz++) { 15411 GemmMicrokernelTester() 15412 .mr(3) 15413 .nr(16) 15414 .kr(2) 15415 .sr(1) 15416 .m(3) 15417 .n(16) 15418 .k(k) 15419 .ks(3) 15420 .a_offset(251) 15421 .zero_index(mz) 15422 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15423 } 15424 } 15425 } 15426 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,qmin)15427 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, qmin) { 15428 TEST_REQUIRES_ARM_NEON; 15429 GemmMicrokernelTester() 15430 .mr(3) 15431 .nr(16) 15432 .kr(2) 15433 .sr(1) 15434 .m(3) 15435 .n(16) 15436 .k(16) 15437 .qmin(128) 15438 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 15439 } 15440 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,qmax)15441 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, qmax) { 15442 TEST_REQUIRES_ARM_NEON; 15443 GemmMicrokernelTester() 15444 .mr(3) 15445 .nr(16) 15446 .kr(2) 15447 .sr(1) 15448 .m(3) 15449 .n(16) 15450 .k(16) 15451 .qmax(128) 15452 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15453 } 15454 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R,strided_cm)15455 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD2R, strided_cm) { 15456 TEST_REQUIRES_ARM_NEON; 15457 GemmMicrokernelTester() 15458 .mr(3) 15459 .nr(16) 15460 .kr(2) 15461 .sr(1) 15462 .m(3) 15463 .n(16) 15464 .k(16) 15465 .cm_stride(19) 15466 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15467 } 15468 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 15469 15470 15471 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R,k_eq_8)15472 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8) { 15473 TEST_REQUIRES_ARM_NEON; 15474 GemmMicrokernelTester() 15475 .mr(3) 15476 .nr(16) 15477 .kr(2) 15478 .sr(1) 15479 .m(3) 15480 .n(16) 15481 .k(8) 15482 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15483 } 15484 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R,strided_cn)15485 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cn) { 15486 TEST_REQUIRES_ARM_NEON; 15487 GemmMicrokernelTester() 15488 .mr(3) 15489 .nr(16) 15490 .kr(2) 15491 .sr(1) 15492 .m(3) 15493 .n(16) 15494 .k(8) 15495 .cn_stride(19) 15496 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15497 } 15498 
// 3x16c2 NEON MULL LD2R IGEMM: k, n, small-kernel, strided and a_offset sweeps.
// Every test here configures mr=3, nr=16, kr=2, sr=1.

// k = 8 with n = 1..16 and m = 1..3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16, varying only m over 1..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(m).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 3, varying only n over 1..16.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(n).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full tile for k = 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..7 with n = 1..16 and m = 1..3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 9..15 with n = 1..16 and m = 1..3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 with n = 1..16 and m = 1..3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 17..31 crossed with k = 1, 10, 19, 28, 37; m fixed at 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_gt_16, additionally varying m over 1..3; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 crossed with k = 1, 10, 19, 28, 37; m fixed at 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same n/k sweep as n_div_16, additionally varying m over 1..3; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with ks = 3, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks = 3 with k = 1, 10, 19, 28, 37, n = 1..16 and m = 1..3; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 with n = 17..31 and k = 1, 10, 19, 28, 37; m fixed at 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks = 3 with n = 32 and 48 and k = 1, 10, 19, 28, 37; m fixed at 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(1)
        .m(3).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride = 19 with k = 1, 10, 19, 28, 37, n = 1..16 and m = 1..3; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3).nr(16).kr(2).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile, ks = 3, a_offset = 127, for k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(1)
      .m(3).n(16).k(k)
      .ks(3)
      .a_offset(127)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R,zero)15875 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, zero) { 15876 TEST_REQUIRES_ARM_NEON; 15877 for (size_t k = 1; k <= 40; k += 9) { 15878 for (uint32_t mz = 0; mz < 3; mz++) { 15879 GemmMicrokernelTester() 15880 .mr(3) 15881 .nr(16) 15882 .kr(2) 15883 .sr(1) 15884 .m(3) 15885 .n(16) 15886 .k(k) 15887 .ks(3) 15888 .a_offset(127) 15889 .zero_index(mz) 15890 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15891 } 15892 } 15893 } 15894 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R,qmin)15895 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, qmin) { 15896 TEST_REQUIRES_ARM_NEON; 15897 GemmMicrokernelTester() 15898 .mr(3) 15899 .nr(16) 15900 .kr(2) 15901 .sr(1) 15902 .m(3) 15903 .n(16) 15904 .k(8) 15905 .qmin(128) 15906 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15907 } 15908 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R,qmax)15909 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, qmax) { 15910 TEST_REQUIRES_ARM_NEON; 15911 GemmMicrokernelTester() 15912 .mr(3) 15913 .nr(16) 15914 .kr(2) 15915 .sr(1) 15916 .m(3) 15917 .n(16) 15918 .k(8) 15919 .qmax(128) 15920 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15921 } 15922 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R,strided_cm)15923 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cm) { 15924 TEST_REQUIRES_ARM_NEON; 15925 GemmMicrokernelTester() 15926 .mr(3) 15927 .nr(16) 15928 .kr(2) 15929 .sr(1) 15930 .m(3) 15931 .n(16) 15932 .k(8) 15933 .cm_stride(19) 15934 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 15935 } 15936 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 15937 15938 15939 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
// 3x16c2s4 NEON MULL IGEMM: first four tests, all at k = 8.
// Every test here configures mr=3, nr=16, kr=2, sr=4.

// Full 3x16 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(2).sr(4)
    .m(3).n(16).k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full tile at k = 8 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3).nr(16).kr(2).sr(4)
    .m(3).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k = 8 with n = 1..16 and m = 1..3; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3).nr(16).kr(2).sr(4)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 16, varying only m over 1..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 3; m++) {
    GemmMicrokernelTester()
      .mr(3).nr(16).kr(2).sr(4)
      .m(m).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,k_eq_8_subtile_n)16001 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile_n) { 16002 TEST_REQUIRES_ARM_NEON; 16003 for (uint32_t n = 1; n <= 16; n++) { 16004 GemmMicrokernelTester() 16005 .mr(3) 16006 .nr(16) 16007 .kr(2) 16008 .sr(4) 16009 .m(3) 16010 .n(n) 16011 .k(8) 16012 .iterations(1) 16013 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16014 } 16015 } 16016 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,k_lt_8)16017 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_lt_8) { 16018 TEST_REQUIRES_ARM_NEON; 16019 for (size_t k = 1; k < 8; k++) { 16020 GemmMicrokernelTester() 16021 .mr(3) 16022 .nr(16) 16023 .kr(2) 16024 .sr(4) 16025 .m(3) 16026 .n(16) 16027 .k(k) 16028 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16029 } 16030 } 16031 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,k_lt_8_subtile)16032 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_lt_8_subtile) { 16033 TEST_REQUIRES_ARM_NEON; 16034 for (size_t k = 1; k < 8; k++) { 16035 for (uint32_t n = 1; n <= 16; n++) { 16036 for (uint32_t m = 1; m <= 3; m++) { 16037 GemmMicrokernelTester() 16038 .mr(3) 16039 .nr(16) 16040 .kr(2) 16041 .sr(4) 16042 .m(m) 16043 .n(n) 16044 .k(k) 16045 .iterations(1) 16046 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16047 } 16048 } 16049 } 16050 } 16051 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,k_gt_8)16052 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_gt_8) { 16053 TEST_REQUIRES_ARM_NEON; 16054 for (size_t k = 9; k < 16; k++) { 16055 GemmMicrokernelTester() 16056 .mr(3) 16057 .nr(16) 16058 .kr(2) 16059 .sr(4) 16060 .m(3) 16061 .n(16) 16062 .k(k) 16063 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16064 } 16065 } 16066 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,k_gt_8_subtile)16067 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_gt_8_subtile) { 16068 TEST_REQUIRES_ARM_NEON; 16069 for (size_t k = 9; k < 16; k++) { 16070 for (uint32_t n = 1; n <= 16; n++) { 16071 for (uint32_t m = 1; m <= 3; m++) { 16072 GemmMicrokernelTester() 16073 .mr(3) 16074 .nr(16) 16075 .kr(2) 16076 .sr(4) 16077 .m(m) 16078 .n(n) 16079 .k(k) 16080 .iterations(1) 16081 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16082 } 16083 } 16084 } 16085 } 16086 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,k_div_8)16087 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_div_8) { 16088 TEST_REQUIRES_ARM_NEON; 16089 for (size_t k = 16; k <= 80; k += 8) { 16090 GemmMicrokernelTester() 16091 .mr(3) 16092 .nr(16) 16093 .kr(2) 16094 .sr(4) 16095 .m(3) 16096 .n(16) 16097 .k(k) 16098 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16099 } 16100 } 16101 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,k_div_8_subtile)16102 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_div_8_subtile) { 16103 TEST_REQUIRES_ARM_NEON; 16104 for (size_t k = 16; k <= 80; k += 8) { 16105 for (uint32_t n = 1; n <= 16; n++) { 16106 for (uint32_t m = 1; m <= 3; m++) { 16107 GemmMicrokernelTester() 16108 .mr(3) 16109 .nr(16) 16110 .kr(2) 16111 .sr(4) 16112 .m(m) 16113 .n(n) 16114 .k(k) 16115 .iterations(1) 16116 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16117 } 16118 } 16119 } 16120 } 16121 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,n_gt_16)16122 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16) { 16123 TEST_REQUIRES_ARM_NEON; 16124 for (uint32_t n = 17; n < 32; n++) { 16125 for 
(size_t k = 1; k <= 40; k += 9) { 16126 GemmMicrokernelTester() 16127 .mr(3) 16128 .nr(16) 16129 .kr(2) 16130 .sr(4) 16131 .m(3) 16132 .n(n) 16133 .k(k) 16134 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16135 } 16136 } 16137 } 16138 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,n_gt_16_strided_cn)16139 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_strided_cn) { 16140 TEST_REQUIRES_ARM_NEON; 16141 for (uint32_t n = 17; n < 32; n++) { 16142 for (size_t k = 1; k <= 40; k += 9) { 16143 GemmMicrokernelTester() 16144 .mr(3) 16145 .nr(16) 16146 .kr(2) 16147 .sr(4) 16148 .m(3) 16149 .n(n) 16150 .k(k) 16151 .cn_stride(19) 16152 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16153 } 16154 } 16155 } 16156 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,n_gt_16_subtile)16157 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_subtile) { 16158 TEST_REQUIRES_ARM_NEON; 16159 for (uint32_t n = 17; n < 32; n++) { 16160 for (size_t k = 1; k <= 40; k += 9) { 16161 for (uint32_t m = 1; m <= 3; m++) { 16162 GemmMicrokernelTester() 16163 .mr(3) 16164 .nr(16) 16165 .kr(2) 16166 .sr(4) 16167 .m(m) 16168 .n(n) 16169 .k(k) 16170 .iterations(1) 16171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16172 } 16173 } 16174 } 16175 } 16176 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,n_div_16)16177 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16) { 16178 TEST_REQUIRES_ARM_NEON; 16179 for (uint32_t n = 32; n <= 48; n += 16) { 16180 for (size_t k = 1; k <= 40; k += 9) { 16181 GemmMicrokernelTester() 16182 .mr(3) 16183 .nr(16) 16184 .kr(2) 16185 .sr(4) 16186 .m(3) 16187 .n(n) 16188 .k(k) 16189 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 16190 } 16191 } 16192 } 16193 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,n_div_16_strided_cn)16194 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_strided_cn) { 16195 TEST_REQUIRES_ARM_NEON; 16196 for (uint32_t n = 32; n <= 48; n += 16) { 16197 for (size_t k = 1; k <= 40; k += 9) { 16198 GemmMicrokernelTester() 16199 .mr(3) 16200 .nr(16) 16201 .kr(2) 16202 .sr(4) 16203 .m(3) 16204 .n(n) 16205 .k(k) 16206 .cn_stride(19) 16207 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16208 } 16209 } 16210 } 16211 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,n_div_16_subtile)16212 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_subtile) { 16213 TEST_REQUIRES_ARM_NEON; 16214 for (uint32_t n = 32; n <= 48; n += 16) { 16215 for (size_t k = 1; k <= 40; k += 9) { 16216 for (uint32_t m = 1; m <= 3; m++) { 16217 GemmMicrokernelTester() 16218 .mr(3) 16219 .nr(16) 16220 .kr(2) 16221 .sr(4) 16222 .m(m) 16223 .n(n) 16224 .k(k) 16225 .iterations(1) 16226 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16227 } 16228 } 16229 } 16230 } 16231 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,small_kernel)16232 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, small_kernel) { 16233 TEST_REQUIRES_ARM_NEON; 16234 for (size_t k = 1; k <= 40; k += 9) { 16235 GemmMicrokernelTester() 16236 .mr(3) 16237 .nr(16) 16238 .kr(2) 16239 .sr(4) 16240 .m(3) 16241 .n(16) 16242 .k(k) 16243 .ks(3) 16244 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16245 } 16246 } 16247 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,small_kernel_subtile)16248 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, small_kernel_subtile) { 16249 TEST_REQUIRES_ARM_NEON; 16250 for (size_t k = 1; k <= 40; k += 9) { 16251 for (uint32_t n 
= 1; n <= 16; n++) { 16252 for (uint32_t m = 1; m <= 3; m++) { 16253 GemmMicrokernelTester() 16254 .mr(3) 16255 .nr(16) 16256 .kr(2) 16257 .sr(4) 16258 .m(m) 16259 .n(n) 16260 .k(k) 16261 .ks(3) 16262 .iterations(1) 16263 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16264 } 16265 } 16266 } 16267 } 16268 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,n_gt_16_small_kernel)16269 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_small_kernel) { 16270 TEST_REQUIRES_ARM_NEON; 16271 for (uint32_t n = 17; n < 32; n++) { 16272 for (size_t k = 1; k <= 40; k += 9) { 16273 GemmMicrokernelTester() 16274 .mr(3) 16275 .nr(16) 16276 .kr(2) 16277 .sr(4) 16278 .m(3) 16279 .n(n) 16280 .k(k) 16281 .ks(3) 16282 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16283 } 16284 } 16285 } 16286 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,n_div_16_small_kernel)16287 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_small_kernel) { 16288 TEST_REQUIRES_ARM_NEON; 16289 for (uint32_t n = 32; n <= 48; n += 16) { 16290 for (size_t k = 1; k <= 40; k += 9) { 16291 GemmMicrokernelTester() 16292 .mr(3) 16293 .nr(16) 16294 .kr(2) 16295 .sr(4) 16296 .m(3) 16297 .n(n) 16298 .k(k) 16299 .ks(3) 16300 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16301 } 16302 } 16303 } 16304 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,strided_cm_subtile)16305 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cm_subtile) { 16306 TEST_REQUIRES_ARM_NEON; 16307 for (size_t k = 1; k <= 40; k += 9) { 16308 for (uint32_t n = 1; n <= 16; n++) { 16309 for (uint32_t m = 1; m <= 3; m++) { 16310 GemmMicrokernelTester() 16311 .mr(3) 16312 .nr(16) 16313 .kr(2) 16314 .sr(4) 16315 .m(m) 16316 .n(n) 16317 .k(k) 16318 .cm_stride(19) 16319 .iterations(1) 
16320 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16321 } 16322 } 16323 } 16324 } 16325 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,a_offset)16326 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, a_offset) { 16327 TEST_REQUIRES_ARM_NEON; 16328 for (size_t k = 1; k <= 40; k += 9) { 16329 GemmMicrokernelTester() 16330 .mr(3) 16331 .nr(16) 16332 .kr(2) 16333 .sr(4) 16334 .m(3) 16335 .n(16) 16336 .k(k) 16337 .ks(3) 16338 .a_offset(127) 16339 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16340 } 16341 } 16342 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,zero)16343 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, zero) { 16344 TEST_REQUIRES_ARM_NEON; 16345 for (size_t k = 1; k <= 40; k += 9) { 16346 for (uint32_t mz = 0; mz < 3; mz++) { 16347 GemmMicrokernelTester() 16348 .mr(3) 16349 .nr(16) 16350 .kr(2) 16351 .sr(4) 16352 .m(3) 16353 .n(16) 16354 .k(k) 16355 .ks(3) 16356 .a_offset(127) 16357 .zero_index(mz) 16358 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16359 } 16360 } 16361 } 16362 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,qmin)16363 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, qmin) { 16364 TEST_REQUIRES_ARM_NEON; 16365 GemmMicrokernelTester() 16366 .mr(3) 16367 .nr(16) 16368 .kr(2) 16369 .sr(4) 16370 .m(3) 16371 .n(16) 16372 .k(8) 16373 .qmin(128) 16374 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16375 } 16376 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,qmax)16377 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, qmax) { 16378 TEST_REQUIRES_ARM_NEON; 16379 GemmMicrokernelTester() 16380 .mr(3) 16381 .nr(16) 16382 .kr(2) 16383 .sr(4) 16384 .m(3) 16385 .n(16) 16386 .k(8) 16387 .qmax(128) 16388 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16389 } 16390 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL,strided_cm)16391 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cm) { 16392 TEST_REQUIRES_ARM_NEON; 16393 GemmMicrokernelTester() 16394 .mr(3) 16395 .nr(16) 16396 .kr(2) 16397 .sr(4) 16398 .m(3) 16399 .n(16) 16400 .k(8) 16401 .cm_stride(19) 16402 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16403 } 16404 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 16405 16406 16407 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_eq_16)16408 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_eq_16) { 16409 TEST_REQUIRES_ARM_NEON; 16410 GemmMicrokernelTester() 16411 .mr(3) 16412 .nr(16) 16413 .kr(4) 16414 .sr(1) 16415 .m(3) 16416 .n(16) 16417 .k(16) 16418 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16419 } 16420 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,strided_cn)16421 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, strided_cn) { 16422 TEST_REQUIRES_ARM_NEON; 16423 GemmMicrokernelTester() 16424 .mr(3) 16425 .nr(16) 16426 .kr(4) 16427 .sr(1) 16428 .m(3) 16429 .n(16) 16430 .k(16) 16431 .cn_stride(19) 16432 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16433 } 16434 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_eq_16_subtile)16435 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_eq_16_subtile) { 16436 TEST_REQUIRES_ARM_NEON; 16437 for (uint32_t n = 1; n <= 16; n++) { 16438 for (uint32_t m = 1; m <= 3; m++) { 16439 GemmMicrokernelTester() 16440 .mr(3) 16441 .nr(16) 16442 .kr(4) 16443 .sr(1) 16444 .m(m) 16445 .n(n) 16446 .k(16) 16447 .iterations(1) 16448 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16449 } 16450 } 16451 } 16452 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_eq_16_subtile_m)16453 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) { 16454 TEST_REQUIRES_ARM_NEON; 16455 for (uint32_t m = 1; m <= 3; m++) { 16456 GemmMicrokernelTester() 16457 .mr(3) 16458 .nr(16) 16459 .kr(4) 16460 .sr(1) 16461 .m(m) 16462 .n(16) 16463 .k(16) 16464 .iterations(1) 16465 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16466 } 16467 } 16468 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_eq_16_subtile_n)16469 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) { 16470 TEST_REQUIRES_ARM_NEON; 16471 for (uint32_t n = 1; n <= 16; n++) { 16472 GemmMicrokernelTester() 16473 .mr(3) 16474 .nr(16) 16475 .kr(4) 16476 .sr(1) 16477 .m(3) 16478 .n(n) 16479 .k(16) 16480 .iterations(1) 16481 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16482 } 16483 } 16484 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_lt_16)16485 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_lt_16) { 16486 TEST_REQUIRES_ARM_NEON; 16487 for (size_t k = 1; k < 16; k++) { 16488 GemmMicrokernelTester() 16489 .mr(3) 16490 .nr(16) 16491 .kr(4) 16492 .sr(1) 16493 .m(3) 16494 .n(16) 16495 .k(k) 16496 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16497 } 16498 } 16499 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_lt_16_subtile)16500 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_lt_16_subtile) { 16501 TEST_REQUIRES_ARM_NEON; 16502 for (size_t k = 1; k < 16; k++) { 16503 for (uint32_t n = 1; n <= 16; n++) { 16504 for (uint32_t m = 1; m <= 3; m++) { 16505 
GemmMicrokernelTester() 16506 .mr(3) 16507 .nr(16) 16508 .kr(4) 16509 .sr(1) 16510 .m(m) 16511 .n(n) 16512 .k(k) 16513 .iterations(1) 16514 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16515 } 16516 } 16517 } 16518 } 16519 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_gt_16)16520 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_gt_16) { 16521 TEST_REQUIRES_ARM_NEON; 16522 for (size_t k = 17; k < 32; k++) { 16523 GemmMicrokernelTester() 16524 .mr(3) 16525 .nr(16) 16526 .kr(4) 16527 .sr(1) 16528 .m(3) 16529 .n(16) 16530 .k(k) 16531 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16532 } 16533 } 16534 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_gt_16_subtile)16535 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_gt_16_subtile) { 16536 TEST_REQUIRES_ARM_NEON; 16537 for (size_t k = 17; k < 32; k++) { 16538 for (uint32_t n = 1; n <= 16; n++) { 16539 for (uint32_t m = 1; m <= 3; m++) { 16540 GemmMicrokernelTester() 16541 .mr(3) 16542 .nr(16) 16543 .kr(4) 16544 .sr(1) 16545 .m(m) 16546 .n(n) 16547 .k(k) 16548 .iterations(1) 16549 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16550 } 16551 } 16552 } 16553 } 16554 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_div_16)16555 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_div_16) { 16556 TEST_REQUIRES_ARM_NEON; 16557 for (size_t k = 32; k <= 160; k += 16) { 16558 GemmMicrokernelTester() 16559 .mr(3) 16560 .nr(16) 16561 .kr(4) 16562 .sr(1) 16563 .m(3) 16564 .n(16) 16565 .k(k) 16566 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16567 } 16568 } 16569 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,k_div_16_subtile)16570 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, k_div_16_subtile) { 16571 TEST_REQUIRES_ARM_NEON; 16572 for (size_t k = 32; k <= 160; k += 16) { 16573 for (uint32_t n = 1; n <= 16; n++) { 16574 for (uint32_t m = 1; m <= 3; m++) { 16575 GemmMicrokernelTester() 16576 .mr(3) 16577 .nr(16) 16578 .kr(4) 16579 .sr(1) 16580 .m(m) 16581 .n(n) 16582 .k(k) 16583 .iterations(1) 16584 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16585 } 16586 } 16587 } 16588 } 16589 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,n_gt_16)16590 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_gt_16) { 16591 TEST_REQUIRES_ARM_NEON; 16592 for (uint32_t n = 17; n < 32; n++) { 16593 for (size_t k = 1; k <= 80; k += 17) { 16594 GemmMicrokernelTester() 16595 .mr(3) 16596 .nr(16) 16597 .kr(4) 16598 .sr(1) 16599 .m(3) 16600 .n(n) 16601 .k(k) 16602 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16603 } 16604 } 16605 } 16606 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,n_gt_16_strided_cn)16607 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) { 16608 TEST_REQUIRES_ARM_NEON; 16609 for (uint32_t n = 17; n < 32; n++) { 16610 for (size_t k = 1; k <= 80; k += 17) { 16611 GemmMicrokernelTester() 16612 .mr(3) 16613 .nr(16) 16614 .kr(4) 16615 .sr(1) 16616 .m(3) 16617 .n(n) 16618 .k(k) 16619 .cn_stride(19) 16620 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16621 } 16622 } 16623 } 16624 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,n_gt_16_subtile)16625 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_gt_16_subtile) { 16626 TEST_REQUIRES_ARM_NEON; 16627 for (uint32_t n = 17; n < 32; n++) { 16628 for (size_t k = 1; k <= 80; k += 17) { 16629 for (uint32_t m = 1; m <= 3; m++) { 16630 GemmMicrokernelTester() 
16631 .mr(3) 16632 .nr(16) 16633 .kr(4) 16634 .sr(1) 16635 .m(m) 16636 .n(n) 16637 .k(k) 16638 .iterations(1) 16639 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16640 } 16641 } 16642 } 16643 } 16644 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,n_div_16)16645 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_div_16) { 16646 TEST_REQUIRES_ARM_NEON; 16647 for (uint32_t n = 32; n <= 48; n += 16) { 16648 for (size_t k = 1; k <= 80; k += 17) { 16649 GemmMicrokernelTester() 16650 .mr(3) 16651 .nr(16) 16652 .kr(4) 16653 .sr(1) 16654 .m(3) 16655 .n(n) 16656 .k(k) 16657 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16658 } 16659 } 16660 } 16661 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,n_div_16_strided_cn)16662 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) { 16663 TEST_REQUIRES_ARM_NEON; 16664 for (uint32_t n = 32; n <= 48; n += 16) { 16665 for (size_t k = 1; k <= 80; k += 17) { 16666 GemmMicrokernelTester() 16667 .mr(3) 16668 .nr(16) 16669 .kr(4) 16670 .sr(1) 16671 .m(3) 16672 .n(n) 16673 .k(k) 16674 .cn_stride(19) 16675 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16676 } 16677 } 16678 } 16679 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,n_div_16_subtile)16680 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_div_16_subtile) { 16681 TEST_REQUIRES_ARM_NEON; 16682 for (uint32_t n = 32; n <= 48; n += 16) { 16683 for (size_t k = 1; k <= 80; k += 17) { 16684 for (uint32_t m = 1; m <= 3; m++) { 16685 GemmMicrokernelTester() 16686 .mr(3) 16687 .nr(16) 16688 .kr(4) 16689 .sr(1) 16690 .m(m) 16691 .n(n) 16692 .k(k) 16693 .iterations(1) 16694 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 16695 } 16696 } 16697 } 16698 } 16699 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,small_kernel)16700 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, small_kernel) { 16701 TEST_REQUIRES_ARM_NEON; 16702 for (size_t k = 1; k <= 80; k += 17) { 16703 GemmMicrokernelTester() 16704 .mr(3) 16705 .nr(16) 16706 .kr(4) 16707 .sr(1) 16708 .m(3) 16709 .n(16) 16710 .k(k) 16711 .ks(3) 16712 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16713 } 16714 } 16715 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,small_kernel_subtile)16716 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, small_kernel_subtile) { 16717 TEST_REQUIRES_ARM_NEON; 16718 for (size_t k = 1; k <= 80; k += 17) { 16719 for (uint32_t n = 1; n <= 16; n++) { 16720 for (uint32_t m = 1; m <= 3; m++) { 16721 GemmMicrokernelTester() 16722 .mr(3) 16723 .nr(16) 16724 .kr(4) 16725 .sr(1) 16726 .m(m) 16727 .n(n) 16728 .k(k) 16729 .ks(3) 16730 .iterations(1) 16731 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16732 } 16733 } 16734 } 16735 } 16736 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,n_gt_16_small_kernel)16737 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) { 16738 TEST_REQUIRES_ARM_NEON; 16739 for (uint32_t n = 17; n < 32; n++) { 16740 for (size_t k = 1; k <= 80; k += 17) { 16741 GemmMicrokernelTester() 16742 .mr(3) 16743 .nr(16) 16744 .kr(4) 16745 .sr(1) 16746 .m(3) 16747 .n(n) 16748 .k(k) 16749 .ks(3) 16750 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16751 } 16752 } 16753 } 16754 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,n_div_16_small_kernel)16755 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) { 16756 TEST_REQUIRES_ARM_NEON; 16757 for (uint32_t n = 
32; n <= 48; n += 16) { 16758 for (size_t k = 1; k <= 80; k += 17) { 16759 GemmMicrokernelTester() 16760 .mr(3) 16761 .nr(16) 16762 .kr(4) 16763 .sr(1) 16764 .m(3) 16765 .n(n) 16766 .k(k) 16767 .ks(3) 16768 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16769 } 16770 } 16771 } 16772 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,strided_cm_subtile)16773 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, strided_cm_subtile) { 16774 TEST_REQUIRES_ARM_NEON; 16775 for (size_t k = 1; k <= 80; k += 17) { 16776 for (uint32_t n = 1; n <= 16; n++) { 16777 for (uint32_t m = 1; m <= 3; m++) { 16778 GemmMicrokernelTester() 16779 .mr(3) 16780 .nr(16) 16781 .kr(4) 16782 .sr(1) 16783 .m(m) 16784 .n(n) 16785 .k(k) 16786 .cm_stride(19) 16787 .iterations(1) 16788 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16789 } 16790 } 16791 } 16792 } 16793 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,a_offset)16794 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, a_offset) { 16795 TEST_REQUIRES_ARM_NEON; 16796 for (size_t k = 1; k <= 80; k += 17) { 16797 GemmMicrokernelTester() 16798 .mr(3) 16799 .nr(16) 16800 .kr(4) 16801 .sr(1) 16802 .m(3) 16803 .n(16) 16804 .k(k) 16805 .ks(3) 16806 .a_offset(251) 16807 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16808 } 16809 } 16810 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,zero)16811 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, zero) { 16812 TEST_REQUIRES_ARM_NEON; 16813 for (size_t k = 1; k <= 80; k += 17) { 16814 for (uint32_t mz = 0; mz < 3; mz++) { 16815 GemmMicrokernelTester() 16816 .mr(3) 16817 .nr(16) 16818 .kr(4) 16819 .sr(1) 16820 .m(3) 16821 .n(16) 16822 .k(k) 16823 .ks(3) 16824 .a_offset(251) 16825 .zero_index(mz) 16826 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16827 } 16828 } 16829 } 16830 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,qmin)16831 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, qmin) { 16832 TEST_REQUIRES_ARM_NEON; 16833 GemmMicrokernelTester() 16834 .mr(3) 16835 .nr(16) 16836 .kr(4) 16837 .sr(1) 16838 .m(3) 16839 .n(16) 16840 .k(16) 16841 .qmin(128) 16842 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16843 } 16844 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,qmax)16845 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, qmax) { 16846 TEST_REQUIRES_ARM_NEON; 16847 GemmMicrokernelTester() 16848 .mr(3) 16849 .nr(16) 16850 .kr(4) 16851 .sr(1) 16852 .m(3) 16853 .n(16) 16854 .k(16) 16855 .qmax(128) 16856 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16857 } 16858 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP,strided_cm)16859 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_DUP, strided_cm) { 16860 TEST_REQUIRES_ARM_NEON; 16861 GemmMicrokernelTester() 16862 .mr(3) 16863 .nr(16) 16864 .kr(4) 16865 .sr(1) 16866 .m(3) 16867 .n(16) 16868 .k(16) 16869 .cm_stride(19) 16870 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16871 } 16872 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 16873 16874 16875 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_eq_16)16876 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16) { 16877 TEST_REQUIRES_ARM_NEON; 16878 GemmMicrokernelTester() 16879 .mr(3) 16880 .nr(16) 16881 .kr(4) 16882 .sr(1) 16883 .m(3) 16884 .n(16) 16885 .k(16) 16886 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 16887 } 16888 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,strided_cn)16889 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, strided_cn) { 16890 TEST_REQUIRES_ARM_NEON; 16891 GemmMicrokernelTester() 16892 .mr(3) 16893 .nr(16) 16894 .kr(4) 16895 .sr(1) 16896 .m(3) 16897 .n(16) 16898 .k(16) 16899 .cn_stride(19) 16900 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16901 } 16902 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_eq_16_subtile)16903 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) { 16904 TEST_REQUIRES_ARM_NEON; 16905 for (uint32_t n = 1; n <= 16; n++) { 16906 for (uint32_t m = 1; m <= 3; m++) { 16907 GemmMicrokernelTester() 16908 .mr(3) 16909 .nr(16) 16910 .kr(4) 16911 .sr(1) 16912 .m(m) 16913 .n(n) 16914 .k(16) 16915 .iterations(1) 16916 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16917 } 16918 } 16919 } 16920 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)16921 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 16922 TEST_REQUIRES_ARM_NEON; 16923 for (uint32_t m = 1; m <= 3; m++) { 16924 GemmMicrokernelTester() 16925 .mr(3) 16926 .nr(16) 16927 .kr(4) 16928 .sr(1) 16929 .m(m) 16930 .n(16) 16931 .k(16) 16932 .iterations(1) 16933 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16934 } 16935 } 16936 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)16937 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 16938 TEST_REQUIRES_ARM_NEON; 16939 for (uint32_t n = 1; n <= 16; n++) { 16940 GemmMicrokernelTester() 16941 .mr(3) 16942 .nr(16) 16943 .kr(4) 16944 .sr(1) 16945 .m(3) 16946 .n(n) 16947 .k(16) 16948 .iterations(1) 16949 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16950 } 16951 } 16952 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_lt_16)16953 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_lt_16) { 16954 TEST_REQUIRES_ARM_NEON; 16955 for (size_t k = 1; k < 16; k++) { 16956 GemmMicrokernelTester() 16957 .mr(3) 16958 .nr(16) 16959 .kr(4) 16960 .sr(1) 16961 .m(3) 16962 .n(16) 16963 .k(k) 16964 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16965 } 16966 } 16967 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_lt_16_subtile)16968 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) { 16969 TEST_REQUIRES_ARM_NEON; 16970 for (size_t k = 1; k < 16; k++) { 16971 for (uint32_t n = 1; n <= 16; n++) { 16972 for (uint32_t m = 1; m <= 3; m++) { 16973 GemmMicrokernelTester() 16974 .mr(3) 16975 .nr(16) 16976 .kr(4) 16977 .sr(1) 16978 .m(m) 16979 .n(n) 16980 .k(k) 16981 .iterations(1) 16982 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 16983 } 16984 } 16985 } 16986 } 16987 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_gt_16)16988 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_gt_16) { 16989 TEST_REQUIRES_ARM_NEON; 16990 for (size_t k = 17; k < 32; k++) { 16991 GemmMicrokernelTester() 16992 .mr(3) 16993 .nr(16) 16994 .kr(4) 16995 .sr(1) 16996 .m(3) 16997 .n(16) 16998 .k(k) 16999 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17000 } 17001 } 17002 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_gt_16_subtile)17003 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) { 17004 TEST_REQUIRES_ARM_NEON; 17005 for (size_t k = 17; k < 32; k++) { 17006 for (uint32_t n = 1; n <= 16; n++) { 
17007 for (uint32_t m = 1; m <= 3; m++) { 17008 GemmMicrokernelTester() 17009 .mr(3) 17010 .nr(16) 17011 .kr(4) 17012 .sr(1) 17013 .m(m) 17014 .n(n) 17015 .k(k) 17016 .iterations(1) 17017 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17018 } 17019 } 17020 } 17021 } 17022 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_div_16)17023 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_div_16) { 17024 TEST_REQUIRES_ARM_NEON; 17025 for (size_t k = 32; k <= 160; k += 16) { 17026 GemmMicrokernelTester() 17027 .mr(3) 17028 .nr(16) 17029 .kr(4) 17030 .sr(1) 17031 .m(3) 17032 .n(16) 17033 .k(k) 17034 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17035 } 17036 } 17037 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,k_div_16_subtile)17038 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, k_div_16_subtile) { 17039 TEST_REQUIRES_ARM_NEON; 17040 for (size_t k = 32; k <= 160; k += 16) { 17041 for (uint32_t n = 1; n <= 16; n++) { 17042 for (uint32_t m = 1; m <= 3; m++) { 17043 GemmMicrokernelTester() 17044 .mr(3) 17045 .nr(16) 17046 .kr(4) 17047 .sr(1) 17048 .m(m) 17049 .n(n) 17050 .k(k) 17051 .iterations(1) 17052 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17053 } 17054 } 17055 } 17056 } 17057 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,n_gt_16)17058 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_gt_16) { 17059 TEST_REQUIRES_ARM_NEON; 17060 for (uint32_t n = 17; n < 32; n++) { 17061 for (size_t k = 1; k <= 80; k += 17) { 17062 GemmMicrokernelTester() 17063 .mr(3) 17064 .nr(16) 17065 .kr(4) 17066 .sr(1) 17067 .m(3) 17068 .n(n) 17069 .k(k) 17070 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17071 } 
17072 } 17073 } 17074 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,n_gt_16_strided_cn)17075 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) { 17076 TEST_REQUIRES_ARM_NEON; 17077 for (uint32_t n = 17; n < 32; n++) { 17078 for (size_t k = 1; k <= 80; k += 17) { 17079 GemmMicrokernelTester() 17080 .mr(3) 17081 .nr(16) 17082 .kr(4) 17083 .sr(1) 17084 .m(3) 17085 .n(n) 17086 .k(k) 17087 .cn_stride(19) 17088 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17089 } 17090 } 17091 } 17092 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,n_gt_16_subtile)17093 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) { 17094 TEST_REQUIRES_ARM_NEON; 17095 for (uint32_t n = 17; n < 32; n++) { 17096 for (size_t k = 1; k <= 80; k += 17) { 17097 for (uint32_t m = 1; m <= 3; m++) { 17098 GemmMicrokernelTester() 17099 .mr(3) 17100 .nr(16) 17101 .kr(4) 17102 .sr(1) 17103 .m(m) 17104 .n(n) 17105 .k(k) 17106 .iterations(1) 17107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17108 } 17109 } 17110 } 17111 } 17112 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,n_div_16)17113 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_div_16) { 17114 TEST_REQUIRES_ARM_NEON; 17115 for (uint32_t n = 32; n <= 48; n += 16) { 17116 for (size_t k = 1; k <= 80; k += 17) { 17117 GemmMicrokernelTester() 17118 .mr(3) 17119 .nr(16) 17120 .kr(4) 17121 .sr(1) 17122 .m(3) 17123 .n(n) 17124 .k(k) 17125 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17126 } 17127 } 17128 } 17129 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,n_div_16_strided_cn)17130 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) { 17131 TEST_REQUIRES_ARM_NEON; 17132 for (uint32_t n = 32; n <= 48; n += 16) { 
17133 for (size_t k = 1; k <= 80; k += 17) { 17134 GemmMicrokernelTester() 17135 .mr(3) 17136 .nr(16) 17137 .kr(4) 17138 .sr(1) 17139 .m(3) 17140 .n(n) 17141 .k(k) 17142 .cn_stride(19) 17143 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17144 } 17145 } 17146 } 17147 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,n_div_16_subtile)17148 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_div_16_subtile) { 17149 TEST_REQUIRES_ARM_NEON; 17150 for (uint32_t n = 32; n <= 48; n += 16) { 17151 for (size_t k = 1; k <= 80; k += 17) { 17152 for (uint32_t m = 1; m <= 3; m++) { 17153 GemmMicrokernelTester() 17154 .mr(3) 17155 .nr(16) 17156 .kr(4) 17157 .sr(1) 17158 .m(m) 17159 .n(n) 17160 .k(k) 17161 .iterations(1) 17162 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17163 } 17164 } 17165 } 17166 } 17167 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,small_kernel)17168 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, small_kernel) { 17169 TEST_REQUIRES_ARM_NEON; 17170 for (size_t k = 1; k <= 80; k += 17) { 17171 GemmMicrokernelTester() 17172 .mr(3) 17173 .nr(16) 17174 .kr(4) 17175 .sr(1) 17176 .m(3) 17177 .n(16) 17178 .k(k) 17179 .ks(3) 17180 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17181 } 17182 } 17183 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,small_kernel_subtile)17184 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, small_kernel_subtile) { 17185 TEST_REQUIRES_ARM_NEON; 17186 for (size_t k = 1; k <= 80; k += 17) { 17187 for (uint32_t n = 1; n <= 16; n++) { 17188 for (uint32_t m = 1; m <= 3; m++) { 17189 GemmMicrokernelTester() 17190 .mr(3) 17191 .nr(16) 17192 .kr(4) 17193 .sr(1) 17194 .m(m) 17195 .n(n) 17196 .k(k) 17197 .ks(3) 17198 .iterations(1) 17199 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17200 } 17201 } 17202 } 17203 } 17204 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,n_gt_16_small_kernel)17205 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_gt_16_small_kernel) { 17206 TEST_REQUIRES_ARM_NEON; 17207 for (uint32_t n = 17; n < 32; n++) { 17208 for (size_t k = 1; k <= 80; k += 17) { 17209 GemmMicrokernelTester() 17210 .mr(3) 17211 .nr(16) 17212 .kr(4) 17213 .sr(1) 17214 .m(3) 17215 .n(n) 17216 .k(k) 17217 .ks(3) 17218 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17219 } 17220 } 17221 } 17222 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,n_div_16_small_kernel)17223 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, n_div_16_small_kernel) { 17224 TEST_REQUIRES_ARM_NEON; 17225 for (uint32_t n = 32; n <= 48; n += 16) { 17226 for (size_t k = 1; k <= 80; k += 17) { 17227 GemmMicrokernelTester() 17228 .mr(3) 17229 .nr(16) 17230 .kr(4) 17231 .sr(1) 17232 .m(3) 17233 .n(n) 17234 .k(k) 17235 .ks(3) 17236 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17237 } 17238 } 17239 } 17240 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,strided_cm_subtile)17241 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, strided_cm_subtile) { 17242 TEST_REQUIRES_ARM_NEON; 17243 for (size_t k = 1; k <= 80; k += 17) { 17244 for (uint32_t n = 1; n <= 16; n++) { 17245 for (uint32_t m = 1; m <= 3; m++) { 17246 GemmMicrokernelTester() 17247 .mr(3) 17248 .nr(16) 17249 .kr(4) 17250 .sr(1) 17251 .m(m) 17252 .n(n) 17253 .k(k) 17254 .cm_stride(19) 17255 .iterations(1) 17256 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17257 } 17258 } 17259 } 17260 } 17261 
// 3x16c4 NEON MLAL_LD2R microkernel: a_offset, zero, qmin and qmax cases.
// Every case configures the tester with mr=3, nr=16, kr=4, sr=1 on a full
// 3x16 tile (m=3, n=16).

// ks set to 3 and a_offset set to 251; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(3)
      .nr(16)
      .kr(4)
      .sr(1)
      .m(3)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(251)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally passing zero_index = 0, 1, 2 in turn.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 3; mz++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(16)
        .kr(4)
        .sr(1)
        .m(3)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(251)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 16 with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(3)
    .n(16)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k = 16 with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(3)
    .nr(16)
    .kr(4)
    .sr(1)
    .m(3)
    .n(16)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
17326 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R,strided_cm)17327 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MLAL_LD2R, strided_cm) { 17328 TEST_REQUIRES_ARM_NEON; 17329 GemmMicrokernelTester() 17330 .mr(3) 17331 .nr(16) 17332 .kr(4) 17333 .sr(1) 17334 .m(3) 17335 .n(16) 17336 .k(16) 17337 .cm_stride(19) 17338 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17339 } 17340 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 17341 17342 17343 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_eq_8)17344 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8) { 17345 TEST_REQUIRES_ARM_NEON; 17346 GemmMicrokernelTester() 17347 .mr(3) 17348 .nr(16) 17349 .kr(4) 17350 .sr(1) 17351 .m(3) 17352 .n(16) 17353 .k(8) 17354 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17355 } 17356 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,strided_cn)17357 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cn) { 17358 TEST_REQUIRES_ARM_NEON; 17359 GemmMicrokernelTester() 17360 .mr(3) 17361 .nr(16) 17362 .kr(4) 17363 .sr(1) 17364 .m(3) 17365 .n(16) 17366 .k(8) 17367 .cn_stride(19) 17368 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17369 } 17370 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_eq_8_subtile)17371 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile) { 17372 TEST_REQUIRES_ARM_NEON; 17373 for (uint32_t n = 1; n <= 16; n++) { 17374 for (uint32_t m = 1; m <= 3; m++) { 17375 GemmMicrokernelTester() 17376 .mr(3) 17377 .nr(16) 17378 .kr(4) 17379 .sr(1) 17380 .m(m) 17381 .n(n) 17382 .k(8) 17383 .iterations(1) 17384 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17385 
} 17386 } 17387 } 17388 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_eq_8_subtile_m)17389 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) { 17390 TEST_REQUIRES_ARM_NEON; 17391 for (uint32_t m = 1; m <= 3; m++) { 17392 GemmMicrokernelTester() 17393 .mr(3) 17394 .nr(16) 17395 .kr(4) 17396 .sr(1) 17397 .m(m) 17398 .n(16) 17399 .k(8) 17400 .iterations(1) 17401 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17402 } 17403 } 17404 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_eq_8_subtile_n)17405 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) { 17406 TEST_REQUIRES_ARM_NEON; 17407 for (uint32_t n = 1; n <= 16; n++) { 17408 GemmMicrokernelTester() 17409 .mr(3) 17410 .nr(16) 17411 .kr(4) 17412 .sr(1) 17413 .m(3) 17414 .n(n) 17415 .k(8) 17416 .iterations(1) 17417 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17418 } 17419 } 17420 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_lt_8)17421 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_lt_8) { 17422 TEST_REQUIRES_ARM_NEON; 17423 for (size_t k = 1; k < 8; k++) { 17424 GemmMicrokernelTester() 17425 .mr(3) 17426 .nr(16) 17427 .kr(4) 17428 .sr(1) 17429 .m(3) 17430 .n(16) 17431 .k(k) 17432 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17433 } 17434 } 17435 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_lt_8_subtile)17436 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_lt_8_subtile) { 17437 TEST_REQUIRES_ARM_NEON; 17438 for (size_t k = 1; k < 8; k++) { 17439 for (uint32_t n = 1; n <= 16; n++) { 17440 for (uint32_t m = 1; m <= 3; m++) { 17441 GemmMicrokernelTester() 17442 .mr(3) 17443 .nr(16) 17444 .kr(4) 17445 .sr(1) 17446 .m(m) 17447 .n(n) 17448 .k(k) 17449 .iterations(1) 17450 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17451 } 17452 } 17453 } 17454 } 17455 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_gt_8)17456 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_gt_8) { 17457 TEST_REQUIRES_ARM_NEON; 17458 for (size_t k = 9; k < 16; k++) { 17459 GemmMicrokernelTester() 17460 .mr(3) 17461 .nr(16) 17462 .kr(4) 17463 .sr(1) 17464 .m(3) 17465 .n(16) 17466 .k(k) 17467 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17468 } 17469 } 17470 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_gt_8_subtile)17471 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_gt_8_subtile) { 17472 TEST_REQUIRES_ARM_NEON; 17473 for (size_t k = 9; k < 16; k++) { 17474 for (uint32_t n = 1; n <= 16; n++) { 17475 for (uint32_t m = 1; m <= 3; m++) { 17476 GemmMicrokernelTester() 17477 .mr(3) 17478 .nr(16) 17479 .kr(4) 17480 .sr(1) 17481 .m(m) 17482 .n(n) 17483 .k(k) 17484 .iterations(1) 17485 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17486 } 17487 } 17488 } 17489 } 17490 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_div_8)17491 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_div_8) { 17492 TEST_REQUIRES_ARM_NEON; 17493 for (size_t k = 16; k <= 80; k += 8) { 17494 GemmMicrokernelTester() 17495 .mr(3) 17496 .nr(16) 17497 .kr(4) 17498 .sr(1) 17499 .m(3) 17500 .n(16) 17501 .k(k) 17502 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17503 } 17504 } 17505 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,k_div_8_subtile)17506 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, k_div_8_subtile) { 17507 TEST_REQUIRES_ARM_NEON; 17508 for (size_t k = 16; k <= 80; k += 8) { 17509 for (uint32_t n 
= 1; n <= 16; n++) { 17510 for (uint32_t m = 1; m <= 3; m++) { 17511 GemmMicrokernelTester() 17512 .mr(3) 17513 .nr(16) 17514 .kr(4) 17515 .sr(1) 17516 .m(m) 17517 .n(n) 17518 .k(k) 17519 .iterations(1) 17520 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17521 } 17522 } 17523 } 17524 } 17525 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,n_gt_16)17526 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16) { 17527 TEST_REQUIRES_ARM_NEON; 17528 for (uint32_t n = 17; n < 32; n++) { 17529 for (size_t k = 1; k <= 40; k += 9) { 17530 GemmMicrokernelTester() 17531 .mr(3) 17532 .nr(16) 17533 .kr(4) 17534 .sr(1) 17535 .m(3) 17536 .n(n) 17537 .k(k) 17538 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17539 } 17540 } 17541 } 17542 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,n_gt_16_strided_cn)17543 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) { 17544 TEST_REQUIRES_ARM_NEON; 17545 for (uint32_t n = 17; n < 32; n++) { 17546 for (size_t k = 1; k <= 40; k += 9) { 17547 GemmMicrokernelTester() 17548 .mr(3) 17549 .nr(16) 17550 .kr(4) 17551 .sr(1) 17552 .m(3) 17553 .n(n) 17554 .k(k) 17555 .cn_stride(19) 17556 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17557 } 17558 } 17559 } 17560 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,n_gt_16_subtile)17561 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_subtile) { 17562 TEST_REQUIRES_ARM_NEON; 17563 for (uint32_t n = 17; n < 32; n++) { 17564 for (size_t k = 1; k <= 40; k += 9) { 17565 for (uint32_t m = 1; m <= 3; m++) { 17566 GemmMicrokernelTester() 17567 .mr(3) 17568 .nr(16) 17569 .kr(4) 17570 .sr(1) 17571 .m(m) 17572 .n(n) 17573 .k(k) 17574 .iterations(1) 17575 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17576 } 17577 } 17578 } 17579 } 17580 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,n_div_16)17581 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16) { 17582 TEST_REQUIRES_ARM_NEON; 17583 for (uint32_t n = 32; n <= 48; n += 16) { 17584 for (size_t k = 1; k <= 40; k += 9) { 17585 GemmMicrokernelTester() 17586 .mr(3) 17587 .nr(16) 17588 .kr(4) 17589 .sr(1) 17590 .m(3) 17591 .n(n) 17592 .k(k) 17593 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17594 } 17595 } 17596 } 17597 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,n_div_16_strided_cn)17598 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) { 17599 TEST_REQUIRES_ARM_NEON; 17600 for (uint32_t n = 32; n <= 48; n += 16) { 17601 for (size_t k = 1; k <= 40; k += 9) { 17602 GemmMicrokernelTester() 17603 .mr(3) 17604 .nr(16) 17605 .kr(4) 17606 .sr(1) 17607 .m(3) 17608 .n(n) 17609 .k(k) 17610 .cn_stride(19) 17611 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17612 } 17613 } 17614 } 17615 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,n_div_16_subtile)17616 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_subtile) { 17617 TEST_REQUIRES_ARM_NEON; 17618 for (uint32_t n = 32; n <= 48; n += 16) { 17619 for (size_t k = 1; k <= 40; k += 9) { 17620 for (uint32_t m = 1; m <= 3; m++) { 17621 GemmMicrokernelTester() 17622 .mr(3) 17623 .nr(16) 17624 .kr(4) 17625 .sr(1) 17626 .m(m) 17627 .n(n) 17628 .k(k) 17629 .iterations(1) 17630 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17631 } 17632 } 17633 } 17634 } 17635 
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,small_kernel)17636 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, small_kernel) { 17637 TEST_REQUIRES_ARM_NEON; 17638 for (size_t k = 1; k <= 40; k += 9) { 17639 GemmMicrokernelTester() 17640 .mr(3) 17641 .nr(16) 17642 .kr(4) 17643 .sr(1) 17644 .m(3) 17645 .n(16) 17646 .k(k) 17647 .ks(3) 17648 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17649 } 17650 } 17651 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,small_kernel_subtile)17652 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, small_kernel_subtile) { 17653 TEST_REQUIRES_ARM_NEON; 17654 for (size_t k = 1; k <= 40; k += 9) { 17655 for (uint32_t n = 1; n <= 16; n++) { 17656 for (uint32_t m = 1; m <= 3; m++) { 17657 GemmMicrokernelTester() 17658 .mr(3) 17659 .nr(16) 17660 .kr(4) 17661 .sr(1) 17662 .m(m) 17663 .n(n) 17664 .k(k) 17665 .ks(3) 17666 .iterations(1) 17667 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17668 } 17669 } 17670 } 17671 } 17672 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,n_gt_16_small_kernel)17673 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_gt_16_small_kernel) { 17674 TEST_REQUIRES_ARM_NEON; 17675 for (uint32_t n = 17; n < 32; n++) { 17676 for (size_t k = 1; k <= 40; k += 9) { 17677 GemmMicrokernelTester() 17678 .mr(3) 17679 .nr(16) 17680 .kr(4) 17681 .sr(1) 17682 .m(3) 17683 .n(n) 17684 .k(k) 17685 .ks(3) 17686 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17687 } 17688 } 17689 } 17690 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,n_div_16_small_kernel)17691 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, n_div_16_small_kernel) { 17692 TEST_REQUIRES_ARM_NEON; 17693 for (uint32_t n = 32; n <= 48; n += 16) { 17694 for (size_t k = 1; k <= 40; k 
+= 9) { 17695 GemmMicrokernelTester() 17696 .mr(3) 17697 .nr(16) 17698 .kr(4) 17699 .sr(1) 17700 .m(3) 17701 .n(n) 17702 .k(k) 17703 .ks(3) 17704 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17705 } 17706 } 17707 } 17708 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,strided_cm_subtile)17709 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cm_subtile) { 17710 TEST_REQUIRES_ARM_NEON; 17711 for (size_t k = 1; k <= 40; k += 9) { 17712 for (uint32_t n = 1; n <= 16; n++) { 17713 for (uint32_t m = 1; m <= 3; m++) { 17714 GemmMicrokernelTester() 17715 .mr(3) 17716 .nr(16) 17717 .kr(4) 17718 .sr(1) 17719 .m(m) 17720 .n(n) 17721 .k(k) 17722 .cm_stride(19) 17723 .iterations(1) 17724 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17725 } 17726 } 17727 } 17728 } 17729 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,a_offset)17730 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, a_offset) { 17731 TEST_REQUIRES_ARM_NEON; 17732 for (size_t k = 1; k <= 40; k += 9) { 17733 GemmMicrokernelTester() 17734 .mr(3) 17735 .nr(16) 17736 .kr(4) 17737 .sr(1) 17738 .m(3) 17739 .n(16) 17740 .k(k) 17741 .ks(3) 17742 .a_offset(127) 17743 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17744 } 17745 } 17746 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,zero)17747 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, zero) { 17748 TEST_REQUIRES_ARM_NEON; 17749 for (size_t k = 1; k <= 40; k += 9) { 17750 for (uint32_t mz = 0; mz < 3; mz++) { 17751 GemmMicrokernelTester() 17752 .mr(3) 17753 .nr(16) 17754 .kr(4) 17755 .sr(1) 17756 .m(3) 17757 .n(16) 17758 .k(k) 17759 .ks(3) 17760 .a_offset(127) 17761 .zero_index(mz) 17762 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17763 } 17764 } 17765 } 17766 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,qmin)17767 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, qmin) { 17768 TEST_REQUIRES_ARM_NEON; 17769 GemmMicrokernelTester() 17770 .mr(3) 17771 .nr(16) 17772 .kr(4) 17773 .sr(1) 17774 .m(3) 17775 .n(16) 17776 .k(8) 17777 .qmin(128) 17778 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17779 } 17780 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,qmax)17781 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, qmax) { 17782 TEST_REQUIRES_ARM_NEON; 17783 GemmMicrokernelTester() 17784 .mr(3) 17785 .nr(16) 17786 .kr(4) 17787 .sr(1) 17788 .m(3) 17789 .n(16) 17790 .k(8) 17791 .qmax(128) 17792 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17793 } 17794 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R,strided_cm)17795 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD2R, strided_cm) { 17796 TEST_REQUIRES_ARM_NEON; 17797 GemmMicrokernelTester() 17798 .mr(3) 17799 .nr(16) 17800 .kr(4) 17801 .sr(1) 17802 .m(3) 17803 .n(16) 17804 .k(8) 17805 .cm_stride(19) 17806 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17807 } 17808 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 17809 17810 17811 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,k_eq_16)17812 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16) { 17813 TEST_REQUIRES_ARM_NEON; 17814 GemmMicrokernelTester() 17815 .mr(3) 17816 .nr(16) 17817 .kr(4) 17818 .sr(2) 17819 .m(3) 17820 .n(16) 17821 .k(16) 17822 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 17823 } 17824 
// Full tile at k = 16 with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  auto tester = GemmMicrokernelTester();
  tester.mr(3).nr(16).kr(4).sr(2);
  tester.m(3).n(16).k(16).cn_stride(19);
  tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Every (m, n) with m in [1, 3] and n in [1, 16] at k = 16; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    for (uint32_t rows = 1; rows <= 3; ++rows) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(4).sr(2);
      tester.m(rows).n(cols).k(16).iterations(1);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m swept over [1, 3] with n = 16 and k = 16; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t rows = 1; rows <= 3; ++rows) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(4).sr(2);
    tester.m(rows).n(16).k(16).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// n swept over [1, 16] with m = 3 and k = 16; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 1; cols <= 16; ++cols) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(4).sr(2);
    tester.m(3).n(cols).k(16).iterations(1);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full tile for every k in [1, 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(4).sr(2);
    tester.m(3).n(16).k(depth);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile for every k in [1, 16); one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth < 16; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for every k in [17, 32).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(4).sr(2);
    tester.m(3).n(16).k(depth);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile for every k in [17, 32); one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 17; depth < 32; ++depth) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(4).sr(2);
    tester.m(3).n(16).k(depth);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile for k = 32, 48, ..., 160; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 32; depth <= 160; depth += 16) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [17, 32) with m = 3, k stepping 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(4).sr(2);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
// n in [17, 32) with m = 3 and cn_stride = 19, k stepping by 17 up to 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(4).sr(2);
      tester.m(3).n(cols).k(depth).cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [17, 32), k stepping by 17 up to 80, m in [1, 3]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 (multiples of 16) with m = 3, k stepping by 17 up to 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(4).sr(2);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(4).sr(2);
      tester.m(3).n(cols).k(depth).cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k stepping by 17 up to 80, m in [1, 3]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with ks = 3, k stepping 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(4).sr(2);
    tester.m(3).n(16).k(depth).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile with ks = 3, k stepping by 17 up to 80; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    for (uint32_t cols = 1; cols <= 16; ++cols) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(4).sr(2);
        tester.m(rows).n(cols).k(depth).ks(3).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,n_gt_16_small_kernel)18141 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_small_kernel) { 18142 TEST_REQUIRES_ARM_NEON; 18143 for (uint32_t n = 17; n < 32; n++) { 18144 for (size_t k = 1; k <= 80; k += 17) { 18145 GemmMicrokernelTester() 18146 .mr(3) 18147 .nr(16) 18148 .kr(4) 18149 .sr(2) 18150 .m(3) 18151 .n(n) 18152 .k(k) 18153 .ks(3) 18154 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18155 } 18156 } 18157 } 18158 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,n_div_16_small_kernel)18159 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_small_kernel) { 18160 TEST_REQUIRES_ARM_NEON; 18161 for (uint32_t n = 32; n <= 48; n += 16) { 18162 for (size_t k = 1; k <= 80; k += 17) { 18163 GemmMicrokernelTester() 18164 .mr(3) 18165 .nr(16) 18166 .kr(4) 18167 .sr(2) 18168 .m(3) 18169 .n(n) 18170 .k(k) 18171 .ks(3) 18172 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18173 } 18174 } 18175 } 18176 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,strided_cm_subtile)18177 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cm_subtile) { 18178 TEST_REQUIRES_ARM_NEON; 18179 for (size_t k = 1; k <= 80; k += 17) { 18180 for (uint32_t n = 1; n <= 16; n++) { 18181 for (uint32_t m = 1; m <= 3; m++) { 18182 GemmMicrokernelTester() 18183 .mr(3) 18184 .nr(16) 18185 .kr(4) 18186 .sr(2) 18187 .m(m) 18188 .n(n) 18189 .k(k) 18190 .cm_stride(19) 18191 .iterations(1) 18192 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18193 } 18194 } 18195 } 18196 } 18197 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,a_offset)18198 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, a_offset) { 18199 TEST_REQUIRES_ARM_NEON; 18200 for (size_t k = 1; k <= 80; k += 17) { 18201 
GemmMicrokernelTester() 18202 .mr(3) 18203 .nr(16) 18204 .kr(4) 18205 .sr(2) 18206 .m(3) 18207 .n(16) 18208 .k(k) 18209 .ks(3) 18210 .a_offset(251) 18211 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18212 } 18213 } 18214 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,zero)18215 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, zero) { 18216 TEST_REQUIRES_ARM_NEON; 18217 for (size_t k = 1; k <= 80; k += 17) { 18218 for (uint32_t mz = 0; mz < 3; mz++) { 18219 GemmMicrokernelTester() 18220 .mr(3) 18221 .nr(16) 18222 .kr(4) 18223 .sr(2) 18224 .m(3) 18225 .n(16) 18226 .k(k) 18227 .ks(3) 18228 .a_offset(251) 18229 .zero_index(mz) 18230 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18231 } 18232 } 18233 } 18234 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,qmin)18235 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, qmin) { 18236 TEST_REQUIRES_ARM_NEON; 18237 GemmMicrokernelTester() 18238 .mr(3) 18239 .nr(16) 18240 .kr(4) 18241 .sr(2) 18242 .m(3) 18243 .n(16) 18244 .k(16) 18245 .qmin(128) 18246 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18247 } 18248 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,qmax)18249 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, qmax) { 18250 TEST_REQUIRES_ARM_NEON; 18251 GemmMicrokernelTester() 18252 .mr(3) 18253 .nr(16) 18254 .kr(4) 18255 .sr(2) 18256 .m(3) 18257 .n(16) 18258 .k(16) 18259 .qmax(128) 18260 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18261 } 18262 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL,strided_cm)18263 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cm) { 18264 TEST_REQUIRES_ARM_NEON; 18265 GemmMicrokernelTester() 18266 .mr(3) 18267 .nr(16) 18268 .kr(4) 18269 
.sr(2) 18270 .m(3) 18271 .n(16) 18272 .k(16) 18273 .cm_stride(19) 18274 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18275 } 18276 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 18277 18278 18279 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_eq_16)18280 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16) { 18281 TEST_REQUIRES_ARM_NEON; 18282 GemmMicrokernelTester() 18283 .mr(3) 18284 .nr(16) 18285 .kr(8) 18286 .sr(1) 18287 .m(3) 18288 .n(16) 18289 .k(16) 18290 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18291 } 18292 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,strided_cn)18293 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cn) { 18294 TEST_REQUIRES_ARM_NEON; 18295 GemmMicrokernelTester() 18296 .mr(3) 18297 .nr(16) 18298 .kr(8) 18299 .sr(1) 18300 .m(3) 18301 .n(16) 18302 .k(16) 18303 .cn_stride(19) 18304 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18305 } 18306 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_eq_16_subtile)18307 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile) { 18308 TEST_REQUIRES_ARM_NEON; 18309 for (uint32_t n = 1; n <= 16; n++) { 18310 for (uint32_t m = 1; m <= 3; m++) { 18311 GemmMicrokernelTester() 18312 .mr(3) 18313 .nr(16) 18314 .kr(8) 18315 .sr(1) 18316 .m(m) 18317 .n(n) 18318 .k(16) 18319 .iterations(1) 18320 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18321 } 18322 } 18323 } 18324 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_eq_16_subtile_m)18325 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile_m) { 18326 TEST_REQUIRES_ARM_NEON; 18327 for (uint32_t m = 1; m <= 3; m++) { 18328 GemmMicrokernelTester() 18329 .mr(3) 18330 
.nr(16) 18331 .kr(8) 18332 .sr(1) 18333 .m(m) 18334 .n(16) 18335 .k(16) 18336 .iterations(1) 18337 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18338 } 18339 } 18340 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_eq_16_subtile_n)18341 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_eq_16_subtile_n) { 18342 TEST_REQUIRES_ARM_NEON; 18343 for (uint32_t n = 1; n <= 16; n++) { 18344 GemmMicrokernelTester() 18345 .mr(3) 18346 .nr(16) 18347 .kr(8) 18348 .sr(1) 18349 .m(3) 18350 .n(n) 18351 .k(16) 18352 .iterations(1) 18353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18354 } 18355 } 18356 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_lt_16)18357 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_lt_16) { 18358 TEST_REQUIRES_ARM_NEON; 18359 for (size_t k = 1; k < 16; k++) { 18360 GemmMicrokernelTester() 18361 .mr(3) 18362 .nr(16) 18363 .kr(8) 18364 .sr(1) 18365 .m(3) 18366 .n(16) 18367 .k(k) 18368 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18369 } 18370 } 18371 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_lt_16_subtile)18372 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_lt_16_subtile) { 18373 TEST_REQUIRES_ARM_NEON; 18374 for (size_t k = 1; k < 16; k++) { 18375 for (uint32_t n = 1; n <= 16; n++) { 18376 for (uint32_t m = 1; m <= 3; m++) { 18377 GemmMicrokernelTester() 18378 .mr(3) 18379 .nr(16) 18380 .kr(8) 18381 .sr(1) 18382 .m(m) 18383 .n(n) 18384 .k(k) 18385 .iterations(1) 18386 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18387 } 18388 } 18389 } 18390 } 18391 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_gt_16)18392 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_gt_16) { 18393 TEST_REQUIRES_ARM_NEON; 18394 for (size_t 
k = 17; k < 32; k++) { 18395 GemmMicrokernelTester() 18396 .mr(3) 18397 .nr(16) 18398 .kr(8) 18399 .sr(1) 18400 .m(3) 18401 .n(16) 18402 .k(k) 18403 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18404 } 18405 } 18406 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_gt_16_subtile)18407 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_gt_16_subtile) { 18408 TEST_REQUIRES_ARM_NEON; 18409 for (size_t k = 17; k < 32; k++) { 18410 for (uint32_t n = 1; n <= 16; n++) { 18411 for (uint32_t m = 1; m <= 3; m++) { 18412 GemmMicrokernelTester() 18413 .mr(3) 18414 .nr(16) 18415 .kr(8) 18416 .sr(1) 18417 .m(m) 18418 .n(n) 18419 .k(k) 18420 .iterations(1) 18421 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18422 } 18423 } 18424 } 18425 } 18426 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_div_16)18427 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_div_16) { 18428 TEST_REQUIRES_ARM_NEON; 18429 for (size_t k = 32; k <= 160; k += 16) { 18430 GemmMicrokernelTester() 18431 .mr(3) 18432 .nr(16) 18433 .kr(8) 18434 .sr(1) 18435 .m(3) 18436 .n(16) 18437 .k(k) 18438 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18439 } 18440 } 18441 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,k_div_16_subtile)18442 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, k_div_16_subtile) { 18443 TEST_REQUIRES_ARM_NEON; 18444 for (size_t k = 32; k <= 160; k += 16) { 18445 for (uint32_t n = 1; n <= 16; n++) { 18446 for (uint32_t m = 1; m <= 3; m++) { 18447 GemmMicrokernelTester() 18448 .mr(3) 18449 .nr(16) 18450 .kr(8) 18451 .sr(1) 18452 .m(m) 18453 .n(n) 18454 .k(k) 18455 .iterations(1) 18456 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18457 } 18458 } 18459 } 18460 } 18461 
// n in [17, 32) with m = 3, k stepping 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [17, 32), k stepping by 17 up to 80, m in [1, 3]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 17; cols < 32; ++cols) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 (multiples of 16) with m = 3, k stepping by 17 up to 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      auto tester = GemmMicrokernelTester();
      tester.mr(3).nr(16).kr(8).sr(1);
      tester.m(3).n(cols).k(depth).cn_stride(19);
      tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k stepping by 17 up to 80, m in [1, 3]; one iteration per shape.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t cols = 32; cols <= 48; cols += 16) {
    for (size_t depth = 1; depth <= 80; depth += 17) {
      for (uint32_t rows = 1; rows <= 3; ++rows) {
        auto tester = GemmMicrokernelTester();
        tester.mr(3).nr(16).kr(8).sr(1);
        tester.m(rows).n(cols).k(depth).iterations(1);
        tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with ks = 3, k stepping 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t depth = 1; depth <= 80; depth += 17) {
    auto tester = GemmMicrokernelTester();
    tester.mr(3).nr(16).kr(8).sr(1);
    tester.m(3).n(16).k(depth).ks(3);
    tester.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, small_kernel_subtile) { 18589 TEST_REQUIRES_ARM_NEON; 18590 for (size_t k = 1; k <= 80; k += 17) { 18591 for (uint32_t n = 1; n <= 16; n++) { 18592 for (uint32_t m = 1; m <= 3; m++) { 18593 GemmMicrokernelTester() 18594 .mr(3) 18595 .nr(16) 18596 .kr(8) 18597 .sr(1) 18598 .m(m) 18599 .n(n) 18600 .k(k) 18601 .ks(3) 18602 .iterations(1) 18603 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18604 } 18605 } 18606 } 18607 } 18608 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,n_gt_16_small_kernel)18609 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_gt_16_small_kernel) { 18610 TEST_REQUIRES_ARM_NEON; 18611 for (uint32_t n = 17; n < 32; n++) { 18612 for (size_t k = 1; k <= 80; k += 17) { 18613 GemmMicrokernelTester() 18614 .mr(3) 18615 .nr(16) 18616 .kr(8) 18617 .sr(1) 18618 .m(3) 18619 .n(n) 18620 .k(k) 18621 .ks(3) 18622 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18623 } 18624 } 18625 } 18626 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,n_div_16_small_kernel)18627 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, n_div_16_small_kernel) { 18628 TEST_REQUIRES_ARM_NEON; 18629 for (uint32_t n = 32; n <= 48; n += 16) { 18630 for (size_t k = 1; k <= 80; k += 17) { 18631 GemmMicrokernelTester() 18632 .mr(3) 18633 .nr(16) 18634 .kr(8) 18635 .sr(1) 18636 .m(3) 18637 .n(n) 18638 .k(k) 18639 .ks(3) 18640 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18641 } 18642 } 18643 } 18644 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,strided_cm_subtile)18645 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cm_subtile) { 18646 TEST_REQUIRES_ARM_NEON; 18647 for (size_t k = 1; k <= 80; k += 17) { 18648 for (uint32_t n = 1; n <= 16; n++) { 18649 for (uint32_t m = 1; m <= 3; m++) { 18650 
GemmMicrokernelTester() 18651 .mr(3) 18652 .nr(16) 18653 .kr(8) 18654 .sr(1) 18655 .m(m) 18656 .n(n) 18657 .k(k) 18658 .cm_stride(19) 18659 .iterations(1) 18660 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18661 } 18662 } 18663 } 18664 } 18665 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,a_offset)18666 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, a_offset) { 18667 TEST_REQUIRES_ARM_NEON; 18668 for (size_t k = 1; k <= 80; k += 17) { 18669 GemmMicrokernelTester() 18670 .mr(3) 18671 .nr(16) 18672 .kr(8) 18673 .sr(1) 18674 .m(3) 18675 .n(16) 18676 .k(k) 18677 .ks(3) 18678 .a_offset(251) 18679 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18680 } 18681 } 18682 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,zero)18683 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, zero) { 18684 TEST_REQUIRES_ARM_NEON; 18685 for (size_t k = 1; k <= 80; k += 17) { 18686 for (uint32_t mz = 0; mz < 3; mz++) { 18687 GemmMicrokernelTester() 18688 .mr(3) 18689 .nr(16) 18690 .kr(8) 18691 .sr(1) 18692 .m(3) 18693 .n(16) 18694 .k(k) 18695 .ks(3) 18696 .a_offset(251) 18697 .zero_index(mz) 18698 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18699 } 18700 } 18701 } 18702 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,qmin)18703 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, qmin) { 18704 TEST_REQUIRES_ARM_NEON; 18705 GemmMicrokernelTester() 18706 .mr(3) 18707 .nr(16) 18708 .kr(8) 18709 .sr(1) 18710 .m(3) 18711 .n(16) 18712 .k(16) 18713 .qmin(128) 18714 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18715 } 18716 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,qmax)18717 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, qmax) { 18718 TEST_REQUIRES_ARM_NEON; 18719 
GemmMicrokernelTester() 18720 .mr(3) 18721 .nr(16) 18722 .kr(8) 18723 .sr(1) 18724 .m(3) 18725 .n(16) 18726 .k(16) 18727 .qmax(128) 18728 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18729 } 18730 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL,strided_cm)18731 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MLAL, strided_cm) { 18732 TEST_REQUIRES_ARM_NEON; 18733 GemmMicrokernelTester() 18734 .mr(3) 18735 .nr(16) 18736 .kr(8) 18737 .sr(1) 18738 .m(3) 18739 .n(16) 18740 .k(16) 18741 .cm_stride(19) 18742 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18743 } 18744 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 18745 18746 18747 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_eq_8)18748 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8) { 18749 TEST_REQUIRES_ARM_NEON; 18750 GemmMicrokernelTester() 18751 .mr(4) 18752 .nr(8) 18753 .kr(1) 18754 .sr(1) 18755 .m(4) 18756 .n(8) 18757 .k(8) 18758 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18759 } 18760 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,strided_cn)18761 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cn) { 18762 TEST_REQUIRES_ARM_NEON; 18763 GemmMicrokernelTester() 18764 .mr(4) 18765 .nr(8) 18766 .kr(1) 18767 .sr(1) 18768 .m(4) 18769 .n(8) 18770 .k(8) 18771 .cn_stride(11) 18772 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18773 } 18774 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_eq_8_subtile)18775 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile) { 18776 TEST_REQUIRES_ARM_NEON; 18777 for (uint32_t n = 1; n <= 8; n++) { 18778 for (uint32_t m = 1; m <= 4; m++) { 18779 GemmMicrokernelTester() 18780 .mr(4) 18781 
.nr(8) 18782 .kr(1) 18783 .sr(1) 18784 .m(m) 18785 .n(n) 18786 .k(8) 18787 .iterations(1) 18788 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18789 } 18790 } 18791 } 18792 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_eq_8_subtile_m)18793 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) { 18794 TEST_REQUIRES_ARM_NEON; 18795 for (uint32_t m = 1; m <= 4; m++) { 18796 GemmMicrokernelTester() 18797 .mr(4) 18798 .nr(8) 18799 .kr(1) 18800 .sr(1) 18801 .m(m) 18802 .n(8) 18803 .k(8) 18804 .iterations(1) 18805 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18806 } 18807 } 18808 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_eq_8_subtile_n)18809 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) { 18810 TEST_REQUIRES_ARM_NEON; 18811 for (uint32_t n = 1; n <= 8; n++) { 18812 GemmMicrokernelTester() 18813 .mr(4) 18814 .nr(8) 18815 .kr(1) 18816 .sr(1) 18817 .m(4) 18818 .n(n) 18819 .k(8) 18820 .iterations(1) 18821 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18822 } 18823 } 18824 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_lt_8)18825 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8) { 18826 TEST_REQUIRES_ARM_NEON; 18827 for (size_t k = 1; k < 8; k++) { 18828 GemmMicrokernelTester() 18829 .mr(4) 18830 .nr(8) 18831 .kr(1) 18832 .sr(1) 18833 .m(4) 18834 .n(8) 18835 .k(k) 18836 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18837 } 18838 } 18839 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_lt_8_subtile)18840 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_subtile) { 18841 TEST_REQUIRES_ARM_NEON; 18842 for (size_t k = 1; k < 8; k++) { 18843 for (uint32_t n = 1; n <= 8; n++) { 
18844 for (uint32_t m = 1; m <= 4; m++) { 18845 GemmMicrokernelTester() 18846 .mr(4) 18847 .nr(8) 18848 .kr(1) 18849 .sr(1) 18850 .m(m) 18851 .n(n) 18852 .k(k) 18853 .iterations(1) 18854 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18855 } 18856 } 18857 } 18858 } 18859 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_gt_8)18860 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8) { 18861 TEST_REQUIRES_ARM_NEON; 18862 for (size_t k = 9; k < 16; k++) { 18863 GemmMicrokernelTester() 18864 .mr(4) 18865 .nr(8) 18866 .kr(1) 18867 .sr(1) 18868 .m(4) 18869 .n(8) 18870 .k(k) 18871 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18872 } 18873 } 18874 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_gt_8_subtile)18875 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_subtile) { 18876 TEST_REQUIRES_ARM_NEON; 18877 for (size_t k = 9; k < 16; k++) { 18878 for (uint32_t n = 1; n <= 8; n++) { 18879 for (uint32_t m = 1; m <= 4; m++) { 18880 GemmMicrokernelTester() 18881 .mr(4) 18882 .nr(8) 18883 .kr(1) 18884 .sr(1) 18885 .m(m) 18886 .n(n) 18887 .k(k) 18888 .iterations(1) 18889 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18890 } 18891 } 18892 } 18893 } 18894 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_div_8)18895 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8) { 18896 TEST_REQUIRES_ARM_NEON; 18897 for (size_t k = 16; k <= 80; k += 8) { 18898 GemmMicrokernelTester() 18899 .mr(4) 18900 .nr(8) 18901 .kr(1) 18902 .sr(1) 18903 .m(4) 18904 .n(8) 18905 .k(k) 18906 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18907 } 18908 } 18909 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,k_div_8_subtile)18910 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_subtile) { 18911 TEST_REQUIRES_ARM_NEON; 18912 for (size_t k = 16; k <= 80; k += 8) { 18913 for (uint32_t n = 1; n <= 8; n++) { 18914 for (uint32_t m = 1; m <= 4; m++) { 18915 GemmMicrokernelTester() 18916 .mr(4) 18917 .nr(8) 18918 .kr(1) 18919 .sr(1) 18920 .m(m) 18921 .n(n) 18922 .k(k) 18923 .iterations(1) 18924 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18925 } 18926 } 18927 } 18928 } 18929 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,n_gt_8)18930 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8) { 18931 TEST_REQUIRES_ARM_NEON; 18932 for (uint32_t n = 9; n < 16; n++) { 18933 for (size_t k = 1; k <= 40; k += 9) { 18934 GemmMicrokernelTester() 18935 .mr(4) 18936 .nr(8) 18937 .kr(1) 18938 .sr(1) 18939 .m(4) 18940 .n(n) 18941 .k(k) 18942 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18943 } 18944 } 18945 } 18946 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,n_gt_8_strided_cn)18947 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) { 18948 TEST_REQUIRES_ARM_NEON; 18949 for (uint32_t n = 9; n < 16; n++) { 18950 for (size_t k = 1; k <= 40; k += 9) { 18951 GemmMicrokernelTester() 18952 .mr(4) 18953 .nr(8) 18954 .kr(1) 18955 .sr(1) 18956 .m(4) 18957 .n(n) 18958 .k(k) 18959 .cn_stride(11) 18960 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18961 } 18962 } 18963 } 18964 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,n_gt_8_subtile)18965 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_subtile) { 18966 TEST_REQUIRES_ARM_NEON; 18967 for (uint32_t n = 9; n < 16; n++) { 18968 for (size_t k = 1; k <= 40; k += 9) { 18969 for (uint32_t m = 1; m <= 4; m++) { 18970 GemmMicrokernelTester() 18971 .mr(4) 18972 .nr(8) 18973 .kr(1) 
18974 .sr(1) 18975 .m(m) 18976 .n(n) 18977 .k(k) 18978 .iterations(1) 18979 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18980 } 18981 } 18982 } 18983 } 18984 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,n_div_8)18985 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8) { 18986 TEST_REQUIRES_ARM_NEON; 18987 for (uint32_t n = 16; n <= 24; n += 8) { 18988 for (size_t k = 1; k <= 40; k += 9) { 18989 GemmMicrokernelTester() 18990 .mr(4) 18991 .nr(8) 18992 .kr(1) 18993 .sr(1) 18994 .m(4) 18995 .n(n) 18996 .k(k) 18997 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 18998 } 18999 } 19000 } 19001 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,n_div_8_strided_cn)19002 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) { 19003 TEST_REQUIRES_ARM_NEON; 19004 for (uint32_t n = 16; n <= 24; n += 8) { 19005 for (size_t k = 1; k <= 40; k += 9) { 19006 GemmMicrokernelTester() 19007 .mr(4) 19008 .nr(8) 19009 .kr(1) 19010 .sr(1) 19011 .m(4) 19012 .n(n) 19013 .k(k) 19014 .cn_stride(11) 19015 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19016 } 19017 } 19018 } 19019 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,n_div_8_subtile)19020 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_subtile) { 19021 TEST_REQUIRES_ARM_NEON; 19022 for (uint32_t n = 16; n <= 24; n += 8) { 19023 for (size_t k = 1; k <= 40; k += 9) { 19024 for (uint32_t m = 1; m <= 4; m++) { 19025 GemmMicrokernelTester() 19026 .mr(4) 19027 .nr(8) 19028 .kr(1) 19029 .sr(1) 19030 .m(m) 19031 .n(n) 19032 .k(k) 19033 .iterations(1) 19034 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19035 } 19036 } 19037 } 19038 } 19039 
// Full 4x8 tile with ks = 3, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// ks = 3 over every m = 1..4, n = 1..8 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 with n = 9..15, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// ks = 3 with n in {16, 24}, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride = 11 over every m = 1..4, n = 1..8 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile, ks = 3, a_offset = 163, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .a_offset(163)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// As a_offset, additionally sweeping zero_index over 0..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_idx = 0; zero_idx < 4; ++zero_idx) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(8).k(kc)
          .ks(3)
          .a_offset(163)
          .zero_index(zero_idx)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,qmin)19171 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmin) { 19172 TEST_REQUIRES_ARM_NEON; 19173 GemmMicrokernelTester() 19174 .mr(4) 19175 .nr(8) 19176 .kr(1) 19177 .sr(1) 19178 .m(4) 19179 .n(8) 19180 .k(8) 19181 .qmin(128) 19182 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19183 } 19184 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,qmax)19185 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmax) { 19186 TEST_REQUIRES_ARM_NEON; 19187 GemmMicrokernelTester() 19188 .mr(4) 19189 .nr(8) 19190 .kr(1) 19191 .sr(1) 19192 .m(4) 19193 .n(8) 19194 .k(8) 19195 .qmax(128) 19196 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19197 } 19198 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE,strided_cm)19199 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm) { 19200 TEST_REQUIRES_ARM_NEON; 19201 GemmMicrokernelTester() 19202 .mr(4) 19203 .nr(8) 19204 .kr(1) 19205 .sr(1) 19206 .m(4) 19207 .n(8) 19208 .k(8) 19209 .cm_stride(11) 19210 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19211 } 19212 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 19213 19214 19215 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,k_eq_8)19216 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8) { 19217 TEST_REQUIRES_ARM_NEON; 19218 GemmMicrokernelTester() 19219 .mr(4) 19220 .nr(8) 19221 .kr(1) 19222 .sr(1) 19223 .m(4) 19224 .n(8) 19225 .k(8) 19226 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19227 } 19228 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,strided_cn)19229 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cn) { 19230 
TEST_REQUIRES_ARM_NEON; 19231 GemmMicrokernelTester() 19232 .mr(4) 19233 .nr(8) 19234 .kr(1) 19235 .sr(1) 19236 .m(4) 19237 .n(8) 19238 .k(8) 19239 .cn_stride(11) 19240 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19241 } 19242 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile)19243 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) { 19244 TEST_REQUIRES_ARM_NEON; 19245 for (uint32_t n = 1; n <= 8; n++) { 19246 for (uint32_t m = 1; m <= 4; m++) { 19247 GemmMicrokernelTester() 19248 .mr(4) 19249 .nr(8) 19250 .kr(1) 19251 .sr(1) 19252 .m(m) 19253 .n(n) 19254 .k(8) 19255 .iterations(1) 19256 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19257 } 19258 } 19259 } 19260 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile_m)19261 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) { 19262 TEST_REQUIRES_ARM_NEON; 19263 for (uint32_t m = 1; m <= 4; m++) { 19264 GemmMicrokernelTester() 19265 .mr(4) 19266 .nr(8) 19267 .kr(1) 19268 .sr(1) 19269 .m(m) 19270 .n(8) 19271 .k(8) 19272 .iterations(1) 19273 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19274 } 19275 } 19276 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,k_eq_8_subtile_n)19277 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) { 19278 TEST_REQUIRES_ARM_NEON; 19279 for (uint32_t n = 1; n <= 8; n++) { 19280 GemmMicrokernelTester() 19281 .mr(4) 19282 .nr(8) 19283 .kr(1) 19284 .sr(1) 19285 .m(4) 19286 .n(n) 19287 .k(8) 19288 .iterations(1) 19289 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19290 } 19291 } 19292 
// Full 4x8 tile with k = 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k = 1..7 over every m = 1..4, n = 1..8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with k = 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k = 9..15 over every m = 1..4, n = 1..8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 over every m = 1..4, n = 1..8; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 9..15, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k in {1, 10, 19, 28, 37}, m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 9; nc < 16; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in {16, 24}, k in {1, 10, 19, 28, 37}, m fixed at 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(4).n(nc).k(kc)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
    }
  }
}

// n in {16, 24}, k in {1, 10, 19, 28, 37}, m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 16; nc <= 24; nc += 8) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile with ks = 3, k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(8).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
  }
}

// ks = 3 over every m = 1..4, n = 1..8 for k in {1, 10, 19, 28, 37}.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 8; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
            .mr(4).nr(8).kr(1).sr(1)
            .m(mc).n(nc).k(kc)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup,
                  xnn_init_qs8_conv_minmax_rndnu_neon_params,
                  xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,n_gt_8_small_kernel)19545 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) { 19546 TEST_REQUIRES_ARM_NEON; 19547 for (uint32_t n = 9; n < 16; n++) { 19548 for (size_t k = 1; k <= 40; k += 9) { 19549 GemmMicrokernelTester() 19550 .mr(4) 19551 .nr(8) 19552 .kr(1) 19553 .sr(1) 19554 .m(4) 19555 .n(n) 19556 .k(k) 19557 .ks(3) 19558 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19559 } 19560 } 19561 } 19562 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,n_div_8_small_kernel)19563 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) { 19564 TEST_REQUIRES_ARM_NEON; 19565 for (uint32_t n = 16; n <= 24; n += 8) { 19566 for (size_t k = 1; k <= 40; k += 9) { 19567 GemmMicrokernelTester() 19568 .mr(4) 19569 .nr(8) 19570 .kr(1) 19571 .sr(1) 19572 .m(4) 19573 .n(n) 19574 .k(k) 19575 .ks(3) 19576 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19577 } 19578 } 19579 } 19580 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,strided_cm_subtile)19581 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) { 19582 TEST_REQUIRES_ARM_NEON; 19583 for (size_t k = 1; k <= 40; k += 9) { 19584 for (uint32_t n = 1; n <= 8; n++) { 19585 for (uint32_t m = 1; m <= 4; m++) { 19586 GemmMicrokernelTester() 19587 .mr(4) 19588 .nr(8) 19589 .kr(1) 19590 .sr(1) 19591 .m(m) 19592 .n(n) 19593 .k(k) 19594 .cm_stride(11) 19595 .iterations(1) 19596 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19597 } 19598 } 19599 } 19600 } 19601 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,a_offset)19602 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, a_offset) { 19603 TEST_REQUIRES_ARM_NEON; 19604 for (size_t k = 1; k <= 
40; k += 9) { 19605 GemmMicrokernelTester() 19606 .mr(4) 19607 .nr(8) 19608 .kr(1) 19609 .sr(1) 19610 .m(4) 19611 .n(8) 19612 .k(k) 19613 .ks(3) 19614 .a_offset(163) 19615 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19616 } 19617 } 19618 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,zero)19619 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, zero) { 19620 TEST_REQUIRES_ARM_NEON; 19621 for (size_t k = 1; k <= 40; k += 9) { 19622 for (uint32_t mz = 0; mz < 4; mz++) { 19623 GemmMicrokernelTester() 19624 .mr(4) 19625 .nr(8) 19626 .kr(1) 19627 .sr(1) 19628 .m(4) 19629 .n(8) 19630 .k(k) 19631 .ks(3) 19632 .a_offset(163) 19633 .zero_index(mz) 19634 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19635 } 19636 } 19637 } 19638 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,qmin)19639 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, qmin) { 19640 TEST_REQUIRES_ARM_NEON; 19641 GemmMicrokernelTester() 19642 .mr(4) 19643 .nr(8) 19644 .kr(1) 19645 .sr(1) 19646 .m(4) 19647 .n(8) 19648 .k(8) 19649 .qmin(128) 19650 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19651 } 19652 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,qmax)19653 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, qmax) { 19654 TEST_REQUIRES_ARM_NEON; 19655 GemmMicrokernelTester() 19656 .mr(4) 19657 .nr(8) 19658 .kr(1) 19659 .sr(1) 19660 .m(4) 19661 .n(8) 19662 .k(8) 19663 .qmax(128) 19664 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19665 } 19666 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP,strided_cm)19667 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cm) { 19668 TEST_REQUIRES_ARM_NEON; 19669 
GemmMicrokernelTester() 19670 .mr(4) 19671 .nr(8) 19672 .kr(1) 19673 .sr(1) 19674 .m(4) 19675 .n(8) 19676 .k(8) 19677 .cm_stride(11) 19678 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19679 } 19680 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 19681 19682 19683 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16)19684 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16) { 19685 TEST_REQUIRES_ARM_NEON; 19686 GemmMicrokernelTester() 19687 .mr(4) 19688 .nr(8) 19689 .kr(2) 19690 .sr(1) 19691 .m(4) 19692 .n(8) 19693 .k(16) 19694 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19695 } 19696 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,strided_cn)19697 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cn) { 19698 TEST_REQUIRES_ARM_NEON; 19699 GemmMicrokernelTester() 19700 .mr(4) 19701 .nr(8) 19702 .kr(2) 19703 .sr(1) 19704 .m(4) 19705 .n(8) 19706 .k(16) 19707 .cn_stride(11) 19708 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19709 } 19710 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_subtile)19711 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) { 19712 TEST_REQUIRES_ARM_NEON; 19713 for (uint32_t n = 1; n <= 8; n++) { 19714 for (uint32_t m = 1; m <= 4; m++) { 19715 GemmMicrokernelTester() 19716 .mr(4) 19717 .nr(8) 19718 .kr(2) 19719 .sr(1) 19720 .m(m) 19721 .n(n) 19722 .k(16) 19723 .iterations(1) 19724 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19725 } 19726 } 19727 } 19728 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_subtile_m)19729 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) { 19730 
TEST_REQUIRES_ARM_NEON; 19731 for (uint32_t m = 1; m <= 4; m++) { 19732 GemmMicrokernelTester() 19733 .mr(4) 19734 .nr(8) 19735 .kr(2) 19736 .sr(1) 19737 .m(m) 19738 .n(8) 19739 .k(16) 19740 .iterations(1) 19741 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19742 } 19743 } 19744 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_eq_16_subtile_n)19745 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) { 19746 TEST_REQUIRES_ARM_NEON; 19747 for (uint32_t n = 1; n <= 8; n++) { 19748 GemmMicrokernelTester() 19749 .mr(4) 19750 .nr(8) 19751 .kr(2) 19752 .sr(1) 19753 .m(4) 19754 .n(n) 19755 .k(16) 19756 .iterations(1) 19757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19758 } 19759 } 19760 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_lt_16)19761 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16) { 19762 TEST_REQUIRES_ARM_NEON; 19763 for (size_t k = 1; k < 16; k++) { 19764 GemmMicrokernelTester() 19765 .mr(4) 19766 .nr(8) 19767 .kr(2) 19768 .sr(1) 19769 .m(4) 19770 .n(8) 19771 .k(k) 19772 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19773 } 19774 } 19775 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,k_lt_16_subtile)19776 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) { 19777 TEST_REQUIRES_ARM_NEON; 19778 for (size_t k = 1; k < 16; k++) { 19779 for (uint32_t n = 1; n <= 8; n++) { 19780 for (uint32_t m = 1; m <= 4; m++) { 19781 GemmMicrokernelTester() 19782 .mr(4) 19783 .nr(8) 19784 .kr(2) 19785 .sr(1) 19786 .m(m) 19787 .n(n) 19788 .k(k) 19789 .iterations(1) 19790 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 19791 } 19792 } 19793 } 19794 } 19795 
// Full 4x8 tile for every k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) in 1..4 x 1..8 for every k in 17..31; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 17; k_size < 32; ++k_size) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile for k = 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k_size)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) in 1..4 x 1..8 for k = 32, 48, ..., 160; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 32; k_size <= 160; k_size += 16) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 9..15, k = 1, 18, 35, 52, 69; m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k = 1, 18, 35, 52, 69; m = 4 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 9..15, k = 1, 18, 35, 52, 69, m = 1..4; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24, k = 1, 18, 35, 52, 69; m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 18, 35, 52, 69; m = 4 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 18, 35, 52, 69, m = 1..4; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x8 tile, k = 1, 18, 35, 52, 69, with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(1)
      .m(4).n(8).k(k_size)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) in 1..4 x 1..8 for k = 1, 18, 35, 52, 69 with ks(3);
// one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k_size = 1; k_size <= 80; k_size += 17) {
    for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
      for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(2).sr(1)
          .m(m_size).n(n_size).k(k_size)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
                xnn_init_qs8_conv_minmax_rndnu_neon_params,
                xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 9..15, k = 1, 18, 35, 52, 69; m = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 9; n_size < 16; ++n_size) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 18, 35, 52, 69; m = 4 with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 16; n_size <= 24; n_size += 8) {
    for (size_t k_size = 1; k_size <= 80; k_size += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(1)
        .m(4).n(n_size).k(k_size)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,strided_cm_subtile)20049 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cm_subtile) { 20050 TEST_REQUIRES_ARM_NEON; 20051 for (size_t k = 1; k <= 80; k += 17) { 20052 for (uint32_t n = 1; n <= 8; n++) { 20053 for (uint32_t m = 1; m <= 4; m++) { 20054 GemmMicrokernelTester() 20055 .mr(4) 20056 .nr(8) 20057 .kr(2) 20058 .sr(1) 20059 .m(m) 20060 .n(n) 20061 .k(k) 20062 .cm_stride(11) 20063 .iterations(1) 20064 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20065 } 20066 } 20067 } 20068 } 20069 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,a_offset)20070 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, a_offset) { 20071 TEST_REQUIRES_ARM_NEON; 20072 for (size_t k = 1; k <= 80; k += 17) { 20073 GemmMicrokernelTester() 20074 .mr(4) 20075 .nr(8) 20076 .kr(2) 20077 .sr(1) 20078 .m(4) 20079 .n(8) 20080 .k(k) 20081 .ks(3) 20082 .a_offset(331) 20083 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20084 } 20085 } 20086 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,zero)20087 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, zero) { 20088 TEST_REQUIRES_ARM_NEON; 20089 for (size_t k = 1; k <= 80; k += 17) { 20090 for (uint32_t mz = 0; mz < 4; mz++) { 20091 GemmMicrokernelTester() 20092 .mr(4) 20093 .nr(8) 20094 .kr(2) 20095 .sr(1) 20096 .m(4) 20097 .n(8) 20098 .k(k) 20099 .ks(3) 20100 .a_offset(331) 20101 .zero_index(mz) 20102 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20103 } 20104 } 20105 } 20106 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,qmin)20107 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, qmin) { 20108 TEST_REQUIRES_ARM_NEON; 20109 GemmMicrokernelTester() 20110 .mr(4) 20111 .nr(8) 20112 .kr(2) 20113 .sr(1) 20114 .m(4) 20115 
.n(8) 20116 .k(16) 20117 .qmin(128) 20118 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20119 } 20120 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,qmax)20121 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, qmax) { 20122 TEST_REQUIRES_ARM_NEON; 20123 GemmMicrokernelTester() 20124 .mr(4) 20125 .nr(8) 20126 .kr(2) 20127 .sr(1) 20128 .m(4) 20129 .n(8) 20130 .k(16) 20131 .qmax(128) 20132 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20133 } 20134 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R,strided_cm)20135 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_LD1R, strided_cm) { 20136 TEST_REQUIRES_ARM_NEON; 20137 GemmMicrokernelTester() 20138 .mr(4) 20139 .nr(8) 20140 .kr(2) 20141 .sr(1) 20142 .m(4) 20143 .n(8) 20144 .k(16) 20145 .cm_stride(11) 20146 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20147 } 20148 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 20149 20150 20151 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_eq_8)20152 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8) { 20153 TEST_REQUIRES_ARM_NEON; 20154 GemmMicrokernelTester() 20155 .mr(4) 20156 .nr(8) 20157 .kr(2) 20158 .sr(1) 20159 .m(4) 20160 .n(8) 20161 .k(8) 20162 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20163 } 20164 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,strided_cn)20165 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cn) { 20166 TEST_REQUIRES_ARM_NEON; 20167 GemmMicrokernelTester() 20168 .mr(4) 20169 .nr(8) 20170 .kr(2) 20171 .sr(1) 20172 .m(4) 20173 .n(8) 20174 .k(8) 20175 .cn_stride(11) 20176 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20177 } 20178 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_eq_8_subtile)20179 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile) { 20180 TEST_REQUIRES_ARM_NEON; 20181 for (uint32_t n = 1; n <= 8; n++) { 20182 for (uint32_t m = 1; m <= 4; m++) { 20183 GemmMicrokernelTester() 20184 .mr(4) 20185 .nr(8) 20186 .kr(2) 20187 .sr(1) 20188 .m(m) 20189 .n(n) 20190 .k(8) 20191 .iterations(1) 20192 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20193 } 20194 } 20195 } 20196 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_eq_8_subtile_m)20197 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) { 20198 TEST_REQUIRES_ARM_NEON; 20199 for (uint32_t m = 1; m <= 4; m++) { 20200 GemmMicrokernelTester() 20201 .mr(4) 20202 .nr(8) 20203 .kr(2) 20204 .sr(1) 20205 .m(m) 20206 .n(8) 20207 .k(8) 20208 .iterations(1) 20209 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20210 } 20211 } 20212 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_eq_8_subtile_n)20213 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) { 20214 TEST_REQUIRES_ARM_NEON; 20215 for (uint32_t n = 1; n <= 8; n++) { 20216 GemmMicrokernelTester() 20217 .mr(4) 20218 .nr(8) 20219 .kr(2) 20220 .sr(1) 20221 .m(4) 20222 .n(n) 20223 .k(8) 20224 .iterations(1) 20225 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20226 } 20227 } 20228 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_lt_8)20229 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_lt_8) { 20230 TEST_REQUIRES_ARM_NEON; 20231 for (size_t k = 1; k < 8; k++) { 20232 GemmMicrokernelTester() 20233 .mr(4) 20234 .nr(8) 20235 .kr(2) 20236 .sr(1) 20237 .m(4) 20238 .n(8) 20239 .k(k) 
20240 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20241 } 20242 } 20243 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_lt_8_subtile)20244 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_lt_8_subtile) { 20245 TEST_REQUIRES_ARM_NEON; 20246 for (size_t k = 1; k < 8; k++) { 20247 for (uint32_t n = 1; n <= 8; n++) { 20248 for (uint32_t m = 1; m <= 4; m++) { 20249 GemmMicrokernelTester() 20250 .mr(4) 20251 .nr(8) 20252 .kr(2) 20253 .sr(1) 20254 .m(m) 20255 .n(n) 20256 .k(k) 20257 .iterations(1) 20258 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20259 } 20260 } 20261 } 20262 } 20263 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_gt_8)20264 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_gt_8) { 20265 TEST_REQUIRES_ARM_NEON; 20266 for (size_t k = 9; k < 16; k++) { 20267 GemmMicrokernelTester() 20268 .mr(4) 20269 .nr(8) 20270 .kr(2) 20271 .sr(1) 20272 .m(4) 20273 .n(8) 20274 .k(k) 20275 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20276 } 20277 } 20278 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_gt_8_subtile)20279 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_gt_8_subtile) { 20280 TEST_REQUIRES_ARM_NEON; 20281 for (size_t k = 9; k < 16; k++) { 20282 for (uint32_t n = 1; n <= 8; n++) { 20283 for (uint32_t m = 1; m <= 4; m++) { 20284 GemmMicrokernelTester() 20285 .mr(4) 20286 .nr(8) 20287 .kr(2) 20288 .sr(1) 20289 .m(m) 20290 .n(n) 20291 .k(k) 20292 .iterations(1) 20293 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20294 } 20295 } 20296 } 20297 } 20298 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_div_8)20299 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_div_8) { 20300 
TEST_REQUIRES_ARM_NEON; 20301 for (size_t k = 16; k <= 80; k += 8) { 20302 GemmMicrokernelTester() 20303 .mr(4) 20304 .nr(8) 20305 .kr(2) 20306 .sr(1) 20307 .m(4) 20308 .n(8) 20309 .k(k) 20310 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20311 } 20312 } 20313 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,k_div_8_subtile)20314 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_div_8_subtile) { 20315 TEST_REQUIRES_ARM_NEON; 20316 for (size_t k = 16; k <= 80; k += 8) { 20317 for (uint32_t n = 1; n <= 8; n++) { 20318 for (uint32_t m = 1; m <= 4; m++) { 20319 GemmMicrokernelTester() 20320 .mr(4) 20321 .nr(8) 20322 .kr(2) 20323 .sr(1) 20324 .m(m) 20325 .n(n) 20326 .k(k) 20327 .iterations(1) 20328 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20329 } 20330 } 20331 } 20332 } 20333 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,n_gt_8)20334 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8) { 20335 TEST_REQUIRES_ARM_NEON; 20336 for (uint32_t n = 9; n < 16; n++) { 20337 for (size_t k = 1; k <= 40; k += 9) { 20338 GemmMicrokernelTester() 20339 .mr(4) 20340 .nr(8) 20341 .kr(2) 20342 .sr(1) 20343 .m(4) 20344 .n(n) 20345 .k(k) 20346 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20347 } 20348 } 20349 } 20350 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,n_gt_8_strided_cn)20351 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) { 20352 TEST_REQUIRES_ARM_NEON; 20353 for (uint32_t n = 9; n < 16; n++) { 20354 for (size_t k = 1; k <= 40; k += 9) { 20355 GemmMicrokernelTester() 20356 .mr(4) 20357 .nr(8) 20358 .kr(2) 20359 .sr(1) 20360 .m(4) 20361 .n(n) 20362 .k(k) 20363 .cn_stride(11) 20364 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20365 } 20366 } 20367 } 20368 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,n_gt_8_subtile)20369 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_subtile) { 20370 TEST_REQUIRES_ARM_NEON; 20371 for (uint32_t n = 9; n < 16; n++) { 20372 for (size_t k = 1; k <= 40; k += 9) { 20373 for (uint32_t m = 1; m <= 4; m++) { 20374 GemmMicrokernelTester() 20375 .mr(4) 20376 .nr(8) 20377 .kr(2) 20378 .sr(1) 20379 .m(m) 20380 .n(n) 20381 .k(k) 20382 .iterations(1) 20383 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20384 } 20385 } 20386 } 20387 } 20388 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,n_div_8)20389 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8) { 20390 TEST_REQUIRES_ARM_NEON; 20391 for (uint32_t n = 16; n <= 24; n += 8) { 20392 for (size_t k = 1; k <= 40; k += 9) { 20393 GemmMicrokernelTester() 20394 .mr(4) 20395 .nr(8) 20396 .kr(2) 20397 .sr(1) 20398 .m(4) 20399 .n(n) 20400 .k(k) 20401 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20402 } 20403 } 20404 } 20405 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,n_div_8_strided_cn)20406 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) { 20407 TEST_REQUIRES_ARM_NEON; 20408 for (uint32_t n = 16; n <= 24; n += 8) { 20409 for (size_t k = 1; k <= 40; k += 9) { 20410 GemmMicrokernelTester() 20411 .mr(4) 20412 .nr(8) 20413 .kr(2) 20414 .sr(1) 20415 .m(4) 20416 .n(n) 20417 .k(k) 20418 .cn_stride(11) 20419 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20420 } 20421 } 20422 } 20423 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,n_div_8_subtile)20424 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_subtile) { 20425 
TEST_REQUIRES_ARM_NEON; 20426 for (uint32_t n = 16; n <= 24; n += 8) { 20427 for (size_t k = 1; k <= 40; k += 9) { 20428 for (uint32_t m = 1; m <= 4; m++) { 20429 GemmMicrokernelTester() 20430 .mr(4) 20431 .nr(8) 20432 .kr(2) 20433 .sr(1) 20434 .m(m) 20435 .n(n) 20436 .k(k) 20437 .iterations(1) 20438 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20439 } 20440 } 20441 } 20442 } 20443 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,small_kernel)20444 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, small_kernel) { 20445 TEST_REQUIRES_ARM_NEON; 20446 for (size_t k = 1; k <= 40; k += 9) { 20447 GemmMicrokernelTester() 20448 .mr(4) 20449 .nr(8) 20450 .kr(2) 20451 .sr(1) 20452 .m(4) 20453 .n(8) 20454 .k(k) 20455 .ks(3) 20456 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20457 } 20458 } 20459 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,small_kernel_subtile)20460 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, small_kernel_subtile) { 20461 TEST_REQUIRES_ARM_NEON; 20462 for (size_t k = 1; k <= 40; k += 9) { 20463 for (uint32_t n = 1; n <= 8; n++) { 20464 for (uint32_t m = 1; m <= 4; m++) { 20465 GemmMicrokernelTester() 20466 .mr(4) 20467 .nr(8) 20468 .kr(2) 20469 .sr(1) 20470 .m(m) 20471 .n(n) 20472 .k(k) 20473 .ks(3) 20474 .iterations(1) 20475 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20476 } 20477 } 20478 } 20479 } 20480 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,n_gt_8_small_kernel)20481 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) { 20482 TEST_REQUIRES_ARM_NEON; 20483 for (uint32_t n = 9; n < 16; n++) { 20484 for (size_t k = 1; k <= 40; k += 9) { 20485 GemmMicrokernelTester() 20486 .mr(4) 20487 .nr(8) 20488 .kr(2) 20489 .sr(1) 20490 .m(4) 20491 .n(n) 20492 
.k(k) 20493 .ks(3) 20494 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20495 } 20496 } 20497 } 20498 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,n_div_8_small_kernel)20499 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) { 20500 TEST_REQUIRES_ARM_NEON; 20501 for (uint32_t n = 16; n <= 24; n += 8) { 20502 for (size_t k = 1; k <= 40; k += 9) { 20503 GemmMicrokernelTester() 20504 .mr(4) 20505 .nr(8) 20506 .kr(2) 20507 .sr(1) 20508 .m(4) 20509 .n(n) 20510 .k(k) 20511 .ks(3) 20512 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20513 } 20514 } 20515 } 20516 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,strided_cm_subtile)20517 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cm_subtile) { 20518 TEST_REQUIRES_ARM_NEON; 20519 for (size_t k = 1; k <= 40; k += 9) { 20520 for (uint32_t n = 1; n <= 8; n++) { 20521 for (uint32_t m = 1; m <= 4; m++) { 20522 GemmMicrokernelTester() 20523 .mr(4) 20524 .nr(8) 20525 .kr(2) 20526 .sr(1) 20527 .m(m) 20528 .n(n) 20529 .k(k) 20530 .cm_stride(11) 20531 .iterations(1) 20532 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20533 } 20534 } 20535 } 20536 } 20537 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,a_offset)20538 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, a_offset) { 20539 TEST_REQUIRES_ARM_NEON; 20540 for (size_t k = 1; k <= 40; k += 9) { 20541 GemmMicrokernelTester() 20542 .mr(4) 20543 .nr(8) 20544 .kr(2) 20545 .sr(1) 20546 .m(4) 20547 .n(8) 20548 .k(k) 20549 .ks(3) 20550 .a_offset(163) 20551 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20552 } 20553 } 20554 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,zero)20555 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, zero) { 20556 TEST_REQUIRES_ARM_NEON; 20557 for (size_t k = 1; k <= 40; k += 9) { 20558 for (uint32_t mz = 0; mz < 4; mz++) { 20559 GemmMicrokernelTester() 20560 .mr(4) 20561 .nr(8) 20562 .kr(2) 20563 .sr(1) 20564 .m(4) 20565 .n(8) 20566 .k(k) 20567 .ks(3) 20568 .a_offset(163) 20569 .zero_index(mz) 20570 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20571 } 20572 } 20573 } 20574 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,qmin)20575 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, qmin) { 20576 TEST_REQUIRES_ARM_NEON; 20577 GemmMicrokernelTester() 20578 .mr(4) 20579 .nr(8) 20580 .kr(2) 20581 .sr(1) 20582 .m(4) 20583 .n(8) 20584 .k(8) 20585 .qmin(128) 20586 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20587 } 20588 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,qmax)20589 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, qmax) { 20590 TEST_REQUIRES_ARM_NEON; 20591 GemmMicrokernelTester() 20592 .mr(4) 20593 .nr(8) 20594 .kr(2) 20595 .sr(1) 20596 .m(4) 20597 .n(8) 20598 .k(8) 20599 .qmax(128) 20600 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20601 } 20602 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R,strided_cm)20603 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cm) { 20604 TEST_REQUIRES_ARM_NEON; 20605 GemmMicrokernelTester() 20606 .mr(4) 20607 .nr(8) 20608 .kr(2) 20609 .sr(1) 20610 .m(4) 20611 .n(8) 20612 .k(8) 20613 .cm_stride(11) 20614 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 20615 } 20616 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 20617 20618 20619 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 
// Full 4x8 tile at k = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(4)
    .m(4).n(8).k(8)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// Full 4x8 tile at k = 8 with cn_stride(11).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(2).sr(4)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
}

// Every (m, n) in 1..4 x 1..8 at k = 8; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n_size = 1; n_size <= 8; ++n_size) {
    for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(2).sr(4)
        .m(m_size).n(n_size).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull,
              xnn_init_qs8_conv_minmax_rndnu_neon_params,
              xnn_qs8_requantize_rndnu);
    }
  }
}

// m = 1..4 with n = 8 and k = 8; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m_size = 1; m_size <= 4; ++m_size) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(2).sr(4)
      .m(m_size).n(8).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull,
            xnn_init_qs8_conv_minmax_rndnu_neon_params,
            xnn_qs8_requantize_rndnu);
  }
}

// n in 1..8 with m fixed at 4 and k == 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(n)
      .k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full tile with k in 1..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile with k in 1..7, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with k in 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile with k in 9..15, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with k = 16, 24, ..., 80 (multiples of 8).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile with k = 16, 24, ..., 80, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 9..15 (larger than nr) with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 9..15, k = 1, 10, ..., 37, m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 (multiples of nr) with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 10, ..., 37, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 10, ..., 37, m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile, k = 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(2)
      .sr(4)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile, k = 1, 10, ..., 37, ks = 3, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 9..15, k = 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 10, ..., 37, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(2)
        .sr(4)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every (m, n) sub-tile, k = 1, 10, ..., 37, with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(2)
          .sr(4)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
21005 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL,a_offset)21006 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, a_offset) { 21007 TEST_REQUIRES_ARM_NEON; 21008 for (size_t k = 1; k <= 40; k += 9) { 21009 GemmMicrokernelTester() 21010 .mr(4) 21011 .nr(8) 21012 .kr(2) 21013 .sr(4) 21014 .m(4) 21015 .n(8) 21016 .k(k) 21017 .ks(3) 21018 .a_offset(163) 21019 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21020 } 21021 } 21022 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL,zero)21023 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, zero) { 21024 TEST_REQUIRES_ARM_NEON; 21025 for (size_t k = 1; k <= 40; k += 9) { 21026 for (uint32_t mz = 0; mz < 4; mz++) { 21027 GemmMicrokernelTester() 21028 .mr(4) 21029 .nr(8) 21030 .kr(2) 21031 .sr(4) 21032 .m(4) 21033 .n(8) 21034 .k(k) 21035 .ks(3) 21036 .a_offset(163) 21037 .zero_index(mz) 21038 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21039 } 21040 } 21041 } 21042 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL,qmin)21043 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, qmin) { 21044 TEST_REQUIRES_ARM_NEON; 21045 GemmMicrokernelTester() 21046 .mr(4) 21047 .nr(8) 21048 .kr(2) 21049 .sr(4) 21050 .m(4) 21051 .n(8) 21052 .k(8) 21053 .qmin(128) 21054 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21055 } 21056 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL,qmax)21057 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, qmax) { 21058 TEST_REQUIRES_ARM_NEON; 21059 GemmMicrokernelTester() 21060 .mr(4) 21061 .nr(8) 21062 .kr(2) 21063 .sr(4) 21064 .m(4) 21065 .n(8) 21066 .k(8) 21067 .qmax(128) 21068 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21069 } 21070 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL,strided_cm)21071 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cm) { 21072 TEST_REQUIRES_ARM_NEON; 21073 GemmMicrokernelTester() 21074 .mr(4) 21075 .nr(8) 21076 .kr(2) 21077 .sr(4) 21078 .m(4) 21079 .n(8) 21080 .k(8) 21081 .cm_stride(11) 21082 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21083 } 21084 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 21085 21086 21087 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R,k_eq_16)21088 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16) { 21089 TEST_REQUIRES_ARM_NEON; 21090 GemmMicrokernelTester() 21091 .mr(4) 21092 .nr(8) 21093 .kr(4) 21094 .sr(1) 21095 .m(4) 21096 .n(8) 21097 .k(16) 21098 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21099 } 21100 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R,strided_cn)21101 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cn) { 21102 TEST_REQUIRES_ARM_NEON; 21103 GemmMicrokernelTester() 21104 .mr(4) 21105 .nr(8) 21106 .kr(4) 21107 .sr(1) 21108 .m(4) 21109 .n(8) 21110 .k(16) 21111 .cn_stride(11) 21112 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21113 } 21114 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R,k_eq_16_subtile)21115 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) { 21116 TEST_REQUIRES_ARM_NEON; 21117 for (uint32_t n = 1; n <= 8; n++) { 21118 for (uint32_t m = 1; m <= 4; m++) { 21119 GemmMicrokernelTester() 21120 .mr(4) 21121 .nr(8) 21122 .kr(4) 21123 .sr(1) 21124 .m(m) 21125 .n(n) 21126 .k(16) 21127 .iterations(1) 21128 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21129 } 21130 } 21131 } 21132 
// m in 1..4 with n fixed at 8 and k == 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// n in 1..8 with m fixed at 4 and k == 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full tile with k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile with k in 1..15, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile with k in 17..31, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with k = 32, 48, ..., 160 (multiples of 16).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile with k = 32, 48, ..., 160, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 9..15 (larger than nr) with k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in 9..15, k = 1, 18, ..., 69, m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 16 and 24 (multiples of nr) with k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 18, ..., 69, m in 1..4, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile, k = 1, 18, ..., 69, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) sub-tile, k = 1, 18, ..., 69, ks = 3, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in 9..15, k = 1, 18, ..., 69, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16 and 24, k = 1, 18, ..., 69, with ks set to 3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every (m, n) sub-tile, k = 1, 18, ..., 69, with cm_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(4)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(11)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile, k = 1, 18, ..., 69, ks = 3, with a_offset set to 331.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(4)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally sweeping zero_index over 0..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(4)
        .sr(1)
        .m(4)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, qmin) { 21512 TEST_REQUIRES_ARM_NEON; 21513 GemmMicrokernelTester() 21514 .mr(4) 21515 .nr(8) 21516 .kr(4) 21517 .sr(1) 21518 .m(4) 21519 .n(8) 21520 .k(16) 21521 .qmin(128) 21522 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21523 } 21524 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R,qmax)21525 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, qmax) { 21526 TEST_REQUIRES_ARM_NEON; 21527 GemmMicrokernelTester() 21528 .mr(4) 21529 .nr(8) 21530 .kr(4) 21531 .sr(1) 21532 .m(4) 21533 .n(8) 21534 .k(16) 21535 .qmax(128) 21536 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21537 } 21538 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R,strided_cm)21539 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_LD1R, strided_cm) { 21540 TEST_REQUIRES_ARM_NEON; 21541 GemmMicrokernelTester() 21542 .mr(4) 21543 .nr(8) 21544 .kr(4) 21545 .sr(1) 21546 .m(4) 21547 .n(8) 21548 .k(16) 21549 .cm_stride(11) 21550 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21551 } 21552 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 21553 21554 21555 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_eq_8)21556 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8) { 21557 TEST_REQUIRES_ARM_NEON; 21558 GemmMicrokernelTester() 21559 .mr(4) 21560 .nr(8) 21561 .kr(4) 21562 .sr(1) 21563 .m(4) 21564 .n(8) 21565 .k(8) 21566 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21567 } 21568 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,strided_cn)21569 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cn) { 21570 TEST_REQUIRES_ARM_NEON; 21571 GemmMicrokernelTester() 21572 
.mr(4) 21573 .nr(8) 21574 .kr(4) 21575 .sr(1) 21576 .m(4) 21577 .n(8) 21578 .k(8) 21579 .cn_stride(11) 21580 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21581 } 21582 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_eq_8_subtile)21583 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile) { 21584 TEST_REQUIRES_ARM_NEON; 21585 for (uint32_t n = 1; n <= 8; n++) { 21586 for (uint32_t m = 1; m <= 4; m++) { 21587 GemmMicrokernelTester() 21588 .mr(4) 21589 .nr(8) 21590 .kr(4) 21591 .sr(1) 21592 .m(m) 21593 .n(n) 21594 .k(8) 21595 .iterations(1) 21596 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21597 } 21598 } 21599 } 21600 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_eq_8_subtile_m)21601 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) { 21602 TEST_REQUIRES_ARM_NEON; 21603 for (uint32_t m = 1; m <= 4; m++) { 21604 GemmMicrokernelTester() 21605 .mr(4) 21606 .nr(8) 21607 .kr(4) 21608 .sr(1) 21609 .m(m) 21610 .n(8) 21611 .k(8) 21612 .iterations(1) 21613 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21614 } 21615 } 21616 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_eq_8_subtile_n)21617 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) { 21618 TEST_REQUIRES_ARM_NEON; 21619 for (uint32_t n = 1; n <= 8; n++) { 21620 GemmMicrokernelTester() 21621 .mr(4) 21622 .nr(8) 21623 .kr(4) 21624 .sr(1) 21625 .m(4) 21626 .n(n) 21627 .k(8) 21628 .iterations(1) 21629 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21630 } 21631 } 21632 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_lt_8)21633 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_lt_8) { 21634 
TEST_REQUIRES_ARM_NEON; 21635 for (size_t k = 1; k < 8; k++) { 21636 GemmMicrokernelTester() 21637 .mr(4) 21638 .nr(8) 21639 .kr(4) 21640 .sr(1) 21641 .m(4) 21642 .n(8) 21643 .k(k) 21644 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21645 } 21646 } 21647 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_lt_8_subtile)21648 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_lt_8_subtile) { 21649 TEST_REQUIRES_ARM_NEON; 21650 for (size_t k = 1; k < 8; k++) { 21651 for (uint32_t n = 1; n <= 8; n++) { 21652 for (uint32_t m = 1; m <= 4; m++) { 21653 GemmMicrokernelTester() 21654 .mr(4) 21655 .nr(8) 21656 .kr(4) 21657 .sr(1) 21658 .m(m) 21659 .n(n) 21660 .k(k) 21661 .iterations(1) 21662 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21663 } 21664 } 21665 } 21666 } 21667 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_gt_8)21668 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_gt_8) { 21669 TEST_REQUIRES_ARM_NEON; 21670 for (size_t k = 9; k < 16; k++) { 21671 GemmMicrokernelTester() 21672 .mr(4) 21673 .nr(8) 21674 .kr(4) 21675 .sr(1) 21676 .m(4) 21677 .n(8) 21678 .k(k) 21679 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21680 } 21681 } 21682 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_gt_8_subtile)21683 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_gt_8_subtile) { 21684 TEST_REQUIRES_ARM_NEON; 21685 for (size_t k = 9; k < 16; k++) { 21686 for (uint32_t n = 1; n <= 8; n++) { 21687 for (uint32_t m = 1; m <= 4; m++) { 21688 GemmMicrokernelTester() 21689 .mr(4) 21690 .nr(8) 21691 .kr(4) 21692 .sr(1) 21693 .m(m) 21694 .n(n) 21695 .k(k) 21696 .iterations(1) 21697 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 21698 } 21699 } 21700 } 21701 } 21702 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_div_8)21703 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_div_8) { 21704 TEST_REQUIRES_ARM_NEON; 21705 for (size_t k = 16; k <= 80; k += 8) { 21706 GemmMicrokernelTester() 21707 .mr(4) 21708 .nr(8) 21709 .kr(4) 21710 .sr(1) 21711 .m(4) 21712 .n(8) 21713 .k(k) 21714 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21715 } 21716 } 21717 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,k_div_8_subtile)21718 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_div_8_subtile) { 21719 TEST_REQUIRES_ARM_NEON; 21720 for (size_t k = 16; k <= 80; k += 8) { 21721 for (uint32_t n = 1; n <= 8; n++) { 21722 for (uint32_t m = 1; m <= 4; m++) { 21723 GemmMicrokernelTester() 21724 .mr(4) 21725 .nr(8) 21726 .kr(4) 21727 .sr(1) 21728 .m(m) 21729 .n(n) 21730 .k(k) 21731 .iterations(1) 21732 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21733 } 21734 } 21735 } 21736 } 21737 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,n_gt_8)21738 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8) { 21739 TEST_REQUIRES_ARM_NEON; 21740 for (uint32_t n = 9; n < 16; n++) { 21741 for (size_t k = 1; k <= 40; k += 9) { 21742 GemmMicrokernelTester() 21743 .mr(4) 21744 .nr(8) 21745 .kr(4) 21746 .sr(1) 21747 .m(4) 21748 .n(n) 21749 .k(k) 21750 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21751 } 21752 } 21753 } 21754 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,n_gt_8_strided_cn)21755 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) { 21756 TEST_REQUIRES_ARM_NEON; 21757 for (uint32_t n = 9; n < 16; n++) { 21758 for (size_t k = 1; k <= 40; k += 9) { 21759 GemmMicrokernelTester() 21760 .mr(4) 
21761 .nr(8) 21762 .kr(4) 21763 .sr(1) 21764 .m(4) 21765 .n(n) 21766 .k(k) 21767 .cn_stride(11) 21768 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21769 } 21770 } 21771 } 21772 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,n_gt_8_subtile)21773 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_subtile) { 21774 TEST_REQUIRES_ARM_NEON; 21775 for (uint32_t n = 9; n < 16; n++) { 21776 for (size_t k = 1; k <= 40; k += 9) { 21777 for (uint32_t m = 1; m <= 4; m++) { 21778 GemmMicrokernelTester() 21779 .mr(4) 21780 .nr(8) 21781 .kr(4) 21782 .sr(1) 21783 .m(m) 21784 .n(n) 21785 .k(k) 21786 .iterations(1) 21787 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21788 } 21789 } 21790 } 21791 } 21792 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,n_div_8)21793 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8) { 21794 TEST_REQUIRES_ARM_NEON; 21795 for (uint32_t n = 16; n <= 24; n += 8) { 21796 for (size_t k = 1; k <= 40; k += 9) { 21797 GemmMicrokernelTester() 21798 .mr(4) 21799 .nr(8) 21800 .kr(4) 21801 .sr(1) 21802 .m(4) 21803 .n(n) 21804 .k(k) 21805 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21806 } 21807 } 21808 } 21809 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,n_div_8_strided_cn)21810 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) { 21811 TEST_REQUIRES_ARM_NEON; 21812 for (uint32_t n = 16; n <= 24; n += 8) { 21813 for (size_t k = 1; k <= 40; k += 9) { 21814 GemmMicrokernelTester() 21815 .mr(4) 21816 .nr(8) 21817 .kr(4) 21818 .sr(1) 21819 .m(4) 21820 .n(n) 21821 .k(k) 21822 .cn_stride(11) 21823 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21824 } 21825 } 21826 } 21827 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,n_div_8_subtile)21828 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_subtile) { 21829 TEST_REQUIRES_ARM_NEON; 21830 for (uint32_t n = 16; n <= 24; n += 8) { 21831 for (size_t k = 1; k <= 40; k += 9) { 21832 for (uint32_t m = 1; m <= 4; m++) { 21833 GemmMicrokernelTester() 21834 .mr(4) 21835 .nr(8) 21836 .kr(4) 21837 .sr(1) 21838 .m(m) 21839 .n(n) 21840 .k(k) 21841 .iterations(1) 21842 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21843 } 21844 } 21845 } 21846 } 21847 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,small_kernel)21848 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, small_kernel) { 21849 TEST_REQUIRES_ARM_NEON; 21850 for (size_t k = 1; k <= 40; k += 9) { 21851 GemmMicrokernelTester() 21852 .mr(4) 21853 .nr(8) 21854 .kr(4) 21855 .sr(1) 21856 .m(4) 21857 .n(8) 21858 .k(k) 21859 .ks(3) 21860 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21861 } 21862 } 21863 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,small_kernel_subtile)21864 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, small_kernel_subtile) { 21865 TEST_REQUIRES_ARM_NEON; 21866 for (size_t k = 1; k <= 40; k += 9) { 21867 for (uint32_t n = 1; n <= 8; n++) { 21868 for (uint32_t m = 1; m <= 4; m++) { 21869 GemmMicrokernelTester() 21870 .mr(4) 21871 .nr(8) 21872 .kr(4) 21873 .sr(1) 21874 .m(m) 21875 .n(n) 21876 .k(k) 21877 .ks(3) 21878 .iterations(1) 21879 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21880 } 21881 } 21882 } 21883 } 21884 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,n_gt_8_small_kernel)21885 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) { 21886 TEST_REQUIRES_ARM_NEON; 21887 for (uint32_t n = 9; n < 16; n++) { 21888 for 
(size_t k = 1; k <= 40; k += 9) { 21889 GemmMicrokernelTester() 21890 .mr(4) 21891 .nr(8) 21892 .kr(4) 21893 .sr(1) 21894 .m(4) 21895 .n(n) 21896 .k(k) 21897 .ks(3) 21898 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21899 } 21900 } 21901 } 21902 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,n_div_8_small_kernel)21903 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) { 21904 TEST_REQUIRES_ARM_NEON; 21905 for (uint32_t n = 16; n <= 24; n += 8) { 21906 for (size_t k = 1; k <= 40; k += 9) { 21907 GemmMicrokernelTester() 21908 .mr(4) 21909 .nr(8) 21910 .kr(4) 21911 .sr(1) 21912 .m(4) 21913 .n(n) 21914 .k(k) 21915 .ks(3) 21916 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21917 } 21918 } 21919 } 21920 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,strided_cm_subtile)21921 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cm_subtile) { 21922 TEST_REQUIRES_ARM_NEON; 21923 for (size_t k = 1; k <= 40; k += 9) { 21924 for (uint32_t n = 1; n <= 8; n++) { 21925 for (uint32_t m = 1; m <= 4; m++) { 21926 GemmMicrokernelTester() 21927 .mr(4) 21928 .nr(8) 21929 .kr(4) 21930 .sr(1) 21931 .m(m) 21932 .n(n) 21933 .k(k) 21934 .cm_stride(11) 21935 .iterations(1) 21936 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21937 } 21938 } 21939 } 21940 } 21941 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,a_offset)21942 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, a_offset) { 21943 TEST_REQUIRES_ARM_NEON; 21944 for (size_t k = 1; k <= 40; k += 9) { 21945 GemmMicrokernelTester() 21946 .mr(4) 21947 .nr(8) 21948 .kr(4) 21949 .sr(1) 21950 .m(4) 21951 .n(8) 21952 .k(k) 21953 .ks(3) 21954 .a_offset(163) 21955 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21956 } 21957 } 21958 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,zero)21959 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, zero) { 21960 TEST_REQUIRES_ARM_NEON; 21961 for (size_t k = 1; k <= 40; k += 9) { 21962 for (uint32_t mz = 0; mz < 4; mz++) { 21963 GemmMicrokernelTester() 21964 .mr(4) 21965 .nr(8) 21966 .kr(4) 21967 .sr(1) 21968 .m(4) 21969 .n(8) 21970 .k(k) 21971 .ks(3) 21972 .a_offset(163) 21973 .zero_index(mz) 21974 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21975 } 21976 } 21977 } 21978 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,qmin)21979 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, qmin) { 21980 TEST_REQUIRES_ARM_NEON; 21981 GemmMicrokernelTester() 21982 .mr(4) 21983 .nr(8) 21984 .kr(4) 21985 .sr(1) 21986 .m(4) 21987 .n(8) 21988 .k(8) 21989 .qmin(128) 21990 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 21991 } 21992 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,qmax)21993 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, qmax) { 21994 TEST_REQUIRES_ARM_NEON; 21995 GemmMicrokernelTester() 21996 .mr(4) 21997 .nr(8) 21998 .kr(4) 21999 .sr(1) 22000 .m(4) 22001 .n(8) 22002 .k(8) 22003 .qmax(128) 22004 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22005 } 22006 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R,strided_cm)22007 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cm) { 22008 TEST_REQUIRES_ARM_NEON; 22009 GemmMicrokernelTester() 22010 .mr(4) 22011 .nr(8) 22012 .kr(4) 22013 .sr(1) 22014 .m(4) 22015 .n(8) 22016 .k(8) 22017 .cm_stride(11) 22018 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
22019 } 22020 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 22021 22022 22023 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_eq_16)22024 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16) { 22025 TEST_REQUIRES_ARM_NEON; 22026 GemmMicrokernelTester() 22027 .mr(4) 22028 .nr(8) 22029 .kr(4) 22030 .sr(2) 22031 .m(4) 22032 .n(8) 22033 .k(16) 22034 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22035 } 22036 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,strided_cn)22037 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cn) { 22038 TEST_REQUIRES_ARM_NEON; 22039 GemmMicrokernelTester() 22040 .mr(4) 22041 .nr(8) 22042 .kr(4) 22043 .sr(2) 22044 .m(4) 22045 .n(8) 22046 .k(16) 22047 .cn_stride(11) 22048 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22049 } 22050 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_eq_16_subtile)22051 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile) { 22052 TEST_REQUIRES_ARM_NEON; 22053 for (uint32_t n = 1; n <= 8; n++) { 22054 for (uint32_t m = 1; m <= 4; m++) { 22055 GemmMicrokernelTester() 22056 .mr(4) 22057 .nr(8) 22058 .kr(4) 22059 .sr(2) 22060 .m(m) 22061 .n(n) 22062 .k(16) 22063 .iterations(1) 22064 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22065 } 22066 } 22067 } 22068 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_eq_16_subtile_m)22069 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile_m) { 22070 TEST_REQUIRES_ARM_NEON; 22071 for (uint32_t m = 1; m <= 4; m++) { 22072 GemmMicrokernelTester() 22073 .mr(4) 22074 .nr(8) 22075 .kr(4) 22076 .sr(2) 22077 .m(m) 22078 .n(8) 22079 .k(16) 22080 .iterations(1) 22081 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 22082 } 22083 } 22084 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_eq_16_subtile_n)22085 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_eq_16_subtile_n) { 22086 TEST_REQUIRES_ARM_NEON; 22087 for (uint32_t n = 1; n <= 8; n++) { 22088 GemmMicrokernelTester() 22089 .mr(4) 22090 .nr(8) 22091 .kr(4) 22092 .sr(2) 22093 .m(4) 22094 .n(n) 22095 .k(16) 22096 .iterations(1) 22097 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22098 } 22099 } 22100 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_lt_16)22101 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_lt_16) { 22102 TEST_REQUIRES_ARM_NEON; 22103 for (size_t k = 1; k < 16; k++) { 22104 GemmMicrokernelTester() 22105 .mr(4) 22106 .nr(8) 22107 .kr(4) 22108 .sr(2) 22109 .m(4) 22110 .n(8) 22111 .k(k) 22112 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22113 } 22114 } 22115 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_lt_16_subtile)22116 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_lt_16_subtile) { 22117 TEST_REQUIRES_ARM_NEON; 22118 for (size_t k = 1; k < 16; k++) { 22119 for (uint32_t n = 1; n <= 8; n++) { 22120 for (uint32_t m = 1; m <= 4; m++) { 22121 GemmMicrokernelTester() 22122 .mr(4) 22123 .nr(8) 22124 .kr(4) 22125 .sr(2) 22126 .m(m) 22127 .n(n) 22128 .k(k) 22129 .iterations(1) 22130 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22131 } 22132 } 22133 } 22134 } 22135 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_gt_16)22136 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_gt_16) { 22137 TEST_REQUIRES_ARM_NEON; 22138 for (size_t k = 17; k < 32; k++) { 22139 GemmMicrokernelTester() 22140 .mr(4) 22141 .nr(8) 22142 .kr(4) 22143 .sr(2) 22144 .m(4) 22145 .n(8) 22146 .k(k) 22147 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22148 } 22149 } 22150 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_gt_16_subtile)22151 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_gt_16_subtile) { 22152 TEST_REQUIRES_ARM_NEON; 22153 for (size_t k = 17; k < 32; k++) { 22154 for (uint32_t n = 1; n <= 8; n++) { 22155 for (uint32_t m = 1; m <= 4; m++) { 22156 GemmMicrokernelTester() 22157 .mr(4) 22158 .nr(8) 22159 .kr(4) 22160 .sr(2) 22161 .m(m) 22162 .n(n) 22163 .k(k) 22164 .iterations(1) 22165 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22166 } 22167 } 22168 } 22169 } 22170 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_div_16)22171 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_div_16) { 22172 TEST_REQUIRES_ARM_NEON; 22173 for (size_t k = 32; k <= 160; k += 16) { 22174 GemmMicrokernelTester() 22175 .mr(4) 22176 .nr(8) 22177 .kr(4) 22178 .sr(2) 22179 .m(4) 22180 .n(8) 22181 .k(k) 22182 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22183 } 22184 } 22185 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,k_div_16_subtile)22186 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, k_div_16_subtile) { 22187 TEST_REQUIRES_ARM_NEON; 22188 for (size_t k = 32; k <= 160; k += 16) { 22189 for (uint32_t n = 1; n <= 8; n++) { 22190 for (uint32_t m = 1; m <= 4; m++) { 22191 GemmMicrokernelTester() 22192 .mr(4) 22193 .nr(8) 22194 .kr(4) 22195 .sr(2) 22196 .m(m) 22197 .n(n) 22198 .k(k) 22199 .iterations(1) 22200 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22201 } 22202 } 22203 } 22204 } 22205 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,n_gt_8)22206 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8) { 22207 TEST_REQUIRES_ARM_NEON; 
22208 for (uint32_t n = 9; n < 16; n++) { 22209 for (size_t k = 1; k <= 80; k += 17) { 22210 GemmMicrokernelTester() 22211 .mr(4) 22212 .nr(8) 22213 .kr(4) 22214 .sr(2) 22215 .m(4) 22216 .n(n) 22217 .k(k) 22218 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22219 } 22220 } 22221 } 22222 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,n_gt_8_strided_cn)22223 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_strided_cn) { 22224 TEST_REQUIRES_ARM_NEON; 22225 for (uint32_t n = 9; n < 16; n++) { 22226 for (size_t k = 1; k <= 80; k += 17) { 22227 GemmMicrokernelTester() 22228 .mr(4) 22229 .nr(8) 22230 .kr(4) 22231 .sr(2) 22232 .m(4) 22233 .n(n) 22234 .k(k) 22235 .cn_stride(11) 22236 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22237 } 22238 } 22239 } 22240 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,n_gt_8_subtile)22241 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_subtile) { 22242 TEST_REQUIRES_ARM_NEON; 22243 for (uint32_t n = 9; n < 16; n++) { 22244 for (size_t k = 1; k <= 80; k += 17) { 22245 for (uint32_t m = 1; m <= 4; m++) { 22246 GemmMicrokernelTester() 22247 .mr(4) 22248 .nr(8) 22249 .kr(4) 22250 .sr(2) 22251 .m(m) 22252 .n(n) 22253 .k(k) 22254 .iterations(1) 22255 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22256 } 22257 } 22258 } 22259 } 22260 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,n_div_8)22261 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8) { 22262 TEST_REQUIRES_ARM_NEON; 22263 for (uint32_t n = 16; n <= 24; n += 8) { 22264 for (size_t k = 1; k <= 80; k += 17) { 22265 GemmMicrokernelTester() 22266 .mr(4) 22267 .nr(8) 22268 .kr(4) 22269 .sr(2) 22270 .m(4) 22271 .n(n) 22272 .k(k) 22273 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22274 } 22275 } 22276 } 22277 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,n_div_8_strided_cn)22278 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_strided_cn) { 22279 TEST_REQUIRES_ARM_NEON; 22280 for (uint32_t n = 16; n <= 24; n += 8) { 22281 for (size_t k = 1; k <= 80; k += 17) { 22282 GemmMicrokernelTester() 22283 .mr(4) 22284 .nr(8) 22285 .kr(4) 22286 .sr(2) 22287 .m(4) 22288 .n(n) 22289 .k(k) 22290 .cn_stride(11) 22291 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22292 } 22293 } 22294 } 22295 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,n_div_8_subtile)22296 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_subtile) { 22297 TEST_REQUIRES_ARM_NEON; 22298 for (uint32_t n = 16; n <= 24; n += 8) { 22299 for (size_t k = 1; k <= 80; k += 17) { 22300 for (uint32_t m = 1; m <= 4; m++) { 22301 GemmMicrokernelTester() 22302 .mr(4) 22303 .nr(8) 22304 .kr(4) 22305 .sr(2) 22306 .m(m) 22307 .n(n) 22308 .k(k) 22309 .iterations(1) 22310 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22311 } 22312 } 22313 } 22314 } 22315 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,small_kernel)22316 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, small_kernel) { 22317 TEST_REQUIRES_ARM_NEON; 22318 for (size_t k = 1; k <= 80; k += 17) { 22319 GemmMicrokernelTester() 22320 .mr(4) 22321 .nr(8) 22322 .kr(4) 22323 .sr(2) 22324 .m(4) 22325 .n(8) 22326 .k(k) 22327 .ks(3) 22328 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22329 } 22330 } 22331 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,small_kernel_subtile)22332 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, small_kernel_subtile) { 22333 TEST_REQUIRES_ARM_NEON; 22334 for (size_t k = 1; k <= 80; k += 
17) { 22335 for (uint32_t n = 1; n <= 8; n++) { 22336 for (uint32_t m = 1; m <= 4; m++) { 22337 GemmMicrokernelTester() 22338 .mr(4) 22339 .nr(8) 22340 .kr(4) 22341 .sr(2) 22342 .m(m) 22343 .n(n) 22344 .k(k) 22345 .ks(3) 22346 .iterations(1) 22347 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22348 } 22349 } 22350 } 22351 } 22352 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,n_gt_8_small_kernel)22353 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_gt_8_small_kernel) { 22354 TEST_REQUIRES_ARM_NEON; 22355 for (uint32_t n = 9; n < 16; n++) { 22356 for (size_t k = 1; k <= 80; k += 17) { 22357 GemmMicrokernelTester() 22358 .mr(4) 22359 .nr(8) 22360 .kr(4) 22361 .sr(2) 22362 .m(4) 22363 .n(n) 22364 .k(k) 22365 .ks(3) 22366 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22367 } 22368 } 22369 } 22370 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,n_div_8_small_kernel)22371 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, n_div_8_small_kernel) { 22372 TEST_REQUIRES_ARM_NEON; 22373 for (uint32_t n = 16; n <= 24; n += 8) { 22374 for (size_t k = 1; k <= 80; k += 17) { 22375 GemmMicrokernelTester() 22376 .mr(4) 22377 .nr(8) 22378 .kr(4) 22379 .sr(2) 22380 .m(4) 22381 .n(n) 22382 .k(k) 22383 .ks(3) 22384 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22385 } 22386 } 22387 } 22388 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,strided_cm_subtile)22389 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cm_subtile) { 22390 TEST_REQUIRES_ARM_NEON; 22391 for (size_t k = 1; k <= 80; k += 17) { 22392 for (uint32_t n = 1; n <= 8; n++) { 22393 for (uint32_t m = 1; m <= 4; m++) { 22394 GemmMicrokernelTester() 22395 .mr(4) 22396 .nr(8) 22397 .kr(4) 22398 .sr(2) 22399 .m(m) 22400 .n(n) 22401 .k(k) 22402 .cm_stride(11) 22403 
.iterations(1) 22404 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22405 } 22406 } 22407 } 22408 } 22409 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,a_offset)22410 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, a_offset) { 22411 TEST_REQUIRES_ARM_NEON; 22412 for (size_t k = 1; k <= 80; k += 17) { 22413 GemmMicrokernelTester() 22414 .mr(4) 22415 .nr(8) 22416 .kr(4) 22417 .sr(2) 22418 .m(4) 22419 .n(8) 22420 .k(k) 22421 .ks(3) 22422 .a_offset(331) 22423 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22424 } 22425 } 22426 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,zero)22427 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, zero) { 22428 TEST_REQUIRES_ARM_NEON; 22429 for (size_t k = 1; k <= 80; k += 17) { 22430 for (uint32_t mz = 0; mz < 4; mz++) { 22431 GemmMicrokernelTester() 22432 .mr(4) 22433 .nr(8) 22434 .kr(4) 22435 .sr(2) 22436 .m(4) 22437 .n(8) 22438 .k(k) 22439 .ks(3) 22440 .a_offset(331) 22441 .zero_index(mz) 22442 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22443 } 22444 } 22445 } 22446 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,qmin)22447 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, qmin) { 22448 TEST_REQUIRES_ARM_NEON; 22449 GemmMicrokernelTester() 22450 .mr(4) 22451 .nr(8) 22452 .kr(4) 22453 .sr(2) 22454 .m(4) 22455 .n(8) 22456 .k(16) 22457 .qmin(128) 22458 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22459 } 22460 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,qmax)22461 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, qmax) { 22462 TEST_REQUIRES_ARM_NEON; 22463 GemmMicrokernelTester() 22464 .mr(4) 22465 .nr(8) 22466 .kr(4) 22467 .sr(2) 22468 .m(4) 22469 .n(8) 22470 .k(16) 22471 .qmax(128) 22472 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22473 } 22474 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL,strided_cm)22475 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MLAL, strided_cm) { 22476 TEST_REQUIRES_ARM_NEON; 22477 GemmMicrokernelTester() 22478 .mr(4) 22479 .nr(8) 22480 .kr(4) 22481 .sr(2) 22482 .m(4) 22483 .n(8) 22484 .k(16) 22485 .cm_stride(11) 22486 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22487 } 22488 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 22489 22490 22491 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_eq_8)22492 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8) { 22493 TEST_REQUIRES_ARM_NEON; 22494 GemmMicrokernelTester() 22495 .mr(4) 22496 .nr(8) 22497 .kr(8) 22498 .sr(1) 22499 .m(4) 22500 .n(8) 22501 .k(8) 22502 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22503 } 22504 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,strided_cn)22505 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cn) { 22506 TEST_REQUIRES_ARM_NEON; 22507 GemmMicrokernelTester() 22508 .mr(4) 22509 .nr(8) 22510 .kr(8) 22511 .sr(1) 22512 .m(4) 22513 .n(8) 22514 .k(8) 22515 .cn_stride(11) 22516 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22517 } 22518 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_eq_8_subtile)22519 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile) { 22520 TEST_REQUIRES_ARM_NEON; 22521 for (uint32_t n = 1; n <= 8; n++) { 22522 for (uint32_t m = 1; m <= 4; m++) { 22523 GemmMicrokernelTester() 22524 .mr(4) 22525 .nr(8) 22526 .kr(8) 22527 .sr(1) 22528 .m(m) 22529 .n(n) 22530 .k(8) 22531 .iterations(1) 22532 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22533 } 22534 } 22535 } 22536 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_eq_8_subtile_m)22537 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile_m) { 22538 TEST_REQUIRES_ARM_NEON; 22539 for (uint32_t m = 1; m <= 4; m++) { 22540 GemmMicrokernelTester() 22541 .mr(4) 22542 .nr(8) 22543 .kr(8) 22544 .sr(1) 22545 .m(m) 22546 .n(8) 22547 .k(8) 22548 .iterations(1) 22549 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22550 } 22551 } 22552 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_eq_8_subtile_n)22553 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile_n) { 22554 TEST_REQUIRES_ARM_NEON; 22555 for (uint32_t n = 1; n <= 8; n++) { 22556 GemmMicrokernelTester() 22557 .mr(4) 22558 .nr(8) 22559 .kr(8) 22560 .sr(1) 22561 .m(4) 22562 .n(n) 22563 .k(8) 22564 .iterations(1) 22565 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22566 } 22567 } 22568 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_lt_8)22569 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_lt_8) { 22570 TEST_REQUIRES_ARM_NEON; 22571 for (size_t k = 1; k < 8; k++) { 22572 GemmMicrokernelTester() 22573 .mr(4) 22574 .nr(8) 22575 .kr(8) 22576 .sr(1) 22577 .m(4) 22578 .n(8) 22579 .k(k) 22580 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22581 } 22582 } 22583 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_lt_8_subtile)22584 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_lt_8_subtile) { 22585 TEST_REQUIRES_ARM_NEON; 22586 for (size_t k = 1; k < 8; k++) { 22587 for (uint32_t n = 1; n <= 8; n++) { 22588 for (uint32_t m = 1; m <= 4; m++) { 22589 GemmMicrokernelTester() 22590 .mr(4) 22591 .nr(8) 22592 .kr(8) 22593 .sr(1) 22594 .m(m) 22595 .n(n) 22596 .k(k) 22597 .iterations(1) 22598 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22599 } 22600 } 22601 } 22602 } 22603 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_gt_8)22604 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_gt_8) { 22605 TEST_REQUIRES_ARM_NEON; 22606 for (size_t k = 9; k < 16; k++) { 22607 GemmMicrokernelTester() 22608 .mr(4) 22609 .nr(8) 22610 .kr(8) 22611 .sr(1) 22612 .m(4) 22613 .n(8) 22614 .k(k) 22615 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22616 } 22617 } 22618 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_gt_8_subtile)22619 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_gt_8_subtile) { 22620 TEST_REQUIRES_ARM_NEON; 22621 for (size_t k = 9; k < 16; k++) { 22622 for (uint32_t n = 1; n <= 8; n++) { 22623 for (uint32_t m = 1; m <= 4; m++) { 22624 GemmMicrokernelTester() 22625 .mr(4) 22626 .nr(8) 22627 .kr(8) 22628 .sr(1) 22629 .m(m) 22630 .n(n) 22631 .k(k) 22632 .iterations(1) 22633 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22634 } 22635 } 22636 } 22637 } 22638 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_div_8)22639 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_div_8) { 22640 TEST_REQUIRES_ARM_NEON; 22641 for (size_t k = 16; k <= 80; k += 8) { 22642 GemmMicrokernelTester() 22643 .mr(4) 22644 .nr(8) 22645 .kr(8) 22646 .sr(1) 22647 .m(4) 22648 .n(8) 22649 .k(k) 22650 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22651 } 22652 } 22653 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,k_div_8_subtile)22654 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_div_8_subtile) { 22655 TEST_REQUIRES_ARM_NEON; 22656 for (size_t k = 16; k <= 80; k += 8) { 22657 for (uint32_t n = 1; n <= 8; n++) { 22658 for (uint32_t m = 1; m <= 4; m++) { 22659 
GemmMicrokernelTester() 22660 .mr(4) 22661 .nr(8) 22662 .kr(8) 22663 .sr(1) 22664 .m(m) 22665 .n(n) 22666 .k(k) 22667 .iterations(1) 22668 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22669 } 22670 } 22671 } 22672 } 22673 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,n_gt_8)22674 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8) { 22675 TEST_REQUIRES_ARM_NEON; 22676 for (uint32_t n = 9; n < 16; n++) { 22677 for (size_t k = 1; k <= 40; k += 9) { 22678 GemmMicrokernelTester() 22679 .mr(4) 22680 .nr(8) 22681 .kr(8) 22682 .sr(1) 22683 .m(4) 22684 .n(n) 22685 .k(k) 22686 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22687 } 22688 } 22689 } 22690 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,n_gt_8_strided_cn)22691 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_strided_cn) { 22692 TEST_REQUIRES_ARM_NEON; 22693 for (uint32_t n = 9; n < 16; n++) { 22694 for (size_t k = 1; k <= 40; k += 9) { 22695 GemmMicrokernelTester() 22696 .mr(4) 22697 .nr(8) 22698 .kr(8) 22699 .sr(1) 22700 .m(4) 22701 .n(n) 22702 .k(k) 22703 .cn_stride(11) 22704 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22705 } 22706 } 22707 } 22708 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,n_gt_8_subtile)22709 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_subtile) { 22710 TEST_REQUIRES_ARM_NEON; 22711 for (uint32_t n = 9; n < 16; n++) { 22712 for (size_t k = 1; k <= 40; k += 9) { 22713 for (uint32_t m = 1; m <= 4; m++) { 22714 GemmMicrokernelTester() 22715 .mr(4) 22716 .nr(8) 22717 .kr(8) 22718 .sr(1) 22719 .m(m) 22720 .n(n) 22721 .k(k) 22722 .iterations(1) 22723 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22724 } 22725 } 22726 } 22727 } 22728 
// n = 16, 24 with k = 1, 10, 19, 28, 37 and m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride set to 11.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(11)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 16, 24, k = 1, 10, 19, 28, 37, every m in [1, 4]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 with k = 1, 10, 19, 28, 37, m = 4, n = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(8)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks = 3 for every m in [1, 4] and n in [1, 8]; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(8)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 with n in [9, 15], k = 1, 10, 19, 28, 37, m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks = 3 with n = 16, 24, k = 1, 10, 19, 28, 37, m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(8)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cm_subtile) { 22858 TEST_REQUIRES_ARM_NEON; 22859 for (size_t k = 1; k <= 40; k += 9) { 22860 for (uint32_t n = 1; n <= 8; n++) { 22861 for (uint32_t m = 1; m <= 4; m++) { 22862 GemmMicrokernelTester() 22863 .mr(4) 22864 .nr(8) 22865 .kr(8) 22866 .sr(1) 22867 .m(m) 22868 .n(n) 22869 .k(k) 22870 .cm_stride(11) 22871 .iterations(1) 22872 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22873 } 22874 } 22875 } 22876 } 22877 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,a_offset)22878 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, a_offset) { 22879 TEST_REQUIRES_ARM_NEON; 22880 for (size_t k = 1; k <= 40; k += 9) { 22881 GemmMicrokernelTester() 22882 .mr(4) 22883 .nr(8) 22884 .kr(8) 22885 .sr(1) 22886 .m(4) 22887 .n(8) 22888 .k(k) 22889 .ks(3) 22890 .a_offset(163) 22891 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22892 } 22893 } 22894 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,zero)22895 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, zero) { 22896 TEST_REQUIRES_ARM_NEON; 22897 for (size_t k = 1; k <= 40; k += 9) { 22898 for (uint32_t mz = 0; mz < 4; mz++) { 22899 GemmMicrokernelTester() 22900 .mr(4) 22901 .nr(8) 22902 .kr(8) 22903 .sr(1) 22904 .m(4) 22905 .n(8) 22906 .k(k) 22907 .ks(3) 22908 .a_offset(163) 22909 .zero_index(mz) 22910 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22911 } 22912 } 22913 } 22914 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,qmin)22915 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, qmin) { 22916 TEST_REQUIRES_ARM_NEON; 22917 GemmMicrokernelTester() 22918 .mr(4) 22919 .nr(8) 22920 .kr(8) 22921 .sr(1) 22922 .m(4) 22923 .n(8) 22924 .k(8) 22925 .qmin(128) 22926 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22927 } 22928 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,qmax)22929 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, qmax) { 22930 TEST_REQUIRES_ARM_NEON; 22931 GemmMicrokernelTester() 22932 .mr(4) 22933 .nr(8) 22934 .kr(8) 22935 .sr(1) 22936 .m(4) 22937 .n(8) 22938 .k(8) 22939 .qmax(128) 22940 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22941 } 22942 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL,strided_cm)22943 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cm) { 22944 TEST_REQUIRES_ARM_NEON; 22945 GemmMicrokernelTester() 22946 .mr(4) 22947 .nr(8) 22948 .kr(8) 22949 .sr(1) 22950 .m(4) 22951 .n(8) 22952 .k(8) 22953 .cm_stride(11) 22954 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22955 } 22956 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 22957 22958 22959 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,k_eq_16)22960 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_eq_16) { 22961 TEST_REQUIRES_ARM_NEON; 22962 GemmMicrokernelTester() 22963 .mr(4) 22964 .nr(8) 22965 .kr(16) 22966 .sr(1) 22967 .m(4) 22968 .n(8) 22969 .k(16) 22970 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22971 } 22972 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,strided_cn)22973 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, strided_cn) { 22974 TEST_REQUIRES_ARM_NEON; 22975 GemmMicrokernelTester() 22976 .mr(4) 22977 .nr(8) 22978 .kr(16) 22979 .sr(1) 22980 .m(4) 22981 .n(8) 22982 .k(16) 22983 .cn_stride(11) 22984 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 22985 } 22986 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,k_eq_16_subtile)22987 
// k = 16 with every (m, n) in 1..4 x 1..8; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(8)
        .kr(16)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 16, n = 8, sweeping only m over 1..4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(16)
      .sr(1)
      .m(m)
      .n(8)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, m = 4, sweeping only n over 1..8.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(16)
      .sr(1)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 4x8 tile with every k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(16)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,k_lt_16_subtile)23052 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_lt_16_subtile) { 23053 TEST_REQUIRES_ARM_NEON; 23054 for (size_t k = 1; k < 16; k++) { 23055 for (uint32_t n = 1; n <= 8; n++) { 23056 for (uint32_t m = 1; m <= 4; m++) { 23057 GemmMicrokernelTester() 23058 .mr(4) 23059 .nr(8) 23060 .kr(16) 23061 .sr(1) 23062 .m(m) 23063 .n(n) 23064 .k(k) 23065 .iterations(1) 23066 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23067 } 23068 } 23069 } 23070 } 23071 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,k_gt_16)23072 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_gt_16) { 23073 TEST_REQUIRES_ARM_NEON; 23074 for (size_t k = 17; k < 32; k++) { 23075 GemmMicrokernelTester() 23076 .mr(4) 23077 .nr(8) 23078 .kr(16) 23079 .sr(1) 23080 .m(4) 23081 .n(8) 23082 .k(k) 23083 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23084 } 23085 } 23086 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,k_gt_16_subtile)23087 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_gt_16_subtile) { 23088 TEST_REQUIRES_ARM_NEON; 23089 for (size_t k = 17; k < 32; k++) { 23090 for (uint32_t n = 1; n <= 8; n++) { 23091 for (uint32_t m = 1; m <= 4; m++) { 23092 GemmMicrokernelTester() 23093 .mr(4) 23094 .nr(8) 23095 .kr(16) 23096 .sr(1) 23097 .m(m) 23098 .n(n) 23099 .k(k) 23100 .iterations(1) 23101 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23102 } 23103 } 23104 } 23105 } 23106 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,k_div_16)23107 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_div_16) { 23108 TEST_REQUIRES_ARM_NEON; 23109 for (size_t k = 32; k <= 160; k += 16) { 23110 GemmMicrokernelTester() 23111 .mr(4) 23112 .nr(8) 23113 .kr(16) 23114 .sr(1) 23115 .m(4) 23116 .n(8) 23117 .k(k) 23118 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23119 } 23120 } 23121 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,k_div_16_subtile)23122 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, k_div_16_subtile) { 23123 TEST_REQUIRES_ARM_NEON; 23124 for (size_t k = 32; k <= 160; k += 16) { 23125 for (uint32_t n = 1; n <= 8; n++) { 23126 for (uint32_t m = 1; m <= 4; m++) { 23127 GemmMicrokernelTester() 23128 .mr(4) 23129 .nr(8) 23130 .kr(16) 23131 .sr(1) 23132 .m(m) 23133 .n(n) 23134 .k(k) 23135 .iterations(1) 23136 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23137 } 23138 } 23139 } 23140 } 23141 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,n_gt_8)23142 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_gt_8) { 23143 TEST_REQUIRES_ARM_NEON; 23144 for (uint32_t n = 9; n < 16; n++) { 23145 for (size_t k = 1; k <= 80; k += 17) { 23146 GemmMicrokernelTester() 23147 .mr(4) 23148 .nr(8) 23149 .kr(16) 23150 .sr(1) 23151 .m(4) 23152 .n(n) 23153 .k(k) 23154 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23155 } 23156 } 23157 } 23158 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,n_gt_8_strided_cn)23159 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_gt_8_strided_cn) { 23160 TEST_REQUIRES_ARM_NEON; 23161 for (uint32_t n = 9; n < 16; n++) { 23162 for (size_t k = 1; k <= 80; k += 17) { 23163 GemmMicrokernelTester() 23164 .mr(4) 23165 .nr(8) 23166 .kr(16) 23167 .sr(1) 23168 .m(4) 23169 .n(n) 23170 .k(k) 23171 .cn_stride(11) 23172 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23173 } 23174 } 23175 } 23176 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,n_gt_8_subtile)23177 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_gt_8_subtile) { 23178 
TEST_REQUIRES_ARM_NEON; 23179 for (uint32_t n = 9; n < 16; n++) { 23180 for (size_t k = 1; k <= 80; k += 17) { 23181 for (uint32_t m = 1; m <= 4; m++) { 23182 GemmMicrokernelTester() 23183 .mr(4) 23184 .nr(8) 23185 .kr(16) 23186 .sr(1) 23187 .m(m) 23188 .n(n) 23189 .k(k) 23190 .iterations(1) 23191 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23192 } 23193 } 23194 } 23195 } 23196 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,n_div_8)23197 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_div_8) { 23198 TEST_REQUIRES_ARM_NEON; 23199 for (uint32_t n = 16; n <= 24; n += 8) { 23200 for (size_t k = 1; k <= 80; k += 17) { 23201 GemmMicrokernelTester() 23202 .mr(4) 23203 .nr(8) 23204 .kr(16) 23205 .sr(1) 23206 .m(4) 23207 .n(n) 23208 .k(k) 23209 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23210 } 23211 } 23212 } 23213 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,n_div_8_strided_cn)23214 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_div_8_strided_cn) { 23215 TEST_REQUIRES_ARM_NEON; 23216 for (uint32_t n = 16; n <= 24; n += 8) { 23217 for (size_t k = 1; k <= 80; k += 17) { 23218 GemmMicrokernelTester() 23219 .mr(4) 23220 .nr(8) 23221 .kr(16) 23222 .sr(1) 23223 .m(4) 23224 .n(n) 23225 .k(k) 23226 .cn_stride(11) 23227 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23228 } 23229 } 23230 } 23231 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,n_div_8_subtile)23232 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_div_8_subtile) { 23233 TEST_REQUIRES_ARM_NEON; 23234 for (uint32_t n = 16; n <= 24; n += 8) { 23235 for (size_t k = 1; k <= 80; k += 17) { 23236 for (uint32_t m = 1; m <= 4; m++) { 23237 GemmMicrokernelTester() 23238 .mr(4) 23239 .nr(8) 23240 .kr(16) 23241 .sr(1) 23242 .m(m) 23243 .n(n) 23244 .k(k) 23245 
.iterations(1) 23246 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23247 } 23248 } 23249 } 23250 } 23251 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,small_kernel)23252 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, small_kernel) { 23253 TEST_REQUIRES_ARM_NEON; 23254 for (size_t k = 1; k <= 80; k += 17) { 23255 GemmMicrokernelTester() 23256 .mr(4) 23257 .nr(8) 23258 .kr(16) 23259 .sr(1) 23260 .m(4) 23261 .n(8) 23262 .k(k) 23263 .ks(3) 23264 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23265 } 23266 } 23267 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,small_kernel_subtile)23268 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, small_kernel_subtile) { 23269 TEST_REQUIRES_ARM_NEON; 23270 for (size_t k = 1; k <= 80; k += 17) { 23271 for (uint32_t n = 1; n <= 8; n++) { 23272 for (uint32_t m = 1; m <= 4; m++) { 23273 GemmMicrokernelTester() 23274 .mr(4) 23275 .nr(8) 23276 .kr(16) 23277 .sr(1) 23278 .m(m) 23279 .n(n) 23280 .k(k) 23281 .ks(3) 23282 .iterations(1) 23283 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23284 } 23285 } 23286 } 23287 } 23288 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,n_gt_8_small_kernel)23289 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, n_gt_8_small_kernel) { 23290 TEST_REQUIRES_ARM_NEON; 23291 for (uint32_t n = 9; n < 16; n++) { 23292 for (size_t k = 1; k <= 80; k += 17) { 23293 GemmMicrokernelTester() 23294 .mr(4) 23295 .nr(8) 23296 .kr(16) 23297 .sr(1) 23298 .m(4) 23299 .n(n) 23300 .k(k) 23301 .ks(3) 23302 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23303 } 23304 } 23305 } 23306 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,n_div_8_small_kernel)23307 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, 
n_div_8_small_kernel) { 23308 TEST_REQUIRES_ARM_NEON; 23309 for (uint32_t n = 16; n <= 24; n += 8) { 23310 for (size_t k = 1; k <= 80; k += 17) { 23311 GemmMicrokernelTester() 23312 .mr(4) 23313 .nr(8) 23314 .kr(16) 23315 .sr(1) 23316 .m(4) 23317 .n(n) 23318 .k(k) 23319 .ks(3) 23320 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23321 } 23322 } 23323 } 23324 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,strided_cm_subtile)23325 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, strided_cm_subtile) { 23326 TEST_REQUIRES_ARM_NEON; 23327 for (size_t k = 1; k <= 80; k += 17) { 23328 for (uint32_t n = 1; n <= 8; n++) { 23329 for (uint32_t m = 1; m <= 4; m++) { 23330 GemmMicrokernelTester() 23331 .mr(4) 23332 .nr(8) 23333 .kr(16) 23334 .sr(1) 23335 .m(m) 23336 .n(n) 23337 .k(k) 23338 .cm_stride(11) 23339 .iterations(1) 23340 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23341 } 23342 } 23343 } 23344 } 23345 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,a_offset)23346 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, a_offset) { 23347 TEST_REQUIRES_ARM_NEON; 23348 for (size_t k = 1; k <= 80; k += 17) { 23349 GemmMicrokernelTester() 23350 .mr(4) 23351 .nr(8) 23352 .kr(16) 23353 .sr(1) 23354 .m(4) 23355 .n(8) 23356 .k(k) 23357 .ks(3) 23358 .a_offset(331) 23359 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23360 } 23361 } 23362 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,zero)23363 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, zero) { 23364 TEST_REQUIRES_ARM_NEON; 23365 for (size_t k = 1; k <= 80; k += 17) { 23366 for (uint32_t mz = 0; mz < 4; mz++) { 23367 GemmMicrokernelTester() 23368 .mr(4) 23369 .nr(8) 23370 .kr(16) 23371 .sr(1) 23372 .m(4) 23373 .n(8) 23374 .k(k) 23375 .ks(3) 23376 .a_offset(331) 23377 .zero_index(mz) 23378 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23379 } 23380 } 23381 } 23382 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,qmin)23383 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, qmin) { 23384 TEST_REQUIRES_ARM_NEON; 23385 GemmMicrokernelTester() 23386 .mr(4) 23387 .nr(8) 23388 .kr(16) 23389 .sr(1) 23390 .m(4) 23391 .n(8) 23392 .k(16) 23393 .qmin(128) 23394 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23395 } 23396 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,qmax)23397 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, qmax) { 23398 TEST_REQUIRES_ARM_NEON; 23399 GemmMicrokernelTester() 23400 .mr(4) 23401 .nr(8) 23402 .kr(16) 23403 .sr(1) 23404 .m(4) 23405 .n(8) 23406 .k(16) 23407 .qmax(128) 23408 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23409 } 23410 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL,strided_cm)23411 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C16__NEON_MLAL, strided_cm) { 23412 TEST_REQUIRES_ARM_NEON; 23413 GemmMicrokernelTester() 23414 .mr(4) 23415 .nr(8) 23416 .kr(16) 23417 .sr(1) 23418 .m(4) 23419 .n(8) 23420 .k(16) 23421 .cm_stride(11) 23422 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23423 } 23424 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 23425 23426 23427 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP,k_eq_16)23428 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16) { 23429 TEST_REQUIRES_ARM_NEON; 23430 GemmMicrokernelTester() 23431 .mr(4) 23432 .nr(16) 23433 .kr(2) 23434 .sr(1) 23435 .m(4) 23436 .n(16) 23437 .k(16) 23438 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23439 } 23440 
// Full 4x16 tile, k = 16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k = 16 with every (m, n) in 1..4 x 1..16; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(m)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 16, n = 16, sweeping only m over 1..4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(m)
      .n(16)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, m = 4, sweeping only n over 1..16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 16; n++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(n)
      .k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full 4x16 tile with every k in 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in 1..15 with every (m, n) in 1..4 x 1..16; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile with every k in 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k in 17..31 with every (m, n) in 1..4 x 1..16; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile with k = 32..160 in steps of 16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 32..160 in steps of 16 with every (m, n) in 1..4 x 1..16.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 17..31 with m = 4; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
// n = 17..31 with m = 4 and cn_stride = 19; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31, k = 1..80 in steps of 17, every m in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 with m = 4; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, k = 1..80 in steps of 17, every m in 1..4; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile with ks = 3; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks = 3 with every (m, n) in 1..4 x 1..16; one iteration per configuration.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// ks = 3 with n = 17..31 and m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// ks = 3 with n = 32 and 48 and m = 4.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride = 19 with every (m, n) in 1..4 x 1..16; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
// Full 4x16 tile with ks = 3 and a_offset = 331; k = 1..80 in steps of 17.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same as a_offset, additionally sweeping zero_index over 0..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Full 4x16 tile, k = 16, with qmin set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full 4x16 tile, k = 16, with qmax set to 128.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4)
    .nr(16)
    .kr(2)
    .sr(1)
    .m(4)
    .n(16)
    .k(16)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_DUP, strided_cm) { 23880 TEST_REQUIRES_ARM_NEON; 23881 GemmMicrokernelTester() 23882 .mr(4) 23883 .nr(16) 23884 .kr(2) 23885 .sr(1) 23886 .m(4) 23887 .n(16) 23888 .k(16) 23889 .cm_stride(19) 23890 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23891 } 23892 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 23893 23894 23895 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,k_eq_16)23896 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_eq_16) { 23897 TEST_REQUIRES_ARM_NEON; 23898 GemmMicrokernelTester() 23899 .mr(4) 23900 .nr(16) 23901 .kr(2) 23902 .sr(1) 23903 .m(4) 23904 .n(16) 23905 .k(16) 23906 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23907 } 23908 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,strided_cn)23909 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, strided_cn) { 23910 TEST_REQUIRES_ARM_NEON; 23911 GemmMicrokernelTester() 23912 .mr(4) 23913 .nr(16) 23914 .kr(2) 23915 .sr(1) 23916 .m(4) 23917 .n(16) 23918 .k(16) 23919 .cn_stride(19) 23920 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23921 } 23922 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,k_eq_16_subtile)23923 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_eq_16_subtile) { 23924 TEST_REQUIRES_ARM_NEON; 23925 for (uint32_t n = 1; n <= 16; n++) { 23926 for (uint32_t m = 1; m <= 4; m++) { 23927 GemmMicrokernelTester() 23928 .mr(4) 23929 .nr(16) 23930 .kr(2) 23931 .sr(1) 23932 .m(m) 23933 .n(n) 23934 .k(16) 23935 .iterations(1) 23936 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 23937 } 23938 } 23939 } 23940 
// m in [1, 4] with n == 16 and k == 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(mc).n(16).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// n in [1, 16] with m == 4 and k == 16, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(nc).k(16)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full tile for every k in [1, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) subtile for every k in [1, 15], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for every k in [17, 31].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) subtile for every k in [17, 31], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 17; kc < 32; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) subtile for k = 32, 48, ..., 160, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 32; kc <= 160; kc += 16) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [17, 31] with m == 4; k steps 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [17, 31], stepped k, and every m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 with m == 4; k steps 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, stepped k, and every m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 80; kc += 17) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,small_kernel)24188 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, small_kernel) { 24189 TEST_REQUIRES_ARM_NEON; 24190 for (size_t k = 1; k <= 80; k += 17) { 24191 GemmMicrokernelTester() 24192 .mr(4) 24193 .nr(16) 24194 .kr(2) 24195 .sr(1) 24196 .m(4) 24197 .n(16) 24198 .k(k) 24199 .ks(3) 24200 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24201 } 24202 } 24203 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,small_kernel_subtile)24204 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, small_kernel_subtile) { 24205 TEST_REQUIRES_ARM_NEON; 24206 for (size_t k = 1; k <= 80; k += 17) { 24207 for (uint32_t n = 1; n <= 16; n++) { 24208 for (uint32_t m = 1; m <= 4; m++) { 24209 GemmMicrokernelTester() 24210 .mr(4) 24211 .nr(16) 24212 .kr(2) 24213 .sr(1) 24214 .m(m) 24215 .n(n) 24216 .k(k) 24217 .ks(3) 24218 .iterations(1) 24219 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24220 } 24221 } 24222 } 24223 } 24224 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,n_gt_16_small_kernel)24225 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_gt_16_small_kernel) { 24226 TEST_REQUIRES_ARM_NEON; 24227 for (uint32_t n = 17; n < 32; n++) { 24228 for (size_t k = 1; k <= 80; k += 17) { 24229 GemmMicrokernelTester() 24230 .mr(4) 24231 .nr(16) 24232 .kr(2) 24233 .sr(1) 24234 .m(4) 24235 .n(n) 24236 .k(k) 24237 .ks(3) 24238 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24239 } 24240 } 24241 } 24242 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,n_div_16_small_kernel)24243 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, n_div_16_small_kernel) { 24244 TEST_REQUIRES_ARM_NEON; 24245 for (uint32_t n = 32; n <= 48; n += 16) { 24246 for (size_t k = 1; k <= 
80; k += 17) { 24247 GemmMicrokernelTester() 24248 .mr(4) 24249 .nr(16) 24250 .kr(2) 24251 .sr(1) 24252 .m(4) 24253 .n(n) 24254 .k(k) 24255 .ks(3) 24256 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24257 } 24258 } 24259 } 24260 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,strided_cm_subtile)24261 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, strided_cm_subtile) { 24262 TEST_REQUIRES_ARM_NEON; 24263 for (size_t k = 1; k <= 80; k += 17) { 24264 for (uint32_t n = 1; n <= 16; n++) { 24265 for (uint32_t m = 1; m <= 4; m++) { 24266 GemmMicrokernelTester() 24267 .mr(4) 24268 .nr(16) 24269 .kr(2) 24270 .sr(1) 24271 .m(m) 24272 .n(n) 24273 .k(k) 24274 .cm_stride(19) 24275 .iterations(1) 24276 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24277 } 24278 } 24279 } 24280 } 24281 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,a_offset)24282 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, a_offset) { 24283 TEST_REQUIRES_ARM_NEON; 24284 for (size_t k = 1; k <= 80; k += 17) { 24285 GemmMicrokernelTester() 24286 .mr(4) 24287 .nr(16) 24288 .kr(2) 24289 .sr(1) 24290 .m(4) 24291 .n(16) 24292 .k(k) 24293 .ks(3) 24294 .a_offset(331) 24295 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24296 } 24297 } 24298 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,zero)24299 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, zero) { 24300 TEST_REQUIRES_ARM_NEON; 24301 for (size_t k = 1; k <= 80; k += 17) { 24302 for (uint32_t mz = 0; mz < 4; mz++) { 24303 GemmMicrokernelTester() 24304 .mr(4) 24305 .nr(16) 24306 .kr(2) 24307 .sr(1) 24308 .m(4) 24309 .n(16) 24310 .k(k) 24311 .ks(3) 24312 .a_offset(331) 24313 .zero_index(mz) 24314 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24315 } 24316 } 24317 } 24318 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,qmin)24319 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, qmin) { 24320 TEST_REQUIRES_ARM_NEON; 24321 GemmMicrokernelTester() 24322 .mr(4) 24323 .nr(16) 24324 .kr(2) 24325 .sr(1) 24326 .m(4) 24327 .n(16) 24328 .k(16) 24329 .qmin(128) 24330 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24331 } 24332 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,qmax)24333 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, qmax) { 24334 TEST_REQUIRES_ARM_NEON; 24335 GemmMicrokernelTester() 24336 .mr(4) 24337 .nr(16) 24338 .kr(2) 24339 .sr(1) 24340 .m(4) 24341 .n(16) 24342 .k(16) 24343 .qmax(128) 24344 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24345 } 24346 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R,strided_cm)24347 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD2R, strided_cm) { 24348 TEST_REQUIRES_ARM_NEON; 24349 GemmMicrokernelTester() 24350 .mr(4) 24351 .nr(16) 24352 .kr(2) 24353 .sr(1) 24354 .m(4) 24355 .n(16) 24356 .k(16) 24357 .cm_stride(19) 24358 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24359 } 24360 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 24361 24362 24363 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,k_eq_8)24364 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8) { 24365 TEST_REQUIRES_ARM_NEON; 24366 GemmMicrokernelTester() 24367 .mr(4) 24368 .nr(16) 24369 .kr(2) 24370 .sr(1) 24371 .m(4) 24372 .n(16) 24373 .k(8) 24374 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24375 } 24376 
// Full tile at k == 8 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(2).sr(1)
    .m(4).n(16).k(8)
    .cn_stride(19)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Every (m, n) with m in [1, 4] and n in [1, 16] at k == 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    for (uint32_t mc = 1; mc <= 4; ++mc) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(mc).n(nc).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m in [1, 4] with n == 16 and k == 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t mc = 1; mc <= 4; ++mc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(mc).n(16).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// n in [1, 16] with m == 4 and k == 8, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 1; nc <= 16; ++nc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(nc).k(8)
      .iterations(1)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Full tile for every k in [1, 7].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) subtile for every k in [1, 7], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc < 8; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for every k in [9, 15].
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) subtile for every k in [9, 15], one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 9; kc < 16; ++kc) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(kc)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) subtile for k = 16, 24, ..., 80, one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 16; kc <= 80; kc += 8) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [17, 31] with m == 4; k steps 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [17, 31] with cn_stride(19); k steps 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n in [17, 31], stepped k, and every m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n = 32 and 48 with m == 4; k steps 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16 with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48, stepped k, and every m in [1, 4]; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile with ks(3); k steps 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) subtile with ks(3) over the stepped k sweep; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// n in [17, 31] with ks(3) over the stepped k sweep.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 17; nc < 32; ++nc) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32 and 48 with ks(3) over the stepped k sweep.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t nc = 32; nc <= 48; nc += 16) {
    for (size_t kc = 1; kc <= 40; kc += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(nc).k(kc)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Every (m, n) subtile with cm_stride(19) over the stepped k sweep; one iteration each.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t nc = 1; nc <= 16; ++nc) {
      for (uint32_t mc = 1; mc <= 4; ++mc) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(2).sr(1)
          .m(mc).n(nc).k(kc)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full 4x16 tile with ks(3) and a_offset(163); k steps 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(2).sr(1)
      .m(4).n(16).k(kc)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same sweep as a_offset, additionally passing each zero_index in [0, 4).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t kc = 1; kc <= 40; kc += 9) {
    for (uint32_t zero_row = 0; zero_row < 4; ++zero_row) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(2).sr(1)
        .m(4).n(16).k(kc)
        .ks(3)
        .a_offset(163)
        .zero_index(zero_row)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Full tile at k == 8 with qmin(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, qmin) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(2).sr(1)
    .m(4).n(16).k(8)
    .qmin(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// Full tile at k == 8 with qmax(128).
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, qmax) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(16).kr(2).sr(1)
    .m(4).n(16).k(8)
    .qmax(128)
    .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R,strided_cm)24815 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD2R, strided_cm) { 24816 TEST_REQUIRES_ARM_NEON; 24817 GemmMicrokernelTester() 24818 .mr(4) 24819 .nr(16) 24820 .kr(2) 24821 .sr(1) 24822 .m(4) 24823 .n(16) 24824 .k(8) 24825 .cm_stride(19) 24826 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24827 } 24828 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 24829 24830 24831 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_eq_16)24832 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_eq_16) { 24833 TEST_REQUIRES_ARM_NEON; 24834 GemmMicrokernelTester() 24835 .mr(4) 24836 .nr(16) 24837 .kr(4) 24838 .sr(1) 24839 .m(4) 24840 .n(16) 24841 .k(16) 24842 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24843 } 24844 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,strided_cn)24845 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, strided_cn) { 24846 TEST_REQUIRES_ARM_NEON; 24847 GemmMicrokernelTester() 24848 .mr(4) 24849 .nr(16) 24850 .kr(4) 24851 .sr(1) 24852 .m(4) 24853 .n(16) 24854 .k(16) 24855 .cn_stride(19) 24856 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24857 } 24858 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_eq_16_subtile)24859 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_eq_16_subtile) { 24860 TEST_REQUIRES_ARM_NEON; 24861 for (uint32_t n = 1; n <= 16; n++) { 24862 for (uint32_t m = 1; m <= 4; m++) { 24863 GemmMicrokernelTester() 24864 .mr(4) 24865 .nr(16) 24866 .kr(4) 24867 .sr(1) 24868 .m(m) 24869 .n(n) 24870 .k(16) 24871 .iterations(1) 24872 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24873 } 24874 
} 24875 } 24876 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_eq_16_subtile_m)24877 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) { 24878 TEST_REQUIRES_ARM_NEON; 24879 for (uint32_t m = 1; m <= 4; m++) { 24880 GemmMicrokernelTester() 24881 .mr(4) 24882 .nr(16) 24883 .kr(4) 24884 .sr(1) 24885 .m(m) 24886 .n(16) 24887 .k(16) 24888 .iterations(1) 24889 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24890 } 24891 } 24892 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_eq_16_subtile_n)24893 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) { 24894 TEST_REQUIRES_ARM_NEON; 24895 for (uint32_t n = 1; n <= 16; n++) { 24896 GemmMicrokernelTester() 24897 .mr(4) 24898 .nr(16) 24899 .kr(4) 24900 .sr(1) 24901 .m(4) 24902 .n(n) 24903 .k(16) 24904 .iterations(1) 24905 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24906 } 24907 } 24908 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_lt_16)24909 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_lt_16) { 24910 TEST_REQUIRES_ARM_NEON; 24911 for (size_t k = 1; k < 16; k++) { 24912 GemmMicrokernelTester() 24913 .mr(4) 24914 .nr(16) 24915 .kr(4) 24916 .sr(1) 24917 .m(4) 24918 .n(16) 24919 .k(k) 24920 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24921 } 24922 } 24923 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_lt_16_subtile)24924 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_lt_16_subtile) { 24925 TEST_REQUIRES_ARM_NEON; 24926 for (size_t k = 1; k < 16; k++) { 24927 for (uint32_t n = 1; n <= 16; n++) { 24928 for (uint32_t m = 1; m <= 4; m++) { 24929 GemmMicrokernelTester() 24930 .mr(4) 24931 .nr(16) 24932 .kr(4) 24933 .sr(1) 24934 .m(m) 24935 .n(n) 24936 .k(k) 24937 .iterations(1) 24938 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24939 } 24940 } 24941 } 24942 } 24943 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_gt_16)24944 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_gt_16) { 24945 TEST_REQUIRES_ARM_NEON; 24946 for (size_t k = 17; k < 32; k++) { 24947 GemmMicrokernelTester() 24948 .mr(4) 24949 .nr(16) 24950 .kr(4) 24951 .sr(1) 24952 .m(4) 24953 .n(16) 24954 .k(k) 24955 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24956 } 24957 } 24958 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_gt_16_subtile)24959 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_gt_16_subtile) { 24960 TEST_REQUIRES_ARM_NEON; 24961 for (size_t k = 17; k < 32; k++) { 24962 for (uint32_t n = 1; n <= 16; n++) { 24963 for (uint32_t m = 1; m <= 4; m++) { 24964 GemmMicrokernelTester() 24965 .mr(4) 24966 .nr(16) 24967 .kr(4) 24968 .sr(1) 24969 .m(m) 24970 .n(n) 24971 .k(k) 24972 .iterations(1) 24973 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24974 } 24975 } 24976 } 24977 } 24978 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_div_16)24979 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_div_16) { 24980 TEST_REQUIRES_ARM_NEON; 24981 for (size_t k = 32; k <= 160; k += 16) { 24982 GemmMicrokernelTester() 24983 .mr(4) 24984 .nr(16) 24985 .kr(4) 24986 .sr(1) 24987 .m(4) 24988 .n(16) 24989 .k(k) 24990 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 24991 } 24992 } 24993 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,k_div_16_subtile)24994 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, k_div_16_subtile) { 24995 TEST_REQUIRES_ARM_NEON; 24996 for (size_t k = 32; k <= 160; k += 16) { 24997 for (uint32_t 
n = 1; n <= 16; n++) { 24998 for (uint32_t m = 1; m <= 4; m++) { 24999 GemmMicrokernelTester() 25000 .mr(4) 25001 .nr(16) 25002 .kr(4) 25003 .sr(1) 25004 .m(m) 25005 .n(n) 25006 .k(k) 25007 .iterations(1) 25008 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25009 } 25010 } 25011 } 25012 } 25013 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,n_gt_16)25014 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_gt_16) { 25015 TEST_REQUIRES_ARM_NEON; 25016 for (uint32_t n = 17; n < 32; n++) { 25017 for (size_t k = 1; k <= 80; k += 17) { 25018 GemmMicrokernelTester() 25019 .mr(4) 25020 .nr(16) 25021 .kr(4) 25022 .sr(1) 25023 .m(4) 25024 .n(n) 25025 .k(k) 25026 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25027 } 25028 } 25029 } 25030 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,n_gt_16_strided_cn)25031 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) { 25032 TEST_REQUIRES_ARM_NEON; 25033 for (uint32_t n = 17; n < 32; n++) { 25034 for (size_t k = 1; k <= 80; k += 17) { 25035 GemmMicrokernelTester() 25036 .mr(4) 25037 .nr(16) 25038 .kr(4) 25039 .sr(1) 25040 .m(4) 25041 .n(n) 25042 .k(k) 25043 .cn_stride(19) 25044 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25045 } 25046 } 25047 } 25048 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,n_gt_16_subtile)25049 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_gt_16_subtile) { 25050 TEST_REQUIRES_ARM_NEON; 25051 for (uint32_t n = 17; n < 32; n++) { 25052 for (size_t k = 1; k <= 80; k += 17) { 25053 for (uint32_t m = 1; m <= 4; m++) { 25054 GemmMicrokernelTester() 25055 .mr(4) 25056 .nr(16) 25057 .kr(4) 25058 .sr(1) 25059 .m(m) 25060 .n(n) 25061 .k(k) 25062 .iterations(1) 25063 
.Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25064 } 25065 } 25066 } 25067 } 25068 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,n_div_16)25069 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_div_16) { 25070 TEST_REQUIRES_ARM_NEON; 25071 for (uint32_t n = 32; n <= 48; n += 16) { 25072 for (size_t k = 1; k <= 80; k += 17) { 25073 GemmMicrokernelTester() 25074 .mr(4) 25075 .nr(16) 25076 .kr(4) 25077 .sr(1) 25078 .m(4) 25079 .n(n) 25080 .k(k) 25081 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25082 } 25083 } 25084 } 25085 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,n_div_16_strided_cn)25086 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) { 25087 TEST_REQUIRES_ARM_NEON; 25088 for (uint32_t n = 32; n <= 48; n += 16) { 25089 for (size_t k = 1; k <= 80; k += 17) { 25090 GemmMicrokernelTester() 25091 .mr(4) 25092 .nr(16) 25093 .kr(4) 25094 .sr(1) 25095 .m(4) 25096 .n(n) 25097 .k(k) 25098 .cn_stride(19) 25099 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25100 } 25101 } 25102 } 25103 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,n_div_16_subtile)25104 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_div_16_subtile) { 25105 TEST_REQUIRES_ARM_NEON; 25106 for (uint32_t n = 32; n <= 48; n += 16) { 25107 for (size_t k = 1; k <= 80; k += 17) { 25108 for (uint32_t m = 1; m <= 4; m++) { 25109 GemmMicrokernelTester() 25110 .mr(4) 25111 .nr(16) 25112 .kr(4) 25113 .sr(1) 25114 .m(m) 25115 .n(n) 25116 .k(k) 25117 .iterations(1) 25118 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25119 } 25120 } 25121 } 25122 } 25123 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,small_kernel)25124 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, small_kernel) { 25125 TEST_REQUIRES_ARM_NEON; 25126 for (size_t k = 1; k <= 80; k += 17) { 25127 GemmMicrokernelTester() 25128 .mr(4) 25129 .nr(16) 25130 .kr(4) 25131 .sr(1) 25132 .m(4) 25133 .n(16) 25134 .k(k) 25135 .ks(3) 25136 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25137 } 25138 } 25139 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,small_kernel_subtile)25140 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, small_kernel_subtile) { 25141 TEST_REQUIRES_ARM_NEON; 25142 for (size_t k = 1; k <= 80; k += 17) { 25143 for (uint32_t n = 1; n <= 16; n++) { 25144 for (uint32_t m = 1; m <= 4; m++) { 25145 GemmMicrokernelTester() 25146 .mr(4) 25147 .nr(16) 25148 .kr(4) 25149 .sr(1) 25150 .m(m) 25151 .n(n) 25152 .k(k) 25153 .ks(3) 25154 .iterations(1) 25155 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25156 } 25157 } 25158 } 25159 } 25160 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,n_gt_16_small_kernel)25161 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) { 25162 TEST_REQUIRES_ARM_NEON; 25163 for (uint32_t n = 17; n < 32; n++) { 25164 for (size_t k = 1; k <= 80; k += 17) { 25165 GemmMicrokernelTester() 25166 .mr(4) 25167 .nr(16) 25168 .kr(4) 25169 .sr(1) 25170 .m(4) 25171 .n(n) 25172 .k(k) 25173 .ks(3) 25174 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25175 } 25176 } 25177 } 25178 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,n_div_16_small_kernel)25179 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) { 25180 TEST_REQUIRES_ARM_NEON; 25181 for (uint32_t n = 32; n <= 48; n += 16) { 25182 for (size_t k = 1; k <= 80; k += 17) { 25183 GemmMicrokernelTester() 25184 .mr(4) 25185 .nr(16) 25186 
.kr(4) 25187 .sr(1) 25188 .m(4) 25189 .n(n) 25190 .k(k) 25191 .ks(3) 25192 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25193 } 25194 } 25195 } 25196 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,strided_cm_subtile)25197 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, strided_cm_subtile) { 25198 TEST_REQUIRES_ARM_NEON; 25199 for (size_t k = 1; k <= 80; k += 17) { 25200 for (uint32_t n = 1; n <= 16; n++) { 25201 for (uint32_t m = 1; m <= 4; m++) { 25202 GemmMicrokernelTester() 25203 .mr(4) 25204 .nr(16) 25205 .kr(4) 25206 .sr(1) 25207 .m(m) 25208 .n(n) 25209 .k(k) 25210 .cm_stride(19) 25211 .iterations(1) 25212 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25213 } 25214 } 25215 } 25216 } 25217 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,a_offset)25218 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, a_offset) { 25219 TEST_REQUIRES_ARM_NEON; 25220 for (size_t k = 1; k <= 80; k += 17) { 25221 GemmMicrokernelTester() 25222 .mr(4) 25223 .nr(16) 25224 .kr(4) 25225 .sr(1) 25226 .m(4) 25227 .n(16) 25228 .k(k) 25229 .ks(3) 25230 .a_offset(331) 25231 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25232 } 25233 } 25234 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,zero)25235 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, zero) { 25236 TEST_REQUIRES_ARM_NEON; 25237 for (size_t k = 1; k <= 80; k += 17) { 25238 for (uint32_t mz = 0; mz < 4; mz++) { 25239 GemmMicrokernelTester() 25240 .mr(4) 25241 .nr(16) 25242 .kr(4) 25243 .sr(1) 25244 .m(4) 25245 .n(16) 25246 .k(k) 25247 .ks(3) 25248 .a_offset(331) 25249 .zero_index(mz) 25250 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25251 } 25252 } 25253 } 25254 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,qmin)25255 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, qmin) { 25256 TEST_REQUIRES_ARM_NEON; 25257 GemmMicrokernelTester() 25258 .mr(4) 25259 .nr(16) 25260 .kr(4) 25261 .sr(1) 25262 .m(4) 25263 .n(16) 25264 .k(16) 25265 .qmin(128) 25266 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25267 } 25268 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,qmax)25269 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, qmax) { 25270 TEST_REQUIRES_ARM_NEON; 25271 GemmMicrokernelTester() 25272 .mr(4) 25273 .nr(16) 25274 .kr(4) 25275 .sr(1) 25276 .m(4) 25277 .n(16) 25278 .k(16) 25279 .qmax(128) 25280 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25281 } 25282 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP,strided_cm)25283 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_DUP, strided_cm) { 25284 TEST_REQUIRES_ARM_NEON; 25285 GemmMicrokernelTester() 25286 .mr(4) 25287 .nr(16) 25288 .kr(4) 25289 .sr(1) 25290 .m(4) 25291 .n(16) 25292 .k(16) 25293 .cm_stride(19) 25294 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25295 } 25296 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 25297 25298 25299 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,k_eq_16)25300 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_eq_16) { 25301 TEST_REQUIRES_ARM_NEON; 25302 GemmMicrokernelTester() 25303 .mr(4) 25304 .nr(16) 25305 .kr(4) 25306 .sr(1) 25307 .m(4) 25308 .n(16) 25309 .k(16) 25310 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25311 } 25312 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,strided_cn)25313 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, 
strided_cn) { 25314 TEST_REQUIRES_ARM_NEON; 25315 GemmMicrokernelTester() 25316 .mr(4) 25317 .nr(16) 25318 .kr(4) 25319 .sr(1) 25320 .m(4) 25321 .n(16) 25322 .k(16) 25323 .cn_stride(19) 25324 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25325 } 25326 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,k_eq_16_subtile)25327 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) { 25328 TEST_REQUIRES_ARM_NEON; 25329 for (uint32_t n = 1; n <= 16; n++) { 25330 for (uint32_t m = 1; m <= 4; m++) { 25331 GemmMicrokernelTester() 25332 .mr(4) 25333 .nr(16) 25334 .kr(4) 25335 .sr(1) 25336 .m(m) 25337 .n(n) 25338 .k(16) 25339 .iterations(1) 25340 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25341 } 25342 } 25343 } 25344 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,k_eq_16_subtile_m)25345 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) { 25346 TEST_REQUIRES_ARM_NEON; 25347 for (uint32_t m = 1; m <= 4; m++) { 25348 GemmMicrokernelTester() 25349 .mr(4) 25350 .nr(16) 25351 .kr(4) 25352 .sr(1) 25353 .m(m) 25354 .n(16) 25355 .k(16) 25356 .iterations(1) 25357 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25358 } 25359 } 25360 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,k_eq_16_subtile_n)25361 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) { 25362 TEST_REQUIRES_ARM_NEON; 25363 for (uint32_t n = 1; n <= 16; n++) { 25364 GemmMicrokernelTester() 25365 .mr(4) 25366 .nr(16) 25367 .kr(4) 25368 .sr(1) 25369 .m(4) 25370 .n(n) 25371 .k(16) 25372 .iterations(1) 25373 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25374 } 25375 } 25376 
// Parameter sweeps for xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r.
// Every case configures the tester with mr=4, nr=16, kr=4, sr=1.

// m=4, n=16; k = 1..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 1..15 crossed with n = 1..16 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16; k = 17..31.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 17..31 crossed with n = 1..16 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 17; k < 32; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16; k = 32, 48, ..., 160.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 32, 48, ..., 160 crossed with n = 1..16 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, k_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 32; k <= 160; k += 16) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4; n = 17..31 crossed with k = 1, 18, 35, 52, 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31 crossed with k = 1, 18, ..., 69 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4; n = 32, 48 crossed with k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32, 48 crossed with k = 1, 18, ..., 69 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16, ks=3; k = 1, 18, ..., 69.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks=3; k = 1, 18, ..., 69 crossed with n = 1..16 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 80; k += 17) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,n_gt_16_small_kernel)25629 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_gt_16_small_kernel) { 25630 TEST_REQUIRES_ARM_NEON; 25631 for (uint32_t n = 17; n < 32; n++) { 25632 for (size_t k = 1; k <= 80; k += 17) { 25633 GemmMicrokernelTester() 25634 .mr(4) 25635 .nr(16) 25636 .kr(4) 25637 .sr(1) 25638 .m(4) 25639 .n(n) 25640 .k(k) 25641 .ks(3) 25642 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25643 } 25644 } 25645 } 25646 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,n_div_16_small_kernel)25647 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, n_div_16_small_kernel) { 25648 TEST_REQUIRES_ARM_NEON; 25649 for (uint32_t n = 32; n <= 48; n += 16) { 25650 for (size_t k = 1; k <= 80; k += 17) { 25651 GemmMicrokernelTester() 25652 .mr(4) 25653 .nr(16) 25654 .kr(4) 25655 .sr(1) 25656 .m(4) 25657 .n(n) 25658 .k(k) 25659 .ks(3) 25660 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25661 } 25662 } 25663 } 25664 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,strided_cm_subtile)25665 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, strided_cm_subtile) { 25666 TEST_REQUIRES_ARM_NEON; 25667 for (size_t k = 1; k <= 80; k += 17) { 25668 for (uint32_t n = 1; n <= 16; n++) { 25669 for (uint32_t m = 1; m <= 4; m++) { 25670 GemmMicrokernelTester() 25671 .mr(4) 25672 .nr(16) 25673 .kr(4) 25674 .sr(1) 25675 .m(m) 25676 .n(n) 25677 .k(k) 25678 .cm_stride(19) 25679 .iterations(1) 25680 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25681 } 25682 } 25683 } 25684 } 25685 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,a_offset)25686 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, a_offset) { 25687 TEST_REQUIRES_ARM_NEON; 25688 for (size_t k = 1; k <= 
80; k += 17) { 25689 GemmMicrokernelTester() 25690 .mr(4) 25691 .nr(16) 25692 .kr(4) 25693 .sr(1) 25694 .m(4) 25695 .n(16) 25696 .k(k) 25697 .ks(3) 25698 .a_offset(331) 25699 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25700 } 25701 } 25702 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,zero)25703 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, zero) { 25704 TEST_REQUIRES_ARM_NEON; 25705 for (size_t k = 1; k <= 80; k += 17) { 25706 for (uint32_t mz = 0; mz < 4; mz++) { 25707 GemmMicrokernelTester() 25708 .mr(4) 25709 .nr(16) 25710 .kr(4) 25711 .sr(1) 25712 .m(4) 25713 .n(16) 25714 .k(k) 25715 .ks(3) 25716 .a_offset(331) 25717 .zero_index(mz) 25718 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25719 } 25720 } 25721 } 25722 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,qmin)25723 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, qmin) { 25724 TEST_REQUIRES_ARM_NEON; 25725 GemmMicrokernelTester() 25726 .mr(4) 25727 .nr(16) 25728 .kr(4) 25729 .sr(1) 25730 .m(4) 25731 .n(16) 25732 .k(16) 25733 .qmin(128) 25734 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25735 } 25736 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,qmax)25737 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, qmax) { 25738 TEST_REQUIRES_ARM_NEON; 25739 GemmMicrokernelTester() 25740 .mr(4) 25741 .nr(16) 25742 .kr(4) 25743 .sr(1) 25744 .m(4) 25745 .n(16) 25746 .k(16) 25747 .qmax(128) 25748 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25749 } 25750 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R,strided_cm)25751 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MLAL_LD2R, strided_cm) { 25752 TEST_REQUIRES_ARM_NEON; 25753 
GemmMicrokernelTester() 25754 .mr(4) 25755 .nr(16) 25756 .kr(4) 25757 .sr(1) 25758 .m(4) 25759 .n(16) 25760 .k(16) 25761 .cm_stride(19) 25762 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25763 } 25764 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 25765 25766 25767 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,k_eq_8)25768 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_eq_8) { 25769 TEST_REQUIRES_ARM_NEON; 25770 GemmMicrokernelTester() 25771 .mr(4) 25772 .nr(16) 25773 .kr(4) 25774 .sr(1) 25775 .m(4) 25776 .n(16) 25777 .k(8) 25778 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25779 } 25780 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,strided_cn)25781 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, strided_cn) { 25782 TEST_REQUIRES_ARM_NEON; 25783 GemmMicrokernelTester() 25784 .mr(4) 25785 .nr(16) 25786 .kr(4) 25787 .sr(1) 25788 .m(4) 25789 .n(16) 25790 .k(8) 25791 .cn_stride(19) 25792 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25793 } 25794 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,k_eq_8_subtile)25795 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_eq_8_subtile) { 25796 TEST_REQUIRES_ARM_NEON; 25797 for (uint32_t n = 1; n <= 16; n++) { 25798 for (uint32_t m = 1; m <= 4; m++) { 25799 GemmMicrokernelTester() 25800 .mr(4) 25801 .nr(16) 25802 .kr(4) 25803 .sr(1) 25804 .m(m) 25805 .n(n) 25806 .k(8) 25807 .iterations(1) 25808 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25809 } 25810 } 25811 } 25812 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,k_eq_8_subtile_m)25813 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_eq_8_subtile_m) { 25814 
TEST_REQUIRES_ARM_NEON; 25815 for (uint32_t m = 1; m <= 4; m++) { 25816 GemmMicrokernelTester() 25817 .mr(4) 25818 .nr(16) 25819 .kr(4) 25820 .sr(1) 25821 .m(m) 25822 .n(16) 25823 .k(8) 25824 .iterations(1) 25825 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25826 } 25827 } 25828 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,k_eq_8_subtile_n)25829 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_eq_8_subtile_n) { 25830 TEST_REQUIRES_ARM_NEON; 25831 for (uint32_t n = 1; n <= 16; n++) { 25832 GemmMicrokernelTester() 25833 .mr(4) 25834 .nr(16) 25835 .kr(4) 25836 .sr(1) 25837 .m(4) 25838 .n(n) 25839 .k(8) 25840 .iterations(1) 25841 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25842 } 25843 } 25844 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,k_lt_8)25845 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_lt_8) { 25846 TEST_REQUIRES_ARM_NEON; 25847 for (size_t k = 1; k < 8; k++) { 25848 GemmMicrokernelTester() 25849 .mr(4) 25850 .nr(16) 25851 .kr(4) 25852 .sr(1) 25853 .m(4) 25854 .n(16) 25855 .k(k) 25856 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25857 } 25858 } 25859 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,k_lt_8_subtile)25860 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_lt_8_subtile) { 25861 TEST_REQUIRES_ARM_NEON; 25862 for (size_t k = 1; k < 8; k++) { 25863 for (uint32_t n = 1; n <= 16; n++) { 25864 for (uint32_t m = 1; m <= 4; m++) { 25865 GemmMicrokernelTester() 25866 .mr(4) 25867 .nr(16) 25868 .kr(4) 25869 .sr(1) 25870 .m(m) 25871 .n(n) 25872 .k(k) 25873 .iterations(1) 25874 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 25875 } 25876 } 25877 } 25878 } 25879 
// Parameter sweeps for xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r.
// Every case configures the tester with mr=4, nr=16, kr=4, sr=1.

// m=4, n=16; k = 9..15.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 9..15 crossed with n = 1..16 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16; k = 16, 24, ..., 80.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 16, 24, ..., 80 crossed with n = 1..16 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4; n = 17..31 crossed with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 17..31 crossed with k = 1, 10, ..., 37 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
// Further parameter sweeps for xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r.
// Every case configures the tester with mr=4, nr=16, kr=4, sr=1.

// m=4; n = 32, 48 crossed with k = 1, 10, 19, 28, 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride set to 19.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// n = 32, 48 crossed with k = 1, 10, ..., 37 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16, ks=3; k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// ks=3; k = 1, 10, ..., 37 crossed with n = 1..16 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, ks=3; n = 17..31 crossed with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// m=4, ks=3; n = 32, 48 crossed with k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// cm_stride=19; k = 1, 10, ..., 37 crossed with n = 1..16 and m = 1..4; one iteration per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// m=4, n=16, ks=3, a_offset=163; k = 1, 10, ..., 37.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, a_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(16).kr(4).sr(1)
      .m(4).n(16).k(k)
      .ks(3)
      .a_offset(163)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same setup as a_offset, additionally sweeping zero_index over 0..3.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, zero) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 4; mz++) {
      GemmMicrokernelTester()
        .mr(4).nr(16).kr(4).sr(1)
        .m(4).n(16).k(k)
        .ks(3)
        .a_offset(163)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Opening of the qmin test; its body continues below this chunk.
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, qmin) {
TEST_REQUIRES_ARM_NEON; 26193 GemmMicrokernelTester() 26194 .mr(4) 26195 .nr(16) 26196 .kr(4) 26197 .sr(1) 26198 .m(4) 26199 .n(16) 26200 .k(8) 26201 .qmin(128) 26202 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26203 } 26204 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,qmax)26205 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, qmax) { 26206 TEST_REQUIRES_ARM_NEON; 26207 GemmMicrokernelTester() 26208 .mr(4) 26209 .nr(16) 26210 .kr(4) 26211 .sr(1) 26212 .m(4) 26213 .n(16) 26214 .k(8) 26215 .qmax(128) 26216 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26217 } 26218 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R,strided_cm)26219 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEON_MULL_LD2R, strided_cm) { 26220 TEST_REQUIRES_ARM_NEON; 26221 GemmMicrokernelTester() 26222 .mr(4) 26223 .nr(16) 26224 .kr(4) 26225 .sr(1) 26226 .m(4) 26227 .n(16) 26228 .k(8) 26229 .cm_stride(19) 26230 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26231 } 26232 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 26233 26234 26235 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_eq_8)26236 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8) { 26237 TEST_REQUIRES_ARM_NEON_DOT; 26238 GemmMicrokernelTester() 26239 .mr(4) 26240 .nr(16) 26241 .kr(4) 26242 .sr(1) 26243 .m(4) 26244 .n(16) 26245 .k(8) 26246 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26247 } 26248 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,strided_cn)26249 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cn) { 26250 TEST_REQUIRES_ARM_NEON_DOT; 26251 GemmMicrokernelTester() 26252 .mr(4) 26253 .nr(16) 26254 
.kr(4) 26255 .sr(1) 26256 .m(4) 26257 .n(16) 26258 .k(8) 26259 .cn_stride(19) 26260 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26261 } 26262 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_eq_8_subtile)26263 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile) { 26264 TEST_REQUIRES_ARM_NEON_DOT; 26265 for (uint32_t n = 1; n <= 16; n++) { 26266 for (uint32_t m = 1; m <= 4; m++) { 26267 GemmMicrokernelTester() 26268 .mr(4) 26269 .nr(16) 26270 .kr(4) 26271 .sr(1) 26272 .m(m) 26273 .n(n) 26274 .k(8) 26275 .iterations(1) 26276 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26277 } 26278 } 26279 } 26280 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_eq_8_subtile_m)26281 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_m) { 26282 TEST_REQUIRES_ARM_NEON_DOT; 26283 for (uint32_t m = 1; m <= 4; m++) { 26284 GemmMicrokernelTester() 26285 .mr(4) 26286 .nr(16) 26287 .kr(4) 26288 .sr(1) 26289 .m(m) 26290 .n(16) 26291 .k(8) 26292 .iterations(1) 26293 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26294 } 26295 } 26296 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_eq_8_subtile_n)26297 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_n) { 26298 TEST_REQUIRES_ARM_NEON_DOT; 26299 for (uint32_t n = 1; n <= 16; n++) { 26300 GemmMicrokernelTester() 26301 .mr(4) 26302 .nr(16) 26303 .kr(4) 26304 .sr(1) 26305 .m(4) 26306 .n(n) 26307 .k(8) 26308 .iterations(1) 26309 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26310 } 26311 } 26312 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_lt_8)26313 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8) { 26314 TEST_REQUIRES_ARM_NEON_DOT; 26315 for (size_t k = 1; k < 8; k++) { 26316 
GemmMicrokernelTester() 26317 .mr(4) 26318 .nr(16) 26319 .kr(4) 26320 .sr(1) 26321 .m(4) 26322 .n(16) 26323 .k(k) 26324 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26325 } 26326 } 26327 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_lt_8_subtile)26328 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_subtile) { 26329 TEST_REQUIRES_ARM_NEON_DOT; 26330 for (size_t k = 1; k < 8; k++) { 26331 for (uint32_t n = 1; n <= 16; n++) { 26332 for (uint32_t m = 1; m <= 4; m++) { 26333 GemmMicrokernelTester() 26334 .mr(4) 26335 .nr(16) 26336 .kr(4) 26337 .sr(1) 26338 .m(m) 26339 .n(n) 26340 .k(k) 26341 .iterations(1) 26342 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26343 } 26344 } 26345 } 26346 } 26347 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_gt_8)26348 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8) { 26349 TEST_REQUIRES_ARM_NEON_DOT; 26350 for (size_t k = 9; k < 16; k++) { 26351 GemmMicrokernelTester() 26352 .mr(4) 26353 .nr(16) 26354 .kr(4) 26355 .sr(1) 26356 .m(4) 26357 .n(16) 26358 .k(k) 26359 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26360 } 26361 } 26362 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_gt_8_subtile)26363 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_subtile) { 26364 TEST_REQUIRES_ARM_NEON_DOT; 26365 for (size_t k = 9; k < 16; k++) { 26366 for (uint32_t n = 1; n <= 16; n++) { 26367 for (uint32_t m = 1; m <= 4; m++) { 26368 GemmMicrokernelTester() 26369 .mr(4) 26370 .nr(16) 26371 .kr(4) 26372 .sr(1) 26373 .m(m) 26374 .n(n) 26375 .k(k) 26376 .iterations(1) 26377 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26378 } 26379 } 26380 } 26381 } 26382 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_div_8)26383 
TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8) { 26384 TEST_REQUIRES_ARM_NEON_DOT; 26385 for (size_t k = 16; k <= 80; k += 8) { 26386 GemmMicrokernelTester() 26387 .mr(4) 26388 .nr(16) 26389 .kr(4) 26390 .sr(1) 26391 .m(4) 26392 .n(16) 26393 .k(k) 26394 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26395 } 26396 } 26397 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,k_div_8_subtile)26398 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_subtile) { 26399 TEST_REQUIRES_ARM_NEON_DOT; 26400 for (size_t k = 16; k <= 80; k += 8) { 26401 for (uint32_t n = 1; n <= 16; n++) { 26402 for (uint32_t m = 1; m <= 4; m++) { 26403 GemmMicrokernelTester() 26404 .mr(4) 26405 .nr(16) 26406 .kr(4) 26407 .sr(1) 26408 .m(m) 26409 .n(n) 26410 .k(k) 26411 .iterations(1) 26412 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26413 } 26414 } 26415 } 26416 } 26417 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_gt_16)26418 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16) { 26419 TEST_REQUIRES_ARM_NEON_DOT; 26420 for (uint32_t n = 17; n < 32; n++) { 26421 for (size_t k = 1; k <= 40; k += 9) { 26422 GemmMicrokernelTester() 26423 .mr(4) 26424 .nr(16) 26425 .kr(4) 26426 .sr(1) 26427 .m(4) 26428 .n(n) 26429 .k(k) 26430 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26431 } 26432 } 26433 } 26434 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_gt_16_strided_cn)26435 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_cn) { 26436 TEST_REQUIRES_ARM_NEON_DOT; 26437 for (uint32_t n = 17; n < 32; n++) { 26438 for (size_t k = 1; k <= 40; k += 9) { 26439 GemmMicrokernelTester() 26440 .mr(4) 26441 .nr(16) 26442 .kr(4) 26443 .sr(1) 26444 .m(4) 26445 .n(n) 26446 .k(k) 26447 .cn_stride(19) 26448 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26449 } 26450 } 26451 } 26452 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_gt_16_subtile)26453 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_subtile) { 26454 TEST_REQUIRES_ARM_NEON_DOT; 26455 for (uint32_t n = 17; n < 32; n++) { 26456 for (size_t k = 1; k <= 40; k += 9) { 26457 for (uint32_t m = 1; m <= 4; m++) { 26458 GemmMicrokernelTester() 26459 .mr(4) 26460 .nr(16) 26461 .kr(4) 26462 .sr(1) 26463 .m(m) 26464 .n(n) 26465 .k(k) 26466 .iterations(1) 26467 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26468 } 26469 } 26470 } 26471 } 26472 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_div_16)26473 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16) { 26474 TEST_REQUIRES_ARM_NEON_DOT; 26475 for (uint32_t n = 32; n <= 48; n += 16) { 26476 for (size_t k = 1; k <= 40; k += 9) { 26477 GemmMicrokernelTester() 26478 .mr(4) 26479 .nr(16) 26480 .kr(4) 26481 .sr(1) 26482 .m(4) 26483 .n(n) 26484 .k(k) 26485 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26486 } 26487 } 26488 } 26489 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_div_16_strided_cn)26490 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_cn) { 26491 TEST_REQUIRES_ARM_NEON_DOT; 26492 for (uint32_t n = 32; n <= 48; n += 16) { 26493 for (size_t k = 1; k <= 40; k += 9) { 26494 GemmMicrokernelTester() 26495 .mr(4) 26496 .nr(16) 26497 .kr(4) 26498 .sr(1) 26499 .m(4) 26500 .n(n) 26501 .k(k) 26502 .cn_stride(19) 26503 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26504 } 26505 } 26506 } 26507 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_div_16_subtile)26508 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_subtile) { 26509 TEST_REQUIRES_ARM_NEON_DOT; 26510 for (uint32_t n = 32; n 
<= 48; n += 16) { 26511 for (size_t k = 1; k <= 40; k += 9) { 26512 for (uint32_t m = 1; m <= 4; m++) { 26513 GemmMicrokernelTester() 26514 .mr(4) 26515 .nr(16) 26516 .kr(4) 26517 .sr(1) 26518 .m(m) 26519 .n(n) 26520 .k(k) 26521 .iterations(1) 26522 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26523 } 26524 } 26525 } 26526 } 26527 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,small_kernel)26528 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel) { 26529 TEST_REQUIRES_ARM_NEON_DOT; 26530 for (size_t k = 1; k <= 40; k += 9) { 26531 GemmMicrokernelTester() 26532 .mr(4) 26533 .nr(16) 26534 .kr(4) 26535 .sr(1) 26536 .m(4) 26537 .n(16) 26538 .k(k) 26539 .ks(3) 26540 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26541 } 26542 } 26543 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,small_kernel_subtile)26544 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel_subtile) { 26545 TEST_REQUIRES_ARM_NEON_DOT; 26546 for (size_t k = 1; k <= 40; k += 9) { 26547 for (uint32_t n = 1; n <= 16; n++) { 26548 for (uint32_t m = 1; m <= 4; m++) { 26549 GemmMicrokernelTester() 26550 .mr(4) 26551 .nr(16) 26552 .kr(4) 26553 .sr(1) 26554 .m(m) 26555 .n(n) 26556 .k(k) 26557 .ks(3) 26558 .iterations(1) 26559 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26560 } 26561 } 26562 } 26563 } 26564 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_gt_16_small_kernel)26565 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_small_kernel) { 26566 TEST_REQUIRES_ARM_NEON_DOT; 26567 for (uint32_t n = 17; n < 32; n++) { 26568 for (size_t k = 1; k <= 40; k += 9) { 26569 GemmMicrokernelTester() 26570 .mr(4) 26571 .nr(16) 26572 .kr(4) 26573 .sr(1) 26574 .m(4) 26575 .n(n) 26576 .k(k) 26577 .ks(3) 26578 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26579 } 26580 } 26581 } 26582 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,n_div_16_small_kernel)26583 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_small_kernel) { 26584 TEST_REQUIRES_ARM_NEON_DOT; 26585 for (uint32_t n = 32; n <= 48; n += 16) { 26586 for (size_t k = 1; k <= 40; k += 9) { 26587 GemmMicrokernelTester() 26588 .mr(4) 26589 .nr(16) 26590 .kr(4) 26591 .sr(1) 26592 .m(4) 26593 .n(n) 26594 .k(k) 26595 .ks(3) 26596 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26597 } 26598 } 26599 } 26600 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,strided_cm_subtile)26601 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm_subtile) { 26602 TEST_REQUIRES_ARM_NEON_DOT; 26603 for (size_t k = 1; k <= 40; k += 9) { 26604 for (uint32_t n = 1; n <= 16; n++) { 26605 for (uint32_t m = 1; m <= 4; m++) { 26606 GemmMicrokernelTester() 26607 .mr(4) 26608 .nr(16) 26609 .kr(4) 26610 .sr(1) 26611 .m(m) 26612 .n(n) 26613 .k(k) 26614 .cm_stride(19) 26615 .iterations(1) 26616 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26617 } 26618 } 26619 } 26620 } 26621 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,a_offset)26622 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, a_offset) { 26623 TEST_REQUIRES_ARM_NEON_DOT; 26624 for (size_t k = 1; k <= 40; k += 9) { 26625 GemmMicrokernelTester() 26626 .mr(4) 26627 .nr(16) 26628 .kr(4) 26629 .sr(1) 26630 .m(4) 26631 .n(16) 26632 .k(k) 26633 .ks(3) 26634 .a_offset(163) 26635 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26636 } 26637 } 26638 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,zero)26639 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, zero) { 26640 TEST_REQUIRES_ARM_NEON_DOT; 26641 for (size_t k = 1; k <= 40; k += 9) { 26642 for 
(uint32_t mz = 0; mz < 4; mz++) { 26643 GemmMicrokernelTester() 26644 .mr(4) 26645 .nr(16) 26646 .kr(4) 26647 .sr(1) 26648 .m(4) 26649 .n(16) 26650 .k(k) 26651 .ks(3) 26652 .a_offset(163) 26653 .zero_index(mz) 26654 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26655 } 26656 } 26657 } 26658 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,qmin)26659 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmin) { 26660 TEST_REQUIRES_ARM_NEON_DOT; 26661 GemmMicrokernelTester() 26662 .mr(4) 26663 .nr(16) 26664 .kr(4) 26665 .sr(1) 26666 .m(4) 26667 .n(16) 26668 .k(8) 26669 .qmin(128) 26670 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26671 } 26672 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,qmax)26673 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmax) { 26674 TEST_REQUIRES_ARM_NEON_DOT; 26675 GemmMicrokernelTester() 26676 .mr(4) 26677 .nr(16) 26678 .kr(4) 26679 .sr(1) 26680 .m(4) 26681 .n(16) 26682 .k(8) 26683 .qmax(128) 26684 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26685 } 26686 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT,strided_cm)26687 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm) { 26688 TEST_REQUIRES_ARM_NEON_DOT; 26689 GemmMicrokernelTester() 26690 .mr(4) 26691 .nr(16) 26692 .kr(4) 26693 .sr(1) 26694 .m(4) 26695 .n(16) 26696 .k(8) 26697 .cm_stride(19) 26698 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26699 } 26700 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 26701 26702 26703 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8)26704 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) { 26705 TEST_REQUIRES_ARM_NEON; 26706 
GemmMicrokernelTester() 26707 .mr(6) 26708 .nr(8) 26709 .kr(1) 26710 .sr(1) 26711 .m(6) 26712 .n(8) 26713 .k(8) 26714 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26715 } 26716 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,strided_cn)26717 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cn) { 26718 TEST_REQUIRES_ARM_NEON; 26719 GemmMicrokernelTester() 26720 .mr(6) 26721 .nr(8) 26722 .kr(1) 26723 .sr(1) 26724 .m(6) 26725 .n(8) 26726 .k(8) 26727 .cn_stride(11) 26728 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26729 } 26730 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile)26731 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) { 26732 TEST_REQUIRES_ARM_NEON; 26733 for (uint32_t n = 1; n <= 8; n++) { 26734 for (uint32_t m = 1; m <= 6; m++) { 26735 GemmMicrokernelTester() 26736 .mr(6) 26737 .nr(8) 26738 .kr(1) 26739 .sr(1) 26740 .m(m) 26741 .n(n) 26742 .k(8) 26743 .iterations(1) 26744 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26745 } 26746 } 26747 } 26748 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_m)26749 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) { 26750 TEST_REQUIRES_ARM_NEON; 26751 for (uint32_t m = 1; m <= 6; m++) { 26752 GemmMicrokernelTester() 26753 .mr(6) 26754 .nr(8) 26755 .kr(1) 26756 .sr(1) 26757 .m(m) 26758 .n(8) 26759 .k(8) 26760 .iterations(1) 26761 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26762 } 26763 } 26764 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_eq_8_subtile_n)26765 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) { 26766 
TEST_REQUIRES_ARM_NEON; 26767 for (uint32_t n = 1; n <= 8; n++) { 26768 GemmMicrokernelTester() 26769 .mr(6) 26770 .nr(8) 26771 .kr(1) 26772 .sr(1) 26773 .m(6) 26774 .n(n) 26775 .k(8) 26776 .iterations(1) 26777 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26778 } 26779 } 26780 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_lt_8)26781 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) { 26782 TEST_REQUIRES_ARM_NEON; 26783 for (size_t k = 1; k < 8; k++) { 26784 GemmMicrokernelTester() 26785 .mr(6) 26786 .nr(8) 26787 .kr(1) 26788 .sr(1) 26789 .m(6) 26790 .n(8) 26791 .k(k) 26792 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26793 } 26794 } 26795 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_lt_8_subtile)26796 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) { 26797 TEST_REQUIRES_ARM_NEON; 26798 for (size_t k = 1; k < 8; k++) { 26799 for (uint32_t n = 1; n <= 8; n++) { 26800 for (uint32_t m = 1; m <= 6; m++) { 26801 GemmMicrokernelTester() 26802 .mr(6) 26803 .nr(8) 26804 .kr(1) 26805 .sr(1) 26806 .m(m) 26807 .n(n) 26808 .k(k) 26809 .iterations(1) 26810 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26811 } 26812 } 26813 } 26814 } 26815 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_gt_8)26816 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) { 26817 TEST_REQUIRES_ARM_NEON; 26818 for (size_t k = 9; k < 16; k++) { 26819 GemmMicrokernelTester() 26820 .mr(6) 26821 .nr(8) 26822 .kr(1) 26823 .sr(1) 26824 .m(6) 26825 .n(8) 26826 .k(k) 26827 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26828 } 26829 } 26830 
TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_gt_8_subtile)26831 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) { 26832 TEST_REQUIRES_ARM_NEON; 26833 for (size_t k = 9; k < 16; k++) { 26834 for (uint32_t n = 1; n <= 8; n++) { 26835 for (uint32_t m = 1; m <= 6; m++) { 26836 GemmMicrokernelTester() 26837 .mr(6) 26838 .nr(8) 26839 .kr(1) 26840 .sr(1) 26841 .m(m) 26842 .n(n) 26843 .k(k) 26844 .iterations(1) 26845 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26846 } 26847 } 26848 } 26849 } 26850 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_div_8)26851 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8) { 26852 TEST_REQUIRES_ARM_NEON; 26853 for (size_t k = 16; k <= 80; k += 8) { 26854 GemmMicrokernelTester() 26855 .mr(6) 26856 .nr(8) 26857 .kr(1) 26858 .sr(1) 26859 .m(6) 26860 .n(8) 26861 .k(k) 26862 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26863 } 26864 } 26865 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,k_div_8_subtile)26866 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) { 26867 TEST_REQUIRES_ARM_NEON; 26868 for (size_t k = 16; k <= 80; k += 8) { 26869 for (uint32_t n = 1; n <= 8; n++) { 26870 for (uint32_t m = 1; m <= 6; m++) { 26871 GemmMicrokernelTester() 26872 .mr(6) 26873 .nr(8) 26874 .kr(1) 26875 .sr(1) 26876 .m(m) 26877 .n(n) 26878 .k(k) 26879 .iterations(1) 26880 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26881 } 26882 } 26883 } 26884 } 26885 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_gt_8)26886 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) { 26887 TEST_REQUIRES_ARM_NEON; 26888 for (uint32_t n = 9; n < 16; n++) { 26889 for (size_t k = 1; k <= 40; k += 9) { 26890 
GemmMicrokernelTester() 26891 .mr(6) 26892 .nr(8) 26893 .kr(1) 26894 .sr(1) 26895 .m(6) 26896 .n(n) 26897 .k(k) 26898 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26899 } 26900 } 26901 } 26902 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_gt_8_strided_cn)26903 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) { 26904 TEST_REQUIRES_ARM_NEON; 26905 for (uint32_t n = 9; n < 16; n++) { 26906 for (size_t k = 1; k <= 40; k += 9) { 26907 GemmMicrokernelTester() 26908 .mr(6) 26909 .nr(8) 26910 .kr(1) 26911 .sr(1) 26912 .m(6) 26913 .n(n) 26914 .k(k) 26915 .cn_stride(11) 26916 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26917 } 26918 } 26919 } 26920 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_gt_8_subtile)26921 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) { 26922 TEST_REQUIRES_ARM_NEON; 26923 for (uint32_t n = 9; n < 16; n++) { 26924 for (size_t k = 1; k <= 40; k += 9) { 26925 for (uint32_t m = 1; m <= 6; m++) { 26926 GemmMicrokernelTester() 26927 .mr(6) 26928 .nr(8) 26929 .kr(1) 26930 .sr(1) 26931 .m(m) 26932 .n(n) 26933 .k(k) 26934 .iterations(1) 26935 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26936 } 26937 } 26938 } 26939 } 26940 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_div_8)26941 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8) { 26942 TEST_REQUIRES_ARM_NEON; 26943 for (uint32_t n = 16; n <= 24; n += 8) { 26944 for (size_t k = 1; k <= 40; k += 9) { 26945 GemmMicrokernelTester() 26946 .mr(6) 26947 .nr(8) 26948 .kr(1) 26949 .sr(1) 26950 .m(6) 26951 .n(n) 26952 .k(k) 26953 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 26954 } 26955 } 26956 } 26957 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_div_8_strided_cn)26958 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) { 26959 TEST_REQUIRES_ARM_NEON; 26960 for (uint32_t n = 16; n <= 24; n += 8) { 26961 for (size_t k = 1; k <= 40; k += 9) { 26962 GemmMicrokernelTester() 26963 .mr(6) 26964 .nr(8) 26965 .kr(1) 26966 .sr(1) 26967 .m(6) 26968 .n(n) 26969 .k(k) 26970 .cn_stride(11) 26971 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26972 } 26973 } 26974 } 26975 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_div_8_subtile)26976 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) { 26977 TEST_REQUIRES_ARM_NEON; 26978 for (uint32_t n = 16; n <= 24; n += 8) { 26979 for (size_t k = 1; k <= 40; k += 9) { 26980 for (uint32_t m = 1; m <= 6; m++) { 26981 GemmMicrokernelTester() 26982 .mr(6) 26983 .nr(8) 26984 .kr(1) 26985 .sr(1) 26986 .m(m) 26987 .n(n) 26988 .k(k) 26989 .iterations(1) 26990 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 26991 } 26992 } 26993 } 26994 } 26995 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,small_kernel)26996 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, small_kernel) { 26997 TEST_REQUIRES_ARM_NEON; 26998 for (size_t k = 1; k <= 40; k += 9) { 26999 GemmMicrokernelTester() 27000 .mr(6) 27001 .nr(8) 27002 .kr(1) 27003 .sr(1) 27004 .m(6) 27005 .n(8) 27006 .k(k) 27007 .ks(3) 27008 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27009 } 27010 } 27011 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,small_kernel_subtile)27012 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) { 27013 TEST_REQUIRES_ARM_NEON; 27014 for (size_t k = 
1; k <= 40; k += 9) { 27015 for (uint32_t n = 1; n <= 8; n++) { 27016 for (uint32_t m = 1; m <= 6; m++) { 27017 GemmMicrokernelTester() 27018 .mr(6) 27019 .nr(8) 27020 .kr(1) 27021 .sr(1) 27022 .m(m) 27023 .n(n) 27024 .k(k) 27025 .ks(3) 27026 .iterations(1) 27027 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27028 } 27029 } 27030 } 27031 } 27032 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_gt_8_small_kernel)27033 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) { 27034 TEST_REQUIRES_ARM_NEON; 27035 for (uint32_t n = 9; n < 16; n++) { 27036 for (size_t k = 1; k <= 40; k += 9) { 27037 GemmMicrokernelTester() 27038 .mr(6) 27039 .nr(8) 27040 .kr(1) 27041 .sr(1) 27042 .m(6) 27043 .n(n) 27044 .k(k) 27045 .ks(3) 27046 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27047 } 27048 } 27049 } 27050 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,n_div_8_small_kernel)27051 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) { 27052 TEST_REQUIRES_ARM_NEON; 27053 for (uint32_t n = 16; n <= 24; n += 8) { 27054 for (size_t k = 1; k <= 40; k += 9) { 27055 GemmMicrokernelTester() 27056 .mr(6) 27057 .nr(8) 27058 .kr(1) 27059 .sr(1) 27060 .m(6) 27061 .n(n) 27062 .k(k) 27063 .ks(3) 27064 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27065 } 27066 } 27067 } 27068 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,strided_cm_subtile)27069 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) { 27070 TEST_REQUIRES_ARM_NEON; 27071 for (size_t k = 1; k <= 40; k += 9) { 27072 for (uint32_t n = 1; n <= 8; n++) { 27073 for (uint32_t m = 1; m <= 6; m++) { 27074 GemmMicrokernelTester() 27075 .mr(6) 27076 .nr(8) 27077 .kr(1) 27078 .sr(1) 
27079 .m(m) 27080 .n(n) 27081 .k(k) 27082 .cm_stride(11) 27083 .iterations(1) 27084 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27085 } 27086 } 27087 } 27088 } 27089 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,a_offset)27090 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, a_offset) { 27091 TEST_REQUIRES_ARM_NEON; 27092 for (size_t k = 1; k <= 40; k += 9) { 27093 GemmMicrokernelTester() 27094 .mr(6) 27095 .nr(8) 27096 .kr(1) 27097 .sr(1) 27098 .m(6) 27099 .n(8) 27100 .k(k) 27101 .ks(3) 27102 .a_offset(251) 27103 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27104 } 27105 } 27106 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,zero)27107 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, zero) { 27108 TEST_REQUIRES_ARM_NEON; 27109 for (size_t k = 1; k <= 40; k += 9) { 27110 for (uint32_t mz = 0; mz < 6; mz++) { 27111 GemmMicrokernelTester() 27112 .mr(6) 27113 .nr(8) 27114 .kr(1) 27115 .sr(1) 27116 .m(6) 27117 .n(8) 27118 .k(k) 27119 .ks(3) 27120 .a_offset(251) 27121 .zero_index(mz) 27122 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27123 } 27124 } 27125 } 27126 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,qmin)27127 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, qmin) { 27128 TEST_REQUIRES_ARM_NEON; 27129 GemmMicrokernelTester() 27130 .mr(6) 27131 .nr(8) 27132 .kr(1) 27133 .sr(1) 27134 .m(6) 27135 .n(8) 27136 .k(8) 27137 .qmin(128) 27138 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27139 } 27140 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,qmax)27141 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, qmax) { 27142 TEST_REQUIRES_ARM_NEON; 27143 
GemmMicrokernelTester() 27144 .mr(6) 27145 .nr(8) 27146 .kr(1) 27147 .sr(1) 27148 .m(6) 27149 .n(8) 27150 .k(8) 27151 .qmax(128) 27152 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27153 } 27154 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM,strided_cm)27155 TEST(QS8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE_PRFM, strided_cm) { 27156 TEST_REQUIRES_ARM_NEON; 27157 GemmMicrokernelTester() 27158 .mr(6) 27159 .nr(8) 27160 .kr(1) 27161 .sr(1) 27162 .m(6) 27163 .n(8) 27164 .k(8) 27165 .cm_stride(11) 27166 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27167 } 27168 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 27169 27170 27171 #if XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8)27172 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8) { 27173 TEST_REQUIRES_ARM_NEON_DOT; 27174 GemmMicrokernelTester() 27175 .mr(8) 27176 .nr(16) 27177 .kr(4) 27178 .sr(1) 27179 .m(8) 27180 .n(16) 27181 .k(8) 27182 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27183 } 27184 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,strided_cn)27185 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cn) { 27186 TEST_REQUIRES_ARM_NEON_DOT; 27187 GemmMicrokernelTester() 27188 .mr(8) 27189 .nr(16) 27190 .kr(4) 27191 .sr(1) 27192 .m(8) 27193 .n(16) 27194 .k(8) 27195 .cn_stride(19) 27196 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27197 } 27198 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8_subtile)27199 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile) { 27200 TEST_REQUIRES_ARM_NEON_DOT; 27201 for (uint32_t n = 1; n <= 16; n++) { 27202 for (uint32_t m = 1; m <= 8; m++) 
{ 27203 GemmMicrokernelTester() 27204 .mr(8) 27205 .nr(16) 27206 .kr(4) 27207 .sr(1) 27208 .m(m) 27209 .n(n) 27210 .k(8) 27211 .iterations(1) 27212 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27213 } 27214 } 27215 } 27216 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8_subtile_m)27217 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_m) { 27218 TEST_REQUIRES_ARM_NEON_DOT; 27219 for (uint32_t m = 1; m <= 8; m++) { 27220 GemmMicrokernelTester() 27221 .mr(8) 27222 .nr(16) 27223 .kr(4) 27224 .sr(1) 27225 .m(m) 27226 .n(16) 27227 .k(8) 27228 .iterations(1) 27229 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27230 } 27231 } 27232 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_eq_8_subtile_n)27233 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_n) { 27234 TEST_REQUIRES_ARM_NEON_DOT; 27235 for (uint32_t n = 1; n <= 16; n++) { 27236 GemmMicrokernelTester() 27237 .mr(8) 27238 .nr(16) 27239 .kr(4) 27240 .sr(1) 27241 .m(8) 27242 .n(n) 27243 .k(8) 27244 .iterations(1) 27245 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27246 } 27247 } 27248 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_lt_8)27249 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8) { 27250 TEST_REQUIRES_ARM_NEON_DOT; 27251 for (size_t k = 1; k < 8; k++) { 27252 GemmMicrokernelTester() 27253 .mr(8) 27254 .nr(16) 27255 .kr(4) 27256 .sr(1) 27257 .m(8) 27258 .n(16) 27259 .k(k) 27260 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27261 } 27262 } 27263 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_lt_8_subtile)27264 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_subtile) { 27265 TEST_REQUIRES_ARM_NEON_DOT; 27266 for (size_t k = 1; k < 8; k++) { 27267 for 
(uint32_t n = 1; n <= 16; n++) { 27268 for (uint32_t m = 1; m <= 8; m++) { 27269 GemmMicrokernelTester() 27270 .mr(8) 27271 .nr(16) 27272 .kr(4) 27273 .sr(1) 27274 .m(m) 27275 .n(n) 27276 .k(k) 27277 .iterations(1) 27278 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27279 } 27280 } 27281 } 27282 } 27283 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_gt_8)27284 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8) { 27285 TEST_REQUIRES_ARM_NEON_DOT; 27286 for (size_t k = 9; k < 16; k++) { 27287 GemmMicrokernelTester() 27288 .mr(8) 27289 .nr(16) 27290 .kr(4) 27291 .sr(1) 27292 .m(8) 27293 .n(16) 27294 .k(k) 27295 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27296 } 27297 } 27298 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_gt_8_subtile)27299 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_subtile) { 27300 TEST_REQUIRES_ARM_NEON_DOT; 27301 for (size_t k = 9; k < 16; k++) { 27302 for (uint32_t n = 1; n <= 16; n++) { 27303 for (uint32_t m = 1; m <= 8; m++) { 27304 GemmMicrokernelTester() 27305 .mr(8) 27306 .nr(16) 27307 .kr(4) 27308 .sr(1) 27309 .m(m) 27310 .n(n) 27311 .k(k) 27312 .iterations(1) 27313 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27314 } 27315 } 27316 } 27317 } 27318 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_div_8)27319 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8) { 27320 TEST_REQUIRES_ARM_NEON_DOT; 27321 for (size_t k = 16; k <= 80; k += 8) { 27322 GemmMicrokernelTester() 27323 .mr(8) 27324 .nr(16) 27325 .kr(4) 27326 .sr(1) 27327 .m(8) 27328 .n(16) 27329 .k(k) 27330 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27331 } 27332 } 27333 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,k_div_8_subtile)27334 
// Sweeps k = 16, 24, ..., 80 over every (m, n) in [1, 8] x [1, 16]; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n = 17..31 with k = 1, 10, ..., 37 at m = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n = 17..31, k = 1, 10, ..., 37 and m = 1..8; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n = 32, 48 with k = 1, 10, ..., 37 at m = 8.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_16, with cn_stride(19).
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_cn) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n).k(k)
        .cn_stride(19)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n = 32, 48, k = 1, 10, ..., 37 and m = 1..8; iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
// Sweeps k = 1, 10, ..., 37 at m = 8, n = 16 with ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(k)
      .ks(3)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k = 1, 10, ..., 37 over every (m, n) in [1, 8] x [1, 16] with ks(3); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .ks(3)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n = 17..31 with k = 1, 10, ..., 37 at m = 8 and ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 17; n < 32; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n = 32, 48 with k = 1, 10, ..., 37 at m = 8 and ks(3).
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_small_kernel) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (uint32_t n = 32; n <= 48; n += 16) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(n).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps k = 1, 10, ..., 37 over every (m, n) in [1, 8] x [1, 16] with cm_stride(19); iterations(1) per case.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm_subtile) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 8; m++) {
        GemmMicrokernelTester()
          .mr(8).nr(16).kr(4).sr(1)
          .m(m).n(n).k(k)
          .cm_stride(19)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k = 1, 10, ..., 37 at m = 8, n = 16 with ks(3) and a_offset(331).
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, a_offset) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(8).nr(16).kr(4).sr(1)
      .m(8).n(16).k(k)
      .ks(3)
      .a_offset(331)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same configuration as a_offset, additionally sweeping zero_index(mz) for mz = 0..7.
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, zero) {
  TEST_REQUIRES_ARM_NEON_DOT;
  for (size_t k = 1; k <= 40; k += 9) {
    for (uint32_t mz = 0; mz < 8; mz++) {
      GemmMicrokernelTester()
        .mr(8).nr(16).kr(4).sr(1)
        .m(8).n(16).k(k)
        .ks(3)
        .a_offset(331)
        .zero_index(mz)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}
TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,qmin)27595 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmin) { 27596 TEST_REQUIRES_ARM_NEON_DOT; 27597 GemmMicrokernelTester() 27598 .mr(8) 27599 .nr(16) 27600 .kr(4) 27601 .sr(1) 27602 .m(8) 27603 .n(16) 27604 .k(8) 27605 .qmin(128) 27606 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27607 } 27608 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,qmax)27609 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmax) { 27610 TEST_REQUIRES_ARM_NEON_DOT; 27611 GemmMicrokernelTester() 27612 .mr(8) 27613 .nr(16) 27614 .kr(4) 27615 .sr(1) 27616 .m(8) 27617 .n(16) 27618 .k(8) 27619 .qmax(128) 27620 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27621 } 27622 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT,strided_cm)27623 TEST(QS8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm) { 27624 TEST_REQUIRES_ARM_NEON_DOT; 27625 GemmMicrokernelTester() 27626 .mr(8) 27627 .nr(16) 27628 .kr(4) 27629 .sr(1) 27630 .m(8) 27631 .n(16) 27632 .k(8) 27633 .cm_stride(19) 27634 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27635 } 27636 #endif // XNN_ENABLE_ARM_DOTPROD && (XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64) 27637 27638 27639 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,k_eq_8)27640 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) { 27641 TEST_REQUIRES_ARM_NEON; 27642 GemmMicrokernelTester() 27643 .mr(4) 27644 .nr(8) 27645 .kr(1) 27646 .sr(1) 27647 .m(4) 27648 .n(8) 27649 .k(8) 27650 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27651 } 27652 
// Single 4x8 case at k = 8 with cn_stride(11).
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k = 8 over every (m, n) in [1, 4] x [1, 8]; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 8, sweeping m = 1..4; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(8)
      .iterations(1)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 4, sweeping n = 1..8; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k = 1..7 at m = 4, n = 8.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k = 1..7 over every (m, n) in [1, 4] x [1, 8]; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k = 9..15 at m = 4, n = 8.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// Sweeps k = 9..15 over every (m, n) in [1, 4] x [1, 8]; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k = 16, 24, ..., 80 at m = 4, n = 8.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k = 16, 24, ..., 80 over every (m, n) in [1, 4] x [1, 8]; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n = 9..15 with k = 1, 10, ..., 37 at m = 4.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_gt_8, with cn_stride(11).
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n = 9..15, k = 1, 10, ..., 37 and m = 1..4; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 9; n < 16; n++) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps n = 16, 24 with k = 1, 10, ..., 37 at m = 4.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Same sweep as n_div_8, with cn_stride(11).
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(4).n(n).k(k)
        .cn_stride(11)
        .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Sweeps n = 16, 24, k = 1, 10, ..., 37 and m = 1..4; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 16; n <= 24; n += 8) {
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k = 1, 10, ..., 37 at m = 4, n = 8 with ks(3).
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k <= 40; k += 9) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .ks(3)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
27947 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,small_kernel_subtile)27948 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) { 27949 TEST_REQUIRES_ARM_NEON; 27950 for (size_t k = 1; k <= 40; k += 9) { 27951 for (uint32_t n = 1; n <= 8; n++) { 27952 for (uint32_t m = 1; m <= 4; m++) { 27953 GemmMicrokernelTester() 27954 .mr(4) 27955 .nr(8) 27956 .kr(1) 27957 .sr(1) 27958 .m(m) 27959 .n(n) 27960 .k(k) 27961 .ks(3) 27962 .iterations(1) 27963 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27964 } 27965 } 27966 } 27967 } 27968 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_gt_8_small_kernel)27969 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) { 27970 TEST_REQUIRES_ARM_NEON; 27971 for (uint32_t n = 9; n < 16; n++) { 27972 for (size_t k = 1; k <= 40; k += 9) { 27973 GemmMicrokernelTester() 27974 .mr(4) 27975 .nr(8) 27976 .kr(1) 27977 .sr(1) 27978 .m(4) 27979 .n(n) 27980 .k(k) 27981 .ks(3) 27982 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 27983 } 27984 } 27985 } 27986 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,n_div_8_small_kernel)27987 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) { 27988 TEST_REQUIRES_ARM_NEON; 27989 for (uint32_t n = 16; n <= 24; n += 8) { 27990 for (size_t k = 1; k <= 40; k += 9) { 27991 GemmMicrokernelTester() 27992 .mr(4) 27993 .nr(8) 27994 .kr(1) 27995 .sr(1) 27996 .m(4) 27997 .n(n) 27998 .k(k) 27999 .ks(3) 28000 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28001 } 28002 } 28003 } 28004 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,strided_cm_subtile)28005 
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) { 28006 TEST_REQUIRES_ARM_NEON; 28007 for (size_t k = 1; k <= 40; k += 9) { 28008 for (uint32_t n = 1; n <= 8; n++) { 28009 for (uint32_t m = 1; m <= 4; m++) { 28010 GemmMicrokernelTester() 28011 .mr(4) 28012 .nr(8) 28013 .kr(1) 28014 .sr(1) 28015 .m(m) 28016 .n(n) 28017 .k(k) 28018 .cm_stride(11) 28019 .iterations(1) 28020 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28021 } 28022 } 28023 } 28024 } 28025 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,a_offset)28026 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) { 28027 TEST_REQUIRES_ARM_NEON; 28028 for (size_t k = 1; k <= 40; k += 9) { 28029 GemmMicrokernelTester() 28030 .mr(4) 28031 .nr(8) 28032 .kr(1) 28033 .sr(1) 28034 .m(4) 28035 .n(8) 28036 .k(k) 28037 .ks(3) 28038 .a_offset(163) 28039 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28040 } 28041 } 28042 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,zero)28043 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) { 28044 TEST_REQUIRES_ARM_NEON; 28045 for (size_t k = 1; k <= 40; k += 9) { 28046 for (uint32_t mz = 0; mz < 4; mz++) { 28047 GemmMicrokernelTester() 28048 .mr(4) 28049 .nr(8) 28050 .kr(1) 28051 .sr(1) 28052 .m(4) 28053 .n(8) 28054 .k(k) 28055 .ks(3) 28056 .a_offset(163) 28057 .zero_index(mz) 28058 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28059 } 28060 } 28061 } 28062 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,qmin)28063 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) { 28064 TEST_REQUIRES_ARM_NEON; 28065 GemmMicrokernelTester() 28066 .mr(4) 28067 
.nr(8) 28068 .kr(1) 28069 .sr(1) 28070 .m(4) 28071 .n(8) 28072 .k(8) 28073 .qmin(128) 28074 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28075 } 28076 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,qmax)28077 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) { 28078 TEST_REQUIRES_ARM_NEON; 28079 GemmMicrokernelTester() 28080 .mr(4) 28081 .nr(8) 28082 .kr(1) 28083 .sr(1) 28084 .m(4) 28085 .n(8) 28086 .k(8) 28087 .qmax(128) 28088 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28089 } 28090 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64,strided_cm)28091 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) { 28092 TEST_REQUIRES_ARM_NEON; 28093 GemmMicrokernelTester() 28094 .mr(4) 28095 .nr(8) 28096 .kr(1) 28097 .sr(1) 28098 .m(4) 28099 .n(8) 28100 .k(8) 28101 .cm_stride(11) 28102 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28103 } 28104 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT 28105 28106 28107 #if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,k_eq_8)28108 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) { 28109 TEST_REQUIRES_ARM_NEON; 28110 GemmMicrokernelTester() 28111 .mr(4) 28112 .nr(8) 28113 .kr(1) 28114 .sr(1) 28115 .m(4) 28116 .n(8) 28117 .k(8) 28118 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28119 } 28120 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cn)28121 
// Single 4x8 case at k = 8 with cn_stride(11).
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
  TEST_REQUIRES_ARM_NEON;
  GemmMicrokernelTester()
    .mr(4).nr(8).kr(1).sr(1)
    .m(4).n(8).k(8)
    .cn_stride(11)
    .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
}

// k = 8 over every (m, n) in [1, 4] x [1, 8]; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4).nr(8).kr(1).sr(1)
        .m(m).n(n).k(8)
        .iterations(1)
        .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// k = 8, n = 8, sweeping m = 1..4; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t m = 1; m <= 4; m++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(m).n(8).k(8)
      .iterations(1)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// k = 8, m = 4, sweeping n = 1..8; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t n = 1; n <= 8; n++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(n).k(8)
      .iterations(1)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k = 1..7 at m = 4, n = 8.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Sweeps k = 1..7 over every (m, n) in [1, 4] x [1, 8]; iterations(1) per case.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 1; k < 8; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4).nr(8).kr(1).sr(1)
          .m(m).n(n).k(k)
          .iterations(1)
          .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Sweeps k = 9..15 at m = 4, n = 8.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    GemmMicrokernelTester()
      .mr(4).nr(8).kr(1).sr(1)
      .m(4).n(8).k(k)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}
// Every (m, n) combination for every k in [9, 15].
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 9; k < 16; k++) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}

// Full tile for k = 16, 24, ..., 80 (multiples of 8).
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    GemmMicrokernelTester()
      .mr(4)
      .nr(8)
      .kr(1)
      .sr(1)
      .m(4)
      .n(8)
      .k(k)
      .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Every (m, n) combination for k = 16, 24, ..., 80.
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t k = 16; k <= 80; k += 8) {
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(8)
          .kr(1)
          .sr(1)
          .m(m)
          .n(n)
          .k(k)
          .iterations(1)
          .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }
}
TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) { 28291 TEST_REQUIRES_ARM_NEON; 28292 for (uint32_t n = 9; n < 16; n++) { 28293 for (size_t k = 1; k <= 40; k += 9) { 28294 GemmMicrokernelTester() 28295 .mr(4) 28296 .nr(8) 28297 .kr(1) 28298 .sr(1) 28299 .m(4) 28300 .n(n) 28301 .k(k) 28302 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28303 } 28304 } 28305 } 28306 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_strided_cn)28307 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) { 28308 TEST_REQUIRES_ARM_NEON; 28309 for (uint32_t n = 9; n < 16; n++) { 28310 for (size_t k = 1; k <= 40; k += 9) { 28311 GemmMicrokernelTester() 28312 .mr(4) 28313 .nr(8) 28314 .kr(1) 28315 .sr(1) 28316 .m(4) 28317 .n(n) 28318 .k(k) 28319 .cn_stride(11) 28320 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28321 } 28322 } 28323 } 28324 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_subtile)28325 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) { 28326 TEST_REQUIRES_ARM_NEON; 28327 for (uint32_t n = 9; n < 16; n++) { 28328 for (size_t k = 1; k <= 40; k += 9) { 28329 for (uint32_t m = 1; m <= 4; m++) { 28330 GemmMicrokernelTester() 28331 .mr(4) 28332 .nr(8) 28333 .kr(1) 28334 .sr(1) 28335 .m(m) 28336 .n(n) 28337 .k(k) 28338 .iterations(1) 28339 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28340 } 28341 } 28342 } 28343 } 28344 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8)28345 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) { 28346 TEST_REQUIRES_ARM_NEON; 28347 for 
(uint32_t n = 16; n <= 24; n += 8) { 28348 for (size_t k = 1; k <= 40; k += 9) { 28349 GemmMicrokernelTester() 28350 .mr(4) 28351 .nr(8) 28352 .kr(1) 28353 .sr(1) 28354 .m(4) 28355 .n(n) 28356 .k(k) 28357 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28358 } 28359 } 28360 } 28361 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_strided_cn)28362 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) { 28363 TEST_REQUIRES_ARM_NEON; 28364 for (uint32_t n = 16; n <= 24; n += 8) { 28365 for (size_t k = 1; k <= 40; k += 9) { 28366 GemmMicrokernelTester() 28367 .mr(4) 28368 .nr(8) 28369 .kr(1) 28370 .sr(1) 28371 .m(4) 28372 .n(n) 28373 .k(k) 28374 .cn_stride(11) 28375 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28376 } 28377 } 28378 } 28379 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_subtile)28380 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) { 28381 TEST_REQUIRES_ARM_NEON; 28382 for (uint32_t n = 16; n <= 24; n += 8) { 28383 for (size_t k = 1; k <= 40; k += 9) { 28384 for (uint32_t m = 1; m <= 4; m++) { 28385 GemmMicrokernelTester() 28386 .mr(4) 28387 .nr(8) 28388 .kr(1) 28389 .sr(1) 28390 .m(m) 28391 .n(n) 28392 .k(k) 28393 .iterations(1) 28394 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28395 } 28396 } 28397 } 28398 } 28399 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,small_kernel)28400 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel) { 28401 TEST_REQUIRES_ARM_NEON; 28402 for (size_t k = 1; k <= 40; k += 9) { 28403 GemmMicrokernelTester() 28404 .mr(4) 28405 .nr(8) 
28406 .kr(1) 28407 .sr(1) 28408 .m(4) 28409 .n(8) 28410 .k(k) 28411 .ks(3) 28412 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28413 } 28414 } 28415 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,small_kernel_subtile)28416 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) { 28417 TEST_REQUIRES_ARM_NEON; 28418 for (size_t k = 1; k <= 40; k += 9) { 28419 for (uint32_t n = 1; n <= 8; n++) { 28420 for (uint32_t m = 1; m <= 4; m++) { 28421 GemmMicrokernelTester() 28422 .mr(4) 28423 .nr(8) 28424 .kr(1) 28425 .sr(1) 28426 .m(m) 28427 .n(n) 28428 .k(k) 28429 .ks(3) 28430 .iterations(1) 28431 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28432 } 28433 } 28434 } 28435 } 28436 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_gt_8_small_kernel)28437 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) { 28438 TEST_REQUIRES_ARM_NEON; 28439 for (uint32_t n = 9; n < 16; n++) { 28440 for (size_t k = 1; k <= 40; k += 9) { 28441 GemmMicrokernelTester() 28442 .mr(4) 28443 .nr(8) 28444 .kr(1) 28445 .sr(1) 28446 .m(4) 28447 .n(n) 28448 .k(k) 28449 .ks(3) 28450 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28451 } 28452 } 28453 } 28454 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,n_div_8_small_kernel)28455 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) { 28456 TEST_REQUIRES_ARM_NEON; 28457 for (uint32_t n = 16; n <= 24; n += 8) { 28458 for (size_t k = 1; k <= 40; k += 9) { 28459 GemmMicrokernelTester() 28460 .mr(4) 28461 .nr(8) 28462 .kr(1) 28463 .sr(1) 28464 .m(4) 28465 .n(n) 28466 .k(k) 
28467 .ks(3) 28468 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28469 } 28470 } 28471 } 28472 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cm_subtile)28473 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) { 28474 TEST_REQUIRES_ARM_NEON; 28475 for (size_t k = 1; k <= 40; k += 9) { 28476 for (uint32_t n = 1; n <= 8; n++) { 28477 for (uint32_t m = 1; m <= 4; m++) { 28478 GemmMicrokernelTester() 28479 .mr(4) 28480 .nr(8) 28481 .kr(1) 28482 .sr(1) 28483 .m(m) 28484 .n(n) 28485 .k(k) 28486 .cm_stride(11) 28487 .iterations(1) 28488 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28489 } 28490 } 28491 } 28492 } 28493 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,a_offset)28494 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, a_offset) { 28495 TEST_REQUIRES_ARM_NEON; 28496 for (size_t k = 1; k <= 40; k += 9) { 28497 GemmMicrokernelTester() 28498 .mr(4) 28499 .nr(8) 28500 .kr(1) 28501 .sr(1) 28502 .m(4) 28503 .n(8) 28504 .k(k) 28505 .ks(3) 28506 .a_offset(163) 28507 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28508 } 28509 } 28510 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,zero)28511 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, zero) { 28512 TEST_REQUIRES_ARM_NEON; 28513 for (size_t k = 1; k <= 40; k += 9) { 28514 for (uint32_t mz = 0; mz < 4; mz++) { 28515 GemmMicrokernelTester() 28516 .mr(4) 28517 .nr(8) 28518 .kr(1) 28519 .sr(1) 28520 .m(4) 28521 .n(8) 28522 .k(k) 28523 .ks(3) 28524 .a_offset(163) 28525 .zero_index(mz) 28526 
.Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28527 } 28528 } 28529 } 28530 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,qmin)28531 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) { 28532 TEST_REQUIRES_ARM_NEON; 28533 GemmMicrokernelTester() 28534 .mr(4) 28535 .nr(8) 28536 .kr(1) 28537 .sr(1) 28538 .m(4) 28539 .n(8) 28540 .k(8) 28541 .qmin(128) 28542 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28543 } 28544 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,qmax)28545 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) { 28546 TEST_REQUIRES_ARM_NEON; 28547 GemmMicrokernelTester() 28548 .mr(4) 28549 .nr(8) 28550 .kr(1) 28551 .sr(1) 28552 .m(4) 28553 .n(8) 28554 .k(8) 28555 .qmax(128) 28556 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28557 } 28558 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64,strided_cm)28559 TEST(GENERATE_QS8_IGEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) { 28560 TEST_REQUIRES_ARM_NEON; 28561 GemmMicrokernelTester() 28562 .mr(4) 28563 .nr(8) 28564 .kr(1) 28565 .sr(1) 28566 .m(4) 28567 .n(8) 28568 .k(8) 28569 .cm_stride(11) 28570 .Test(xnn_generate_qs8_igemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 28571 } 28572 #endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT 28573