1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 // 6 // Auto-generated file. Do not edit! 7 // Specification: test/f16-vmulcaddc-minmax.yaml 8 // Generator: tools/generate-vmulcaddc-test.py 9 10 11 #include <gtest/gtest.h> 12 13 #include <xnnpack/common.h> 14 #include <xnnpack/isa-checks.h> 15 16 #include <xnnpack/vmulcaddc.h> 17 #include "vmulcaddc-microkernel-tester.h" 18 19 20 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,channels_eq_8)21 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_eq_8) { 22 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 23 VMulCAddCMicrokernelTester() 24 .channel_tile(8) 25 .channels(8) 26 .rows(2) 27 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 28 } 29 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,channels_div_8)30 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_div_8) { 31 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 32 for (size_t channels = 16; channels < 80; channels += 8) { 33 VMulCAddCMicrokernelTester() 34 .channel_tile(8) 35 .channels(channels) 36 .rows(2) 37 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 38 } 39 } 40 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,channels_lt_8)41 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_lt_8) { 42 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 43 for (size_t channels = 1; channels < 8; channels++) { 44 VMulCAddCMicrokernelTester() 45 .channel_tile(8) 46 .channels(channels) 47 .rows(2) 48 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 49 } 50 } 51 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,channels_gt_8)52 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_gt_8) { 53 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 54 for (size_t channels = 9; channels < 16; channels++) { 55 VMulCAddCMicrokernelTester() 56 .channel_tile(8) 57 .channels(channels) 58 .rows(2) 59 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 60 } 61 } 62 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,rows_lt_2)63 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_lt_2) { 64 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 65 for (size_t rows = 1; rows < 2; rows++) { 66 for (size_t channels = 1; channels <= 40; channels += 7) { 67 VMulCAddCMicrokernelTester() 68 .channel_tile(8) 69 .channels(channels) 70 .rows(rows) 71 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 72 } 73 } 74 } 75 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,rows_div_2)76 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_div_2) { 77 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 78 for (size_t rows = 4; rows <= 8; rows += 2) { 79 for (size_t channels = 1; channels <= 40; channels += 7) { 80 VMulCAddCMicrokernelTester() 81 .channel_tile(8) 82 .channels(channels) 83 .rows(rows) 84 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 85 } 86 } 87 } 88 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,rows_gt_2)89 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_gt_2) { 90 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 91 for (size_t rows = 3; rows < 4; rows++) { 92 for (size_t channels = 1; channels <= 40; channels += 7) { 93 VMulCAddCMicrokernelTester() 94 .channel_tile(8) 95 .channels(channels) 96 .rows(rows) 97 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 98 } 99 } 100 } 101 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,input_stride)102 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, input_stride) { 103 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 104 for (size_t rows = 1; rows <= 6; rows += 1) { 105 for (size_t channels = 1; channels <= 40; channels += 7) { 106 VMulCAddCMicrokernelTester() 107 .channel_tile(8) 108 .channels(channels) 109 .rows(rows) 110 .input_stride(43) 111 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 112 } 113 } 114 } 115 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,output_stride)116 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, output_stride) { 117 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 118 for (size_t rows = 1; rows <= 6; rows += 1) { 119 for (size_t channels = 1; channels <= 40; channels += 7) { 120 VMulCAddCMicrokernelTester() 121 .channel_tile(8) 122 .channels(channels) 123 .rows(rows) 124 .output_stride(43) 125 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 126 } 127 } 128 } 129 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,inplace)130 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, inplace) { 131 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 132 for (size_t rows = 1; rows <= 6; rows += 1) { 133 for (size_t channels = 1; channels <= 40; channels += 7) { 134 VMulCAddCMicrokernelTester() 135 .channel_tile(8) 136 .channels(channels) 137 .rows(rows) 138 .inplace(true) 139 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 140 } 141 } 142 } 143 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,qmin)144 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, qmin) { 145 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 146 for (size_t rows = 1; rows <= 6; rows += 1) { 147 for (size_t channels = 1; channels <= 40; channels += 7) { 148 VMulCAddCMicrokernelTester() 149 .channel_tile(8) 150 .channels(channels) 151 .rows(rows) 152 .qmin(128) 153 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 154 } 155 } 156 } 157 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X,qmax)158 TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, qmax) { 159 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 160 for (size_t rows = 1; rows <= 6; rows += 1) { 161 for (size_t channels = 1; channels <= 40; channels += 7) { 162 VMulCAddCMicrokernelTester() 163 .channel_tile(8) 164 .channels(channels) 165 .rows(rows) 166 .qmax(128) 167 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 168 } 169 } 170 } 171 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 172 173 174 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,channels_eq_16)175 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_eq_16) { 176 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 177 VMulCAddCMicrokernelTester() 178 .channel_tile(16) 179 .channels(16) 180 .rows(2) 181 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 182 } 183 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,channels_div_16)184 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_div_16) { 185 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 186 for (size_t channels = 32; channels < 160; channels += 16) { 187 VMulCAddCMicrokernelTester() 188 .channel_tile(16) 189 .channels(channels) 190 .rows(2) 191 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 192 } 193 } 194 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,channels_lt_16)195 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_lt_16) { 196 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 197 for (size_t channels = 1; channels < 16; channels++) { 198 VMulCAddCMicrokernelTester() 199 .channel_tile(16) 200 .channels(channels) 201 .rows(2) 202 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 203 } 204 } 205 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,channels_gt_16)206 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_gt_16) { 207 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 208 for (size_t channels = 17; channels < 32; channels++) { 209 VMulCAddCMicrokernelTester() 210 .channel_tile(16) 211 .channels(channels) 212 .rows(2) 213 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 214 } 215 } 216 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,rows_lt_2)217 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_lt_2) { 218 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 219 for (size_t rows = 1; rows < 2; rows++) { 220 for (size_t channels = 1; channels <= 80; channels += 15) { 221 VMulCAddCMicrokernelTester() 222 .channel_tile(16) 223 .channels(channels) 224 .rows(rows) 225 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 226 } 227 } 228 } 229 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,rows_div_2)230 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_div_2) { 231 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 232 for (size_t rows = 4; rows <= 8; rows += 2) { 233 for (size_t channels = 1; channels <= 80; channels += 15) { 234 VMulCAddCMicrokernelTester() 235 .channel_tile(16) 236 .channels(channels) 237 .rows(rows) 238 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 239 } 240 } 241 } 242 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,rows_gt_2)243 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_gt_2) { 244 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 245 for (size_t rows = 3; rows < 4; rows++) { 246 for (size_t channels = 1; channels <= 80; channels += 15) { 247 VMulCAddCMicrokernelTester() 248 .channel_tile(16) 249 .channels(channels) 250 .rows(rows) 251 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 252 } 253 } 254 } 255 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,input_stride)256 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, input_stride) { 257 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 258 for (size_t rows = 1; rows <= 6; rows += 1) { 259 for (size_t channels = 1; channels <= 80; channels += 15) { 260 VMulCAddCMicrokernelTester() 261 .channel_tile(16) 262 .channels(channels) 263 .rows(rows) 264 .input_stride(83) 265 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 266 } 267 } 268 } 269 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,output_stride)270 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, output_stride) { 271 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 272 for (size_t rows = 1; rows <= 6; rows += 1) { 273 for (size_t channels = 1; channels <= 80; channels += 15) { 274 VMulCAddCMicrokernelTester() 275 .channel_tile(16) 276 .channels(channels) 277 .rows(rows) 278 .output_stride(83) 279 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 280 } 281 } 282 } 283 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,inplace)284 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, inplace) { 285 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 286 for (size_t rows = 1; rows <= 6; rows += 1) { 287 for (size_t channels = 1; channels <= 80; channels += 15) { 288 VMulCAddCMicrokernelTester() 289 .channel_tile(16) 290 .channels(channels) 291 .rows(rows) 292 .inplace(true) 293 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 294 } 295 } 296 } 297 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,qmin)298 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, qmin) { 299 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 300 for (size_t rows = 1; rows <= 6; rows += 1) { 301 for (size_t channels = 1; channels <= 80; channels += 15) { 302 VMulCAddCMicrokernelTester() 303 .channel_tile(16) 304 .channels(channels) 305 .rows(rows) 306 .qmin(128) 307 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 308 } 309 } 310 } 311 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X,qmax)312 TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, qmax) { 313 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 314 for (size_t rows = 1; rows <= 6; rows += 1) { 315 for (size_t channels = 1; channels <= 80; channels += 15) { 316 VMulCAddCMicrokernelTester() 317 .channel_tile(16) 318 .channels(channels) 319 .rows(rows) 320 .qmax(128) 321 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_neon_params); 322 } 323 } 324 } 325 #endif // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64) 326 327 328 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,channels_eq_8)329 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_eq_8) { 330 TEST_REQUIRES_X86_FMA3; 331 VMulCAddCMicrokernelTester() 332 .channel_tile(8) 333 .channels(8) 334 .rows(2) 335 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 336 } 337 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,channels_div_8)338 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_div_8) { 339 TEST_REQUIRES_X86_FMA3; 340 for (size_t channels = 16; channels < 80; channels += 8) { 341 VMulCAddCMicrokernelTester() 342 .channel_tile(8) 343 .channels(channels) 344 .rows(2) 345 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 346 } 347 } 348 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,channels_lt_8)349 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_lt_8) { 350 TEST_REQUIRES_X86_FMA3; 351 for (size_t channels = 1; channels < 8; channels++) { 352 VMulCAddCMicrokernelTester() 353 .channel_tile(8) 354 .channels(channels) 355 .rows(2) 356 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 357 } 358 } 359 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,channels_gt_8)360 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_gt_8) { 361 TEST_REQUIRES_X86_FMA3; 362 for (size_t channels = 9; channels < 16; channels++) { 363 VMulCAddCMicrokernelTester() 364 .channel_tile(8) 365 .channels(channels) 366 .rows(2) 367 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 368 } 369 } 370 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,rows_lt_2)371 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_lt_2) { 372 TEST_REQUIRES_X86_FMA3; 373 for (size_t rows = 1; rows < 2; rows++) { 374 for (size_t channels = 1; channels <= 40; channels += 7) { 375 VMulCAddCMicrokernelTester() 376 .channel_tile(8) 377 .channels(channels) 378 .rows(rows) 379 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 380 } 381 } 382 } 383 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,rows_div_2)384 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_div_2) { 385 TEST_REQUIRES_X86_FMA3; 386 for (size_t rows = 4; rows <= 8; rows += 2) { 387 for (size_t channels = 1; channels <= 40; channels += 7) { 388 VMulCAddCMicrokernelTester() 389 .channel_tile(8) 390 .channels(channels) 391 .rows(rows) 392 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 393 } 394 } 395 } 396 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,rows_gt_2)397 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_gt_2) { 398 TEST_REQUIRES_X86_FMA3; 399 for (size_t rows = 3; rows < 4; rows++) { 400 for (size_t channels = 1; channels <= 40; channels += 7) { 401 VMulCAddCMicrokernelTester() 402 .channel_tile(8) 403 .channels(channels) 404 .rows(rows) 405 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 406 } 407 } 408 } 409 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,input_stride)410 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, input_stride) { 411 TEST_REQUIRES_X86_FMA3; 412 for (size_t rows = 1; rows <= 6; rows += 1) { 413 for (size_t channels = 1; channels <= 40; channels += 7) { 414 VMulCAddCMicrokernelTester() 415 .channel_tile(8) 416 .channels(channels) 417 .rows(rows) 418 .input_stride(43) 419 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 420 } 421 } 422 } 423 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,output_stride)424 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, output_stride) { 425 TEST_REQUIRES_X86_FMA3; 426 for (size_t rows = 1; rows <= 6; rows += 1) { 427 for (size_t channels = 1; channels <= 40; channels += 7) { 428 VMulCAddCMicrokernelTester() 429 .channel_tile(8) 430 .channels(channels) 431 .rows(rows) 432 .output_stride(43) 433 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 434 } 435 } 436 } 437 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,inplace)438 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, inplace) { 439 TEST_REQUIRES_X86_FMA3; 440 for (size_t rows = 1; rows <= 6; rows += 1) { 441 for (size_t channels = 1; channels <= 40; channels += 7) { 442 VMulCAddCMicrokernelTester() 443 .channel_tile(8) 444 .channels(channels) 445 .rows(rows) 446 .inplace(true) 447 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 448 } 449 } 450 } 451 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,qmin)452 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, qmin) { 453 TEST_REQUIRES_X86_FMA3; 454 for (size_t rows = 1; rows <= 6; rows += 1) { 455 for (size_t channels = 1; channels <= 40; channels += 7) { 456 VMulCAddCMicrokernelTester() 457 .channel_tile(8) 458 .channels(channels) 459 .rows(rows) 460 .qmin(128) 461 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 462 } 463 } 464 } 465 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X,qmax)466 TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, qmax) { 467 TEST_REQUIRES_X86_FMA3; 468 for (size_t rows = 1; rows <= 6; rows += 1) { 469 for (size_t channels = 1; channels <= 40; channels += 7) { 470 VMulCAddCMicrokernelTester() 471 .channel_tile(8) 472 .channels(channels) 473 .rows(rows) 474 .qmax(128) 475 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_avx_params); 476 } 477 } 478 } 479 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 480 481 482 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,channels_eq_16)483 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_eq_16) { 484 TEST_REQUIRES_X86_FMA3; 485 VMulCAddCMicrokernelTester() 486 .channel_tile(16) 487 .channels(16) 488 .rows(2) 489 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 490 } 491 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,channels_div_16)492 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_div_16) { 493 TEST_REQUIRES_X86_FMA3; 494 for (size_t channels = 32; channels < 160; channels += 16) { 495 VMulCAddCMicrokernelTester() 496 .channel_tile(16) 497 .channels(channels) 498 .rows(2) 499 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 500 } 501 } 502 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,channels_lt_16)503 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_lt_16) { 504 TEST_REQUIRES_X86_FMA3; 505 for (size_t channels = 1; channels < 16; channels++) { 506 VMulCAddCMicrokernelTester() 507 .channel_tile(16) 508 .channels(channels) 509 .rows(2) 510 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 511 } 512 } 513 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,channels_gt_16)514 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_gt_16) { 515 TEST_REQUIRES_X86_FMA3; 516 for (size_t channels = 17; channels < 32; channels++) { 517 VMulCAddCMicrokernelTester() 518 .channel_tile(16) 519 .channels(channels) 520 .rows(2) 521 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 522 } 523 } 524 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,rows_lt_2)525 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_lt_2) { 526 TEST_REQUIRES_X86_FMA3; 527 for (size_t rows = 1; rows < 2; rows++) { 528 for (size_t channels = 1; channels <= 80; channels += 15) { 529 VMulCAddCMicrokernelTester() 530 .channel_tile(16) 531 .channels(channels) 532 .rows(rows) 533 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 534 } 535 } 536 } 537 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,rows_div_2)538 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_div_2) { 539 TEST_REQUIRES_X86_FMA3; 540 for (size_t rows = 4; rows <= 8; rows += 2) { 541 for (size_t channels = 1; channels <= 80; channels += 15) { 542 VMulCAddCMicrokernelTester() 543 .channel_tile(16) 544 .channels(channels) 545 .rows(rows) 546 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 547 } 548 } 549 } 550 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,rows_gt_2)551 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_gt_2) { 552 TEST_REQUIRES_X86_FMA3; 553 for (size_t rows = 3; rows < 4; rows++) { 554 for (size_t channels = 1; channels <= 80; channels += 15) { 555 VMulCAddCMicrokernelTester() 556 .channel_tile(16) 557 .channels(channels) 558 .rows(rows) 559 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 560 } 561 } 562 } 563 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,input_stride)564 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, input_stride) { 565 TEST_REQUIRES_X86_FMA3; 566 for (size_t rows = 1; rows <= 6; rows += 1) { 567 for (size_t channels = 1; channels <= 80; channels += 15) { 568 VMulCAddCMicrokernelTester() 569 .channel_tile(16) 570 .channels(channels) 571 .rows(rows) 572 .input_stride(83) 573 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 574 } 575 } 576 } 577 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,output_stride)578 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, output_stride) { 579 TEST_REQUIRES_X86_FMA3; 580 for (size_t rows = 1; rows <= 6; rows += 1) { 581 for (size_t channels = 1; channels <= 80; channels += 15) { 582 VMulCAddCMicrokernelTester() 583 .channel_tile(16) 584 .channels(channels) 585 .rows(rows) 586 .output_stride(83) 587 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 588 } 589 } 590 } 591 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,inplace)592 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, inplace) { 593 TEST_REQUIRES_X86_FMA3; 594 for (size_t rows = 1; rows <= 6; rows += 1) { 595 for (size_t channels = 1; channels <= 80; channels += 15) { 596 VMulCAddCMicrokernelTester() 597 .channel_tile(16) 598 .channels(channels) 599 .rows(rows) 600 .inplace(true) 601 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 602 } 603 } 604 } 605 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,qmin)606 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, qmin) { 607 TEST_REQUIRES_X86_FMA3; 608 for (size_t rows = 1; rows <= 6; rows += 1) { 609 for (size_t channels = 1; channels <= 80; channels += 15) { 610 VMulCAddCMicrokernelTester() 611 .channel_tile(16) 612 .channels(channels) 613 .rows(rows) 614 .qmin(128) 615 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 616 } 617 } 618 } 619 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X,qmax)620 TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, qmax) { 621 TEST_REQUIRES_X86_FMA3; 622 for (size_t rows = 1; rows <= 6; rows += 1) { 623 for (size_t channels = 1; channels <= 80; channels += 15) { 624 VMulCAddCMicrokernelTester() 625 .channel_tile(16) 626 .channels(channels) 627 .rows(rows) 628 .qmax(128) 629 .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_avx_params); 630 } 631 } 632 } 633 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 634