xref: /aosp_15_r20/external/XNNPACK/test/f16-vmulcaddc-minmax.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 //   Specification: test/f16-vmulcaddc-minmax.yaml
8 //   Generator: tools/generate-vmulcaddc-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/vmulcaddc.h>
17 #include "vmulcaddc-microkernel-tester.h"
18 
19 
20 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Runs the kernel once with the channel count equal to the channel tile (8) and rows(2).
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t tile = 8;
  VMulCAddCMicrokernelTester()
    .channel_tile(tile)
    .channels(tile)
    .rows(2)
    .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
          xnn_init_f16_minmax_neon_params);
}
29 
// Channel counts that are whole multiples of the tile: 16, 24, ..., 72, each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t tile = 8;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(multiple * tile)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
40 
// Every channel count below the tile (1 through 7), each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t tile = 8;
  for (size_t num_channels = 1; num_channels != tile; ++num_channels) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(num_channels)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
51 
// Channel counts strictly between one and two tiles (9 through 15), each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t tile = 8;
  for (size_t remainder = 1; remainder < tile; ++remainder) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(tile + remainder)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
62 
// A single row (the only row count below 2) crossed with channels 1, 8, ..., 36.
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_lt_2) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t num_rows = 1;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    VMulCAddCMicrokernelTester()
      .channel_tile(8)
      .channels(num_channels)
      .rows(num_rows)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
75 
// Even row counts 4, 6, 8 crossed with channels 1, 8, ..., 36.
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_div_2) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t row_pairs = 2; row_pairs <= 4; ++row_pairs) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(2 * row_pairs)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
88 
// Three rows (the only row count in the original [3, 4) sweep) crossed with channels 1, 8, ..., 36.
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_gt_2) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t num_rows = 3;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    VMulCAddCMicrokernelTester()
      .channel_tile(8)
      .channels(num_channels)
      .rows(num_rows)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
101 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with input_stride(43) set on the tester.
// 43 is larger than the biggest channel count in the sweep (36).
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, input_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .input_stride(43)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
115 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with output_stride(43) set on the tester.
// 43 is larger than the biggest channel count in the sweep (36).
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .output_stride(43)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
129 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with inplace(true) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, inplace) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .inplace(true)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
143 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with qmin(128) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .qmin(128)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
157 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with qmax(128) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .qmax(128)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
171 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
172 
173 
174 #if XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
// Runs the kernel once with the channel count equal to the channel tile (16) and rows(2).
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_eq_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t tile = 16;
  VMulCAddCMicrokernelTester()
    .channel_tile(tile)
    .channels(tile)
    .rows(2)
    .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
          xnn_init_f16_minmax_neon_params);
}
183 
// Channel counts that are whole multiples of the tile: 32, 48, ..., 144, each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_div_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t tile = 16;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(multiple * tile)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
194 
// Every channel count below the tile (1 through 15), each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_lt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t tile = 16;
  for (size_t num_channels = 1; num_channels != tile; ++num_channels) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(num_channels)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
205 
// Channel counts strictly between one and two tiles (17 through 31), each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_gt_16) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t tile = 16;
  for (size_t remainder = 1; remainder < tile; ++remainder) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(tile + remainder)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
216 
// A single row (the only row count below 2) crossed with channels 1, 16, ..., 76.
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_lt_2) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t num_rows = 1;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    VMulCAddCMicrokernelTester()
      .channel_tile(16)
      .channels(num_channels)
      .rows(num_rows)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
229 
// Even row counts 4, 6, 8 crossed with channels 1, 16, ..., 76.
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_div_2) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t row_pairs = 2; row_pairs <= 4; ++row_pairs) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(2 * row_pairs)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
242 
// Three rows (the only row count in the original [3, 4) sweep) crossed with channels 1, 16, ..., 76.
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_gt_2) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  const size_t num_rows = 3;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    VMulCAddCMicrokernelTester()
      .channel_tile(16)
      .channels(num_channels)
      .rows(num_rows)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
            xnn_init_f16_minmax_neon_params);
  }
}
255 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with input_stride(83) set on the tester.
// 83 is larger than the biggest channel count in the sweep (76).
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, input_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .input_stride(83)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
269 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with output_stride(83) set on the tester.
// 83 is larger than the biggest channel count in the sweep (76).
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .output_stride(83)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
283 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with inplace(true) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, inplace) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .inplace(true)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
297 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with qmin(128) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .qmin(128)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
311 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with qmax(128) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .qmax(128)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x,
              xnn_init_f16_minmax_neon_params);
    }
  }
}
325 #endif  // XNN_ENABLE_ARM_FP16 && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
326 
327 
328 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the channel count equal to the channel tile (8) and rows(2).
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_eq_8) {
  TEST_REQUIRES_X86_FMA3;
  const size_t tile = 8;
  VMulCAddCMicrokernelTester()
    .channel_tile(tile)
    .channels(tile)
    .rows(2)
    .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
          xnn_init_f16_minmax_avx_params);
}
337 
// Channel counts that are whole multiples of the tile: 16, 24, ..., 72, each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_div_8) {
  TEST_REQUIRES_X86_FMA3;
  const size_t tile = 8;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(multiple * tile)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
348 
// Every channel count below the tile (1 through 7), each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_lt_8) {
  TEST_REQUIRES_X86_FMA3;
  const size_t tile = 8;
  for (size_t num_channels = 1; num_channels != tile; ++num_channels) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(num_channels)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
359 
// Channel counts strictly between one and two tiles (9 through 15), each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_gt_8) {
  TEST_REQUIRES_X86_FMA3;
  const size_t tile = 8;
  for (size_t remainder = 1; remainder < tile; ++remainder) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(tile + remainder)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
370 
// A single row (the only row count below 2) crossed with channels 1, 8, ..., 36.
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_lt_2) {
  TEST_REQUIRES_X86_FMA3;
  const size_t num_rows = 1;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    VMulCAddCMicrokernelTester()
      .channel_tile(8)
      .channels(num_channels)
      .rows(num_rows)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
383 
// Even row counts 4, 6, 8 crossed with channels 1, 8, ..., 36.
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_div_2) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t row_pairs = 2; row_pairs <= 4; ++row_pairs) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(2 * row_pairs)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
396 
// Three rows (the only row count in the original [3, 4) sweep) crossed with channels 1, 8, ..., 36.
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_gt_2) {
  TEST_REQUIRES_X86_FMA3;
  const size_t num_rows = 3;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    VMulCAddCMicrokernelTester()
      .channel_tile(8)
      .channels(num_channels)
      .rows(num_rows)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
409 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with input_stride(43) set on the tester.
// 43 is larger than the biggest channel count in the sweep (36).
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, input_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .input_stride(43)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
423 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with output_stride(43) set on the tester.
// 43 is larger than the biggest channel count in the sweep (36).
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .output_stride(43)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
437 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with inplace(true) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .inplace(true)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
451 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with qmin(128) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .qmin(128)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
465 
// Rows 1..6 crossed with channels 1, 8, ..., 36, with qmax(128) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
      VMulCAddCMicrokernelTester()
        .channel_tile(8)
        .channels(num_channels)
        .rows(num_rows)
        .qmax(128)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
479 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
480 
481 
482 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with the channel count equal to the channel tile (16) and rows(2).
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_eq_16) {
  TEST_REQUIRES_X86_FMA3;
  const size_t tile = 16;
  VMulCAddCMicrokernelTester()
    .channel_tile(tile)
    .channels(tile)
    .rows(2)
    .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
          xnn_init_f16_minmax_avx_params);
}
491 
// Channel counts that are whole multiples of the tile: 32, 48, ..., 144, each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_div_16) {
  TEST_REQUIRES_X86_FMA3;
  const size_t tile = 16;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(multiple * tile)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
502 
// Every channel count below the tile (1 through 15), each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_lt_16) {
  TEST_REQUIRES_X86_FMA3;
  const size_t tile = 16;
  for (size_t num_channels = 1; num_channels != tile; ++num_channels) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(num_channels)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
513 
// Channel counts strictly between one and two tiles (17 through 31), each with rows(2).
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_gt_16) {
  TEST_REQUIRES_X86_FMA3;
  const size_t tile = 16;
  for (size_t remainder = 1; remainder < tile; ++remainder) {
    VMulCAddCMicrokernelTester()
      .channel_tile(tile)
      .channels(tile + remainder)
      .rows(2)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
524 
// A single row (the only row count below 2) crossed with channels 1, 16, ..., 76.
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_lt_2) {
  TEST_REQUIRES_X86_FMA3;
  const size_t num_rows = 1;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    VMulCAddCMicrokernelTester()
      .channel_tile(16)
      .channels(num_channels)
      .rows(num_rows)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
537 
// Even row counts 4, 6, 8 crossed with channels 1, 16, ..., 76.
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_div_2) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t row_pairs = 2; row_pairs <= 4; ++row_pairs) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(2 * row_pairs)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
550 
// Three rows (the only row count in the original [3, 4) sweep) crossed with channels 1, 16, ..., 76.
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_gt_2) {
  TEST_REQUIRES_X86_FMA3;
  const size_t num_rows = 3;
  for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
    VMulCAddCMicrokernelTester()
      .channel_tile(16)
      .channels(num_channels)
      .rows(num_rows)
      .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
            xnn_init_f16_minmax_avx_params);
  }
}
563 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with input_stride(83) set on the tester.
// 83 is larger than the biggest channel count in the sweep (76).
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, input_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .input_stride(83)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
577 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with output_stride(83) set on the tester.
// 83 is larger than the biggest channel count in the sweep (76).
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, output_stride) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .output_stride(83)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
591 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with inplace(true) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, inplace) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .inplace(true)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
605 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with qmin(128) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, qmin) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .qmin(128)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
619 
// Rows 1..6 crossed with channels 1, 16, ..., 76, with qmax(128) set on the tester.
TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, qmax) {
  TEST_REQUIRES_X86_FMA3;
  for (size_t num_rows = 1; num_rows <= 6; ++num_rows) {
    for (size_t num_channels = 1; num_channels <= 80; num_channels += 15) {
      VMulCAddCMicrokernelTester()
        .channel_tile(16)
        .channels(num_channels)
        .rows(num_rows)
        .qmax(128)
        .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x,
              xnn_init_f16_minmax_avx_params);
    }
  }
}
633 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
634