xref: /aosp_15_r20/external/XNNPACK/test/xx-fill.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <gtest/gtest.h>
7 
8 #include <xnnpack/common.h>
9 #include <xnnpack/isa-checks.h>
10 
11 #include <xnnpack/fill.h>
12 #include "fill-microkernel-tester.h"
13 
14 
15 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the NEON x64 fill microkernel through FillMicrokernelTester with the
// channel count set to 1. Gated by TEST_REQUIRES_ARM_NEON (defined outside
// this file; presumably skips the test when NEON is unavailable).
TEST(XX_FILL__NEON_X64, channels_eq_1) {
  TEST_REQUIRES_ARM_NEON;
  FillMicrokernelTester().channels(1).Test(xnn_xx_fill_ukernel__neon_x64);
}
22 
// Runs the NEON x64 fill microkernel through FillMicrokernelTester with the
// channel count set to 2. Gated by TEST_REQUIRES_ARM_NEON (defined outside
// this file; presumably skips the test when NEON is unavailable).
TEST(XX_FILL__NEON_X64, channels_eq_2) {
  TEST_REQUIRES_ARM_NEON;
  FillMicrokernelTester().channels(2).Test(xnn_xx_fill_ukernel__neon_x64);
}
29 
// Runs the NEON x64 fill microkernel through FillMicrokernelTester with the
// channel count set to 4. Gated by TEST_REQUIRES_ARM_NEON (defined outside
// this file; presumably skips the test when NEON is unavailable).
TEST(XX_FILL__NEON_X64, channels_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  FillMicrokernelTester().channels(4).Test(xnn_xx_fill_ukernel__neon_x64);
}
36 
// Runs the NEON x64 fill microkernel through FillMicrokernelTester with the
// channel count set to 64, the value the kernel's "x64" suffix refers to.
// Gated by TEST_REQUIRES_ARM_NEON (defined outside this file; presumably
// skips the test when NEON is unavailable).
TEST(XX_FILL__NEON_X64, channels_eq_64) {
  TEST_REQUIRES_ARM_NEON;
  FillMicrokernelTester().channels(64).Test(xnn_xx_fill_ukernel__neon_x64);
}
43 
// Exercises the NEON x64 fill microkernel with channel counts that are
// larger multiples of 64: the loop visits 128 and 192. Gated by
// TEST_REQUIRES_ARM_NEON (defined outside this file; presumably skips the
// test when NEON is unavailable).
TEST(XX_FILL__NEON_X64, channels_div_64) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 128; c <= 192; c += 64) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__neon_x64);
  }
}
52 
// Exercises the NEON x64 fill microkernel with every channel count below 64
// (1 through 63 inclusive). Gated by TEST_REQUIRES_ARM_NEON (defined outside
// this file; presumably skips the test when NEON is unavailable).
TEST(XX_FILL__NEON_X64, channels_lt_64) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 1; c < 64; c++) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__neon_x64);
  }
}
61 
// Exercises the NEON x64 fill microkernel with every channel count strictly
// between 64 and 128 (65 through 127 inclusive). Gated by
// TEST_REQUIRES_ARM_NEON (defined outside this file; presumably skips the
// test when NEON is unavailable).
TEST(XX_FILL__NEON_X64, channels_gt_64) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t c = 65; c < 128; c++) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__neon_x64);
  }
}
70 
// Exercises the NEON x64 fill microkernel with 2, 3 and 4 rows; for each row
// count the channel count steps from 1 in increments of 15 while it stays
// below 192. Gated by TEST_REQUIRES_ARM_NEON (defined outside this file;
// presumably skips the test when NEON is unavailable).
TEST(XX_FILL__NEON_X64, multiple_rows) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t r = 2; r < 5; r++) {
    for (size_t c = 1; c < 192; c += 15) {
      FillMicrokernelTester().channels(c).rows(r).Test(xnn_xx_fill_ukernel__neon_x64);
    }
  }
}
82 
// Same row/channel sweep as the multi-row case (rows 2..4, channels 1, 16,
// ... below 192), but with the tester's output stride set to 193, which is
// larger than every channel count visited. Gated by TEST_REQUIRES_ARM_NEON
// (defined outside this file; presumably skips the test when NEON is
// unavailable).
TEST(XX_FILL__NEON_X64, multiple_rows_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t r = 2; r < 5; r++) {
    for (size_t c = 1; c < 192; c += 15) {
      FillMicrokernelTester()
          .channels(c)
          .rows(r)
          .output_stride(193)
          .Test(xnn_xx_fill_ukernel__neon_x64);
    }
  }
}
95 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
96 
97 
98 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the SSE2 x64 fill microkernel through FillMicrokernelTester with the
// channel count set to 1. Gated by TEST_REQUIRES_X86_SSE2 (defined outside
// this file; presumably skips the test when SSE2 is unavailable).
TEST(XX_FILL__SSE2_X64, channels_eq_1) {
  TEST_REQUIRES_X86_SSE2;
  FillMicrokernelTester().channels(1).Test(xnn_xx_fill_ukernel__sse2_x64);
}
105 
// Runs the SSE2 x64 fill microkernel through FillMicrokernelTester with the
// channel count set to 2. Gated by TEST_REQUIRES_X86_SSE2 (defined outside
// this file; presumably skips the test when SSE2 is unavailable).
TEST(XX_FILL__SSE2_X64, channels_eq_2) {
  TEST_REQUIRES_X86_SSE2;
  FillMicrokernelTester().channels(2).Test(xnn_xx_fill_ukernel__sse2_x64);
}
112 
// Runs the SSE2 x64 fill microkernel through FillMicrokernelTester with the
// channel count set to 4. Gated by TEST_REQUIRES_X86_SSE2 (defined outside
// this file; presumably skips the test when SSE2 is unavailable).
TEST(XX_FILL__SSE2_X64, channels_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  FillMicrokernelTester().channels(4).Test(xnn_xx_fill_ukernel__sse2_x64);
}
119 
// Runs the SSE2 x64 fill microkernel through FillMicrokernelTester with the
// channel count set to 64, the value the kernel's "x64" suffix refers to.
// Gated by TEST_REQUIRES_X86_SSE2 (defined outside this file; presumably
// skips the test when SSE2 is unavailable).
TEST(XX_FILL__SSE2_X64, channels_eq_64) {
  TEST_REQUIRES_X86_SSE2;
  FillMicrokernelTester().channels(64).Test(xnn_xx_fill_ukernel__sse2_x64);
}
126 
// Exercises the SSE2 x64 fill microkernel with channel counts that are
// larger multiples of 64: the loop visits 128 and 192. Gated by
// TEST_REQUIRES_X86_SSE2 (defined outside this file; presumably skips the
// test when SSE2 is unavailable).
TEST(XX_FILL__SSE2_X64, channels_div_64) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 128; c <= 192; c += 64) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__sse2_x64);
  }
}
135 
// Exercises the SSE2 x64 fill microkernel with every channel count below 64
// (1 through 63 inclusive). Gated by TEST_REQUIRES_X86_SSE2 (defined outside
// this file; presumably skips the test when SSE2 is unavailable).
TEST(XX_FILL__SSE2_X64, channels_lt_64) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 1; c < 64; c++) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__sse2_x64);
  }
}
144 
// Exercises the SSE2 x64 fill microkernel with every channel count strictly
// between 64 and 128 (65 through 127 inclusive). Gated by
// TEST_REQUIRES_X86_SSE2 (defined outside this file; presumably skips the
// test when SSE2 is unavailable).
TEST(XX_FILL__SSE2_X64, channels_gt_64) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t c = 65; c < 128; c++) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__sse2_x64);
  }
}
153 
// Exercises the SSE2 x64 fill microkernel with 2, 3 and 4 rows; for each row
// count the channel count steps from 1 in increments of 15 while it stays
// below 192. Gated by TEST_REQUIRES_X86_SSE2 (defined outside this file;
// presumably skips the test when SSE2 is unavailable).
TEST(XX_FILL__SSE2_X64, multiple_rows) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 2; r < 5; r++) {
    for (size_t c = 1; c < 192; c += 15) {
      FillMicrokernelTester().channels(c).rows(r).Test(xnn_xx_fill_ukernel__sse2_x64);
    }
  }
}
165 
// Same row/channel sweep as the multi-row case (rows 2..4, channels 1, 16,
// ... below 192), but with the tester's output stride set to 193, which is
// larger than every channel count visited. Gated by TEST_REQUIRES_X86_SSE2
// (defined outside this file; presumably skips the test when SSE2 is
// unavailable).
TEST(XX_FILL__SSE2_X64, multiple_rows_with_output_stride) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t r = 2; r < 5; r++) {
    for (size_t c = 1; c < 192; c += 15) {
      FillMicrokernelTester()
          .channels(c)
          .rows(r)
          .output_stride(193)
          .Test(xnn_xx_fill_ukernel__sse2_x64);
    }
  }
}
178 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
179 
180 
181 #if XNN_ARCH_WASMSIMD
// Runs the WAsm SIMD x64 fill microkernel through FillMicrokernelTester with
// the channel count set to 1.
TEST(XX_FILL__WASMSIMD_X64, channels_eq_1) {
  FillMicrokernelTester().channels(1).Test(xnn_xx_fill_ukernel__wasmsimd_x64);
}
187 
// Runs the WAsm SIMD x64 fill microkernel through FillMicrokernelTester with
// the channel count set to 2.
TEST(XX_FILL__WASMSIMD_X64, channels_eq_2) {
  FillMicrokernelTester().channels(2).Test(xnn_xx_fill_ukernel__wasmsimd_x64);
}
193 
// Runs the WAsm SIMD x64 fill microkernel through FillMicrokernelTester with
// the channel count set to 4.
TEST(XX_FILL__WASMSIMD_X64, channels_eq_4) {
  FillMicrokernelTester().channels(4).Test(xnn_xx_fill_ukernel__wasmsimd_x64);
}
199 
// Runs the WAsm SIMD x64 fill microkernel through FillMicrokernelTester with
// the channel count set to 64, the value the kernel's "x64" suffix refers to.
TEST(XX_FILL__WASMSIMD_X64, channels_eq_64) {
  FillMicrokernelTester().channels(64).Test(xnn_xx_fill_ukernel__wasmsimd_x64);
}
205 
// Exercises the WAsm SIMD x64 fill microkernel with channel counts that are
// larger multiples of 64: the loop visits 128 and 192.
TEST(XX_FILL__WASMSIMD_X64, channels_div_64) {
  for (size_t c = 128; c <= 192; c += 64) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__wasmsimd_x64);
  }
}
213 
// Exercises the WAsm SIMD x64 fill microkernel with every channel count
// below 64 (1 through 63 inclusive).
TEST(XX_FILL__WASMSIMD_X64, channels_lt_64) {
  for (size_t c = 1; c < 64; c++) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__wasmsimd_x64);
  }
}
221 
// Exercises the WAsm SIMD x64 fill microkernel with every channel count
// strictly between 64 and 128 (65 through 127 inclusive).
TEST(XX_FILL__WASMSIMD_X64, channels_gt_64) {
  for (size_t c = 65; c < 128; c++) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__wasmsimd_x64);
  }
}
229 
// Exercises the WAsm SIMD x64 fill microkernel with 2, 3 and 4 rows; for
// each row count the channel count steps from 1 in increments of 15 while it
// stays below 192.
TEST(XX_FILL__WASMSIMD_X64, multiple_rows) {
  for (size_t r = 2; r < 5; r++) {
    for (size_t c = 1; c < 192; c += 15) {
      FillMicrokernelTester().channels(c).rows(r).Test(xnn_xx_fill_ukernel__wasmsimd_x64);
    }
  }
}
240 
// Same row/channel sweep as the multi-row case (rows 2..4, channels 1, 16,
// ... below 192), but with the tester's output stride set to 193, which is
// larger than every channel count visited.
TEST(XX_FILL__WASMSIMD_X64, multiple_rows_with_output_stride) {
  for (size_t r = 2; r < 5; r++) {
    for (size_t c = 1; c < 192; c += 15) {
      FillMicrokernelTester()
          .channels(c)
          .rows(r)
          .output_stride(193)
          .Test(xnn_xx_fill_ukernel__wasmsimd_x64);
    }
  }
}
252 #endif  // XNN_ARCH_WASMSIMD
253 
254 
// Runs the scalar x16 fill microkernel through FillMicrokernelTester with
// the channel count set to 1.
TEST(XX_FILL__SCALAR_X16, channels_eq_1) {
  FillMicrokernelTester().channels(1).Test(xnn_xx_fill_ukernel__scalar_x16);
}
260 
// Runs the scalar x16 fill microkernel through FillMicrokernelTester with
// the channel count set to 2.
TEST(XX_FILL__SCALAR_X16, channels_eq_2) {
  FillMicrokernelTester().channels(2).Test(xnn_xx_fill_ukernel__scalar_x16);
}
266 
// Runs the scalar x16 fill microkernel through FillMicrokernelTester with
// the channel count set to 4.
TEST(XX_FILL__SCALAR_X16, channels_eq_4) {
  FillMicrokernelTester().channels(4).Test(xnn_xx_fill_ukernel__scalar_x16);
}
272 
// Runs the scalar x16 fill microkernel through FillMicrokernelTester with
// the channel count set to 16, the value the kernel's "x16" suffix refers to.
TEST(XX_FILL__SCALAR_X16, channels_eq_16) {
  FillMicrokernelTester().channels(16).Test(xnn_xx_fill_ukernel__scalar_x16);
}
278 
// Exercises the scalar x16 fill microkernel with channel counts that are
// larger multiples of 16: the loop visits 32 and 48.
TEST(XX_FILL__SCALAR_X16, channels_div_16) {
  // Step by 16 so that both 32 and 48 are covered. The previous step of 48
  // jumped from 32 straight to 80, so the loop ended after a single
  // iteration and the upper bound of 48 was never exercised. Stepping by the
  // tile size also matches the channels_div_64 tests of the x64 kernels.
  for (size_t channels = 32; channels <= 48; channels += 16) {
    FillMicrokernelTester()
      .channels(channels)
      .Test(xnn_xx_fill_ukernel__scalar_x16);
  }
}
286 
// Exercises the scalar x16 fill microkernel with every channel count below
// 16 (1 through 15 inclusive).
TEST(XX_FILL__SCALAR_X16, channels_lt_16) {
  for (size_t c = 1; c < 16; c++) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__scalar_x16);
  }
}
294 
// Exercises the scalar x16 fill microkernel with every channel count
// strictly between 16 and 32 (17 through 31 inclusive).
TEST(XX_FILL__SCALAR_X16, channels_gt_16) {
  for (size_t c = 17; c < 32; c++) {
    FillMicrokernelTester().channels(c).Test(xnn_xx_fill_ukernel__scalar_x16);
  }
}
302 
// Exercises the scalar x16 fill microkernel with 2, 3 and 4 rows; for each
// row count the channel count steps from 1 in increments of 3 while it stays
// below 48.
TEST(XX_FILL__SCALAR_X16, multiple_rows) {
  for (size_t r = 2; r < 5; r++) {
    for (size_t c = 1; c < 48; c += 3) {
      FillMicrokernelTester().channels(c).rows(r).Test(xnn_xx_fill_ukernel__scalar_x16);
    }
  }
}
313 
// Same row/channel sweep as the multi-row case (rows 2..4, channels 1, 4,
// ... below 48), but with the tester's output stride set to 53, which is
// larger than every channel count visited.
TEST(XX_FILL__SCALAR_X16, multiple_rows_with_output_stride) {
  for (size_t r = 2; r < 5; r++) {
    for (size_t c = 1; c < 48; c += 3) {
      FillMicrokernelTester()
          .channels(c)
          .rows(r)
          .output_stride(53)
          .Test(xnn_xx_fill_ukernel__scalar_x16);
    }
  }
}
325