xref: /aosp_15_r20/external/XNNPACK/src/xx-fill/scalar-x16.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 
8 #include <xnnpack/fill.h>
9 #include <xnnpack/unaligned.h>
10 
11 
xnn_xx_fill_ukernel__scalar_x16(size_t rows,size_t channels,void * output,size_t output_stride,const uint32_t fill_pattern)12 void xnn_xx_fill_ukernel__scalar_x16(
13     size_t rows,
14     size_t channels,
15     void* output,
16     size_t output_stride,
17     const uint32_t fill_pattern)
18 {
19   assert(rows != 0);
20   assert(channels != 0);
21 
22   const size_t output_increment = output_stride - channels;
23 
24   do {
25     uint32_t vfill_pattern = fill_pattern;
26     size_t c = channels;
27     for (; c >= 16 * sizeof(uint8_t); c -= 16 * sizeof(uint8_t)) {
28       unaligned_indexed_store_u32(output, 0, vfill_pattern);
29       unaligned_indexed_store_u32(output, 1, vfill_pattern);
30       unaligned_indexed_store_u32(output, 2, vfill_pattern);
31       unaligned_indexed_store_u32(output, 3, vfill_pattern);
32       output = ((uint8_t*) output + 16);
33     }
34     if XNN_UNLIKELY(c != 0) {
35       if XNN_LIKELY(c & (8 * sizeof(uint8_t))) {
36         unaligned_indexed_store_u32(output, 0, vfill_pattern);
37         unaligned_indexed_store_u32(output, 1, vfill_pattern);
38         output = ((uint8_t*) output + 8);
39       }
40       if XNN_LIKELY(c & (4 * sizeof(uint8_t))) {
41         unaligned_store_u32(output, vfill_pattern);
42         output = ((uint8_t*) output + 4);
43       }
44       if XNN_LIKELY(c & (2 * sizeof(uint8_t))) {
45         unaligned_store_u16(output, (uint16_t) vfill_pattern);
46         vfill_pattern >>= 16;
47         output = ((uint8_t*) output + 2);
48       }
49       if XNN_LIKELY(c & (1 * sizeof(uint8_t))) {
50         *((uint8_t*) output) = (uint8_t) vfill_pattern;
51         output = ((uint8_t*) output + 1);
52       }
53     }
54     output = (void*) ((uintptr_t) output + output_increment);
55   } while (--rows != 0);
56 }
57