xref: /aosp_15_r20/external/llvm-libc/src/string/memory_utils/x86_64/inline_memset.h (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===-- Memset implementation for x86_64 ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
9 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
10 
11 #include "src/__support/macros/attributes.h" // LIBC_INLINE
12 #include "src/__support/macros/config.h"
13 #include "src/string/memory_utils/op_generic.h"
14 #include "src/string/memory_utils/op_x86.h"
15 #include "src/string/memory_utils/utils.h" // Ptr, CPtr
16 
17 #include <stddef.h> // size_t
18 
19 namespace LIBC_NAMESPACE_DECL {
20 namespace x86 {
21 // Size of one cache line for software prefetching
22 LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE_SIZE = 64;
23 LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES_SIZE =
24     K_ONE_CACHELINE_SIZE * 2;
25 LIBC_INLINE_VAR constexpr size_t K_FIVE_CACHELINES_SIZE =
26     K_ONE_CACHELINE_SIZE * 5;
27 
28 LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET =
29     LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING);
30 
31 } // namespace x86
32 
33 #if defined(__AVX512F__)
34 using uint128_t = generic_v128;
35 using uint256_t = generic_v256;
36 using uint512_t = generic_v512;
37 #elif defined(__AVX__)
38 using uint128_t = generic_v128;
39 using uint256_t = generic_v256;
40 using uint512_t = cpp::array<generic_v256, 2>;
41 #elif defined(__SSE2__)
42 using uint128_t = generic_v128;
43 using uint256_t = cpp::array<generic_v128, 2>;
44 using uint512_t = cpp::array<generic_v128, 4>;
45 #else
46 using uint128_t = cpp::array<uint64_t, 2>;
47 using uint256_t = cpp::array<uint64_t, 4>;
48 using uint512_t = cpp::array<uint64_t, 8>;
49 #endif
50 
51 [[maybe_unused]] LIBC_INLINE static void
inline_memset_x86_gt64_sw_prefetching(Ptr dst,uint8_t value,size_t count)52 inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) {
53   constexpr size_t PREFETCH_DISTANCE = x86::K_FIVE_CACHELINES_SIZE;
54   constexpr size_t PREFETCH_DEGREE = x86::K_TWO_CACHELINES_SIZE;
55   constexpr size_t SIZE = sizeof(uint256_t);
56   // Prefetch one cache line
57   prefetch_for_write(dst + x86::K_ONE_CACHELINE_SIZE);
58   if (count <= 128)
59     return generic::Memset<uint512_t>::head_tail(dst, value, count);
60   // Prefetch the second cache line
61   prefetch_for_write(dst + x86::K_TWO_CACHELINES_SIZE);
62   // Aligned loop
63   generic::Memset<uint256_t>::block(dst, value);
64   align_to_next_boundary<32>(dst, count);
65   if (count <= 192) {
66     return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
67   } else {
68     generic::MemsetSequence<uint512_t, uint256_t>::block(dst, value);
69     size_t offset = 96;
70     while (offset + PREFETCH_DEGREE + SIZE <= count) {
71       prefetch_for_write(dst + offset + PREFETCH_DISTANCE);
72       prefetch_for_write(dst + offset + PREFETCH_DISTANCE +
73                          x86::K_ONE_CACHELINE_SIZE);
74       for (size_t i = 0; i < PREFETCH_DEGREE; i += SIZE, offset += SIZE)
75         generic::Memset<uint256_t>::block(dst + offset, value);
76     }
77     generic::Memset<uint256_t>::loop_and_tail_offset(dst, value, count, offset);
78   }
79 }
80 
81 [[maybe_unused]] LIBC_INLINE static void
inline_memset_x86(Ptr dst,uint8_t value,size_t count)82 inline_memset_x86(Ptr dst, uint8_t value, size_t count) {
83   if (count == 0)
84     return;
85   if (count == 1)
86     return generic::Memset<uint8_t>::block(dst, value);
87   if (count == 2)
88     return generic::Memset<uint16_t>::block(dst, value);
89   if (count == 3)
90     return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value);
91   if (count <= 8)
92     return generic::Memset<uint32_t>::head_tail(dst, value, count);
93   if (count <= 16)
94     return generic::Memset<uint64_t>::head_tail(dst, value, count);
95   if (count <= 32)
96     return generic::Memset<uint128_t>::head_tail(dst, value, count);
97   if (count <= 64)
98     return generic::Memset<uint256_t>::head_tail(dst, value, count);
99   if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET)
100     return inline_memset_x86_gt64_sw_prefetching(dst, value, count);
101   if (count <= 128)
102     return generic::Memset<uint512_t>::head_tail(dst, value, count);
103   // Aligned loop
104   generic::Memset<uint256_t>::block(dst, value);
105   align_to_next_boundary<32>(dst, count);
106   return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
107 }
108 } // namespace LIBC_NAMESPACE_DECL
109 
110 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
111