xref: /aosp_15_r20/external/llvm-libc/src/string/memory_utils/x86_64/inline_memmove.h (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===-- Memmove implementation for x86_64 -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H
9 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H
10 
11 #include "src/__support/macros/attributes.h" // LIBC_INLINE
12 #include "src/string/memory_utils/op_builtin.h"
13 #include "src/string/memory_utils/op_generic.h"
14 #include "src/string/memory_utils/op_x86.h"
15 #include "src/string/memory_utils/utils.h"
16 
17 #include <stddef.h> // size_t
18 
19 namespace LIBC_NAMESPACE_DECL {
20 
inline_memmove_small_size_x86(Ptr dst,CPtr src,size_t count)21 LIBC_INLINE bool inline_memmove_small_size_x86(Ptr dst, CPtr src,
22                                                size_t count) {
23 #if defined(__AVX512F__)
24   constexpr size_t vector_size = 64;
25   using uint128_t = generic_v128;
26   using uint256_t = generic_v256;
27   using uint512_t = generic_v512;
28 #elif defined(__AVX__)
29   constexpr size_t vector_size = 32;
30   using uint128_t = generic_v128;
31   using uint256_t = generic_v256;
32   using uint512_t = cpp::array<generic_v256, 2>;
33 #elif defined(__SSE2__)
34   constexpr size_t vector_size = 16;
35   using uint128_t = generic_v128;
36   using uint256_t = cpp::array<generic_v128, 2>;
37   using uint512_t = cpp::array<generic_v128, 4>;
38 #else
39   constexpr size_t vector_size = 8;
40   using uint128_t = cpp::array<uint64_t, 2>;
41   using uint256_t = cpp::array<uint64_t, 4>;
42   using uint512_t = cpp::array<uint64_t, 8>;
43 #endif
44   (void)vector_size;
45   if (count == 0)
46     return true;
47   if (count == 1) {
48     generic::Memmove<uint8_t>::block(dst, src);
49     return true;
50   }
51   if (count == 2) {
52     generic::Memmove<uint16_t>::block(dst, src);
53     return true;
54   }
55   if (count == 3) {
56     generic::Memmove<cpp::array<uint8_t, 3>>::block(dst, src);
57     return true;
58   }
59   if (count == 4) {
60     generic::Memmove<uint32_t>::block(dst, src);
61     return true;
62   }
63   if (count < 8) {
64     generic::Memmove<uint32_t>::head_tail(dst, src, count);
65     return true;
66   }
67   // If count is equal to a power of 2, we can handle it as head-tail
68   // of both smaller size and larger size (head-tail are either
69   // non-overlapping for smaller size, or completely collapsed
70   // for larger size). It seems to be more profitable to do the copy
71   // with the larger size, if it's natively supported (e.g. doing
72   // 2 collapsed 32-byte moves for count=64 if AVX2 is supported).
73   // But it's not profitable to use larger size if it's not natively
74   // supported: we will both use more instructions and handle fewer
75   // sizes in earlier branches.
76   if (vector_size >= 16 ? count < 16 : count <= 16) {
77     generic::Memmove<uint64_t>::head_tail(dst, src, count);
78     return true;
79   }
80   if (vector_size >= 32 ? count < 32 : count <= 32) {
81     generic::Memmove<uint128_t>::head_tail(dst, src, count);
82     return true;
83   }
84   if (vector_size >= 64 ? count < 64 : count <= 64) {
85     generic::Memmove<uint256_t>::head_tail(dst, src, count);
86     return true;
87   }
88   if (count <= 128) {
89     generic::Memmove<uint512_t>::head_tail(dst, src, count);
90     return true;
91   }
92   return false;
93 }
94 
inline_memmove_follow_up_x86(Ptr dst,CPtr src,size_t count)95 LIBC_INLINE void inline_memmove_follow_up_x86(Ptr dst, CPtr src, size_t count) {
96 #if defined(__AVX512F__)
97   using uint256_t = generic_v256;
98   using uint512_t = generic_v512;
99 #elif defined(__AVX__)
100   using uint256_t = generic_v256;
101   using uint512_t = cpp::array<generic_v256, 2>;
102 #elif defined(__SSE2__)
103   using uint256_t = cpp::array<generic_v128, 2>;
104   using uint512_t = cpp::array<generic_v128, 4>;
105 #else
106   using uint256_t = cpp::array<uint64_t, 4>;
107   using uint512_t = cpp::array<uint64_t, 8>;
108 #endif
109   if (dst < src) {
110     generic::Memmove<uint256_t>::align_forward<Arg::Src>(dst, src, count);
111     return generic::Memmove<uint512_t>::loop_and_tail_forward(dst, src, count);
112   } else {
113     generic::Memmove<uint256_t>::align_backward<Arg::Src>(dst, src, count);
114     return generic::Memmove<uint512_t>::loop_and_tail_backward(dst, src, count);
115   }
116 }
117 
118 } // namespace LIBC_NAMESPACE_DECL
119 
120 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMMOVE_H
121