//===-- Memory utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/cstddef.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/endian_internal.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"

#include <stddef.h> // size_t
#include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN

namespace LIBC_NAMESPACE_DECL {

// Returns the number of bytes to subtract from ptr to get to the previous
// multiple of alignment. If ptr is already aligned returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  // The logic is not straightforward and involves unsigned modulo arithmetic
  // but the generated code is as fast as it can be.
  return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned returns alignment.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
  return alignment - distance_to_align_down<alignment>(ptr);
}
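
// For illustration, with alignment 16 and a hypothetical pointer 3 bytes past
// a 16-byte boundary:
//   distance_to_align_down<16>(ptr);   // 3, back to the previous boundary
//   distance_to_align_up<16>(ptr);     // 13, forward to the next boundary
//   distance_to_next_aligned<16>(ptr); // 13, or 16 when ptr is already aligned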

// Returns the same pointer but notifies the compiler that it is aligned.
template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
  return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}

// Returns true iff memory regions [p1, p1 + size) and [p2, p2 + size) are
// disjoint.
LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
  const ptrdiff_t sdiff =
      static_cast<const char *>(p1) - static_cast<const char *>(p2);
  // We use bit_cast to make sure that we don't run into accidental integer
  // promotion. Notably the unary minus operator goes through integer promotion
  // at the expression level. We assume arithmetic to be two's complement (i.e.,
  // bit_cast has the same behavior as a regular signed to unsigned cast).
  static_assert(-1 == ~0, "not 2's complement");
  const size_t udiff = cpp::bit_cast<size_t>(sdiff);
  // Integer promotion would be caught here.
  const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
  // This is expected to compile to a conditional move.
  return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
}
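
// For illustration, with p1 == p2 + 3 (hypothetical pointers):
//   is_disjoint(p1, p2, 3); // true, [p2, p2 + 3) ends where [p1, p1 + 3) starts
//   is_disjoint(p1, p2, 4); // false, the two 4-byte regions overlap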

#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif

#if __has_builtin(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif

// Performs a constant count copy.
template <size_t Size>
LIBC_INLINE void memcpy_inline(void *__restrict dst,
                               const void *__restrict src) {
#ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
  __builtin_memcpy_inline(dst, src, Size);
#else
  // In memory functions `memcpy_inline` is instantiated several times with
  // different values of the Size parameter. This doesn't play well with GCC's
  // Value Range Analysis that wrongly detects out of bounds accesses. We
  // disable these warnings for the purpose of this function.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#pragma GCC diagnostic ignored "-Wstringop-overflow"
  for (size_t i = 0; i < Size; ++i)
    static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
#pragma GCC diagnostic pop
#endif
}
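
// For illustration, copying a fixed 8-byte block (hypothetical buffers):
//   char src[8], dst[8];
//   memcpy_inline<8>(dst, src); // the count is a compile-time constant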

using Ptr = cpp::byte *;        // Pointer to raw data.
using CPtr = const cpp::byte *; // Const pointer to raw data.

// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type.
template <typename T> struct StrictIntegralType {
  static_assert(cpp::is_integral_v<T>);

  // Can only be constructed from a T.
  template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
  LIBC_INLINE StrictIntegralType(U value) : value(value) {}

  // Allows using the type in an if statement.
  LIBC_INLINE explicit operator bool() const { return value; }

  // If the type is unsigned (e.g., for bcmp) we allow bitwise OR operations.
  LIBC_INLINE StrictIntegralType
  operator|(const StrictIntegralType &Rhs) const {
    static_assert(!cpp::is_signed_v<T>);
    return value | Rhs.value;
  }

  // For interaction with the C API we allow explicit conversion back to the
  // `int` type.
  LIBC_INLINE explicit operator int() const {
    // bit_cast makes sure that T and int have the same size.
    return cpp::bit_cast<int>(value);
  }

  // Helpers to get the zero and nonzero values.
  LIBC_INLINE static constexpr StrictIntegralType zero() { return {T(0)}; }
  LIBC_INLINE static constexpr StrictIntegralType nonzero() { return {T(1)}; }

private:
  T value;
};

using MemcmpReturnType = StrictIntegralType<int32_t>;
using BcmpReturnType = StrictIntegralType<uint32_t>;
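
// For illustration, typical usage of the strict return types (hypothetical):
//   MemcmpReturnType ret = MemcmpReturnType::zero();
//   if (!ret)
//     return static_cast<int>(ret); // explicit conversion back to the C API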

// This implements the semantic of 'memcmp' returning a negative value when 'a'
// is less than 'b', '0' when 'a' equals 'b' and a positive number otherwise.
LIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
  // We perform the difference as an int64_t.
  const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
  // For the int64_t to int32_t conversion we want the following properties:
  // - int32_t[31:31] == 1 iff diff < 0
  // - int32_t[31:0] == 0 iff diff == 0

  // We also observe that:
  // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
  // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
  // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
  // - https://godbolt.org/z/8W7qWP6e5
  // - This implies that we need only look at diff[32:32] to determine the
  //   sign bit of the returned int32_t.

  // So, we do the following:
  // - int32_t[31:31] = diff[32:32]
  // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.

  // And, we can achieve the above by the expression below. We could have also
  // used (diff64 >> 1) | (diff64 & 0x1) but (diff64 & 0xFFFF) is faster than
  // (diff64 & 0x1). https://godbolt.org/z/j3b569rW1
  return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
}
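
// For illustration, a few worked values of cmp_uint32_t:
//   cmp_uint32_t(1, 2); // diff == -1, yields a negative int32_t
//   cmp_uint32_t(2, 1); // diff ==  1, yields a positive int32_t
//   cmp_uint32_t(7, 7); // diff ==  0, yields 0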

// Returns a negative value if 'a' is less than 'b' and a positive value
// otherwise. This implements the semantic of 'memcmp' when we know that 'a' and
// 'b' differ.
LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
#if defined(LIBC_TARGET_ARCH_IS_X86)
  // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
  // positive and negative value respectively as they are one value apart:
  //   xor     eax, eax         <- free
  //   cmp     rdi, rsi         <- serializing
  //   adc     eax, 2147483647  <- serializing

  // Unfortunately we found instances of client code that negate the result of
  // 'memcmp' to reverse ordering. Because signed integers are not symmetric
  // (e.g., int8_t ∈ [-128, 127]) returning 'INT_MIN' would break such code as
  // `-INT_MIN` is not representable as an int32_t.

  // As a consequence, we use 5 and -5, which is still fine in terms of
  // latency:
  //   cmp     rdi, rsi         <- serializing
  //   mov     ecx, -5          <- can be done in parallel
  //   mov     eax, 5           <- can be done in parallel
  //   cmovb   eax, ecx         <- serializing
  static constexpr int32_t POSITIVE = 5;
  static constexpr int32_t NEGATIVE = -5;
#else
  // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
  // On ARMv8, both strategies lead to the same performance.
  static constexpr int32_t POSITIVE = 1;
  static constexpr int32_t NEGATIVE = -1;
#endif
  static_assert(POSITIVE > 0);
  static_assert(NEGATIVE < 0);
  return a < b ? NEGATIVE : POSITIVE;
}
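
// For illustration (hypothetical calls): cmp_neq_uint64_t(1, 2) yields a
// negative value and cmp_neq_uint64_t(2, 1) a positive one; because the two
// constants are symmetric, client code negating the result stays well defined.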

// Loads bytes from memory (possibly unaligned) and materializes them as a
// value of type T.
template <typename T> LIBC_INLINE T load(CPtr ptr) {
  T out;
  memcpy_inline<sizeof(T)>(&out, ptr);
  return out;
}

// Stores a value of type T in memory (possibly unaligned).
template <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
  memcpy_inline<sizeof(T)>(ptr, &value);
}
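
// For illustration (hypothetical buffer):
//   cpp::byte buffer[8] = {};
//   store<uint32_t>(buffer, 0xDEADBEEFU); // possibly unaligned 4-byte store
//   uint32_t v = load<uint32_t>(buffer);  // v == 0xDEADBEEF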

// On architectures that do not allow for unaligned access we perform several
// aligned accesses and recombine them through shifts and logical operations.
// For instance, if we know that the pointer is 2-byte aligned we can decompose
// a 64-bit operation into four 16-bit operations.

// Loads a 'ValueType' by decomposing it into several loads that are assumed to
// be aligned.
// e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE ValueType load_aligned(CPtr src) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
  if constexpr (sizeof...(TS) > 0) {
    constexpr size_t SHIFT = sizeof(T) * 8;
    const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
    if constexpr (Endian::IS_LITTLE)
      return value | (next << SHIFT);
    else if constexpr (Endian::IS_BIG)
      return (value << SHIFT) | next;
    else
      static_assert(cpp::always_false<T>, "Invalid endianness");
  } else {
    return value;
  }
}
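
// For illustration, on a little-endian target with a 2-byte aligned src,
// load_aligned<uint32_t, uint16_t, uint16_t>(src) performs two aligned 16-bit
// loads and recombines them as 'lo | (hi << 16)'.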

// Alias for loading a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  return load_aligned<uint32_t, T, TS...>(src + offset);
}

// Alias for loading a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  return load_aligned<uint64_t, T, TS...>(src + offset);
}
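
// For illustration, the decomposition types must add up to the loaded width
// (hypothetical calls):
//   load32_aligned<uint16_t, uint16_t>(src, offset);
//   load64_aligned<uint32_t, uint16_t, uint16_t>(src, offset);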

// Stores a 'ValueType' by decomposing it into several stores that are assumed
// to be aligned.
// e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  constexpr size_t SHIFT = sizeof(T) * 8;
  if constexpr (Endian::IS_LITTLE) {
    store<T>(assume_aligned<sizeof(T)>(dst), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst + sizeof(T));
  } else if constexpr (Endian::IS_BIG) {
    constexpr size_t OFFSET = (0 + ... + sizeof(TS));
    store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst);
  } else {
    static_assert(cpp::always_false<T>, "Invalid endianness");
  }
}

// Alias for storing a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  store_aligned<uint32_t, T, TS...>(value, dst + offset);
}

// Alias for storing a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  store_aligned<uint64_t, T, TS...>(value, dst + offset);
}
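
// For illustration, storing a 32-bit value through two aligned 16-bit stores
// (hypothetical 2-byte aligned dst):
//   store32_aligned<uint16_t, uint16_t>(value, dst, offset);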

// Advances the pointers p1 and p2 by offset bytes and decreases count by the
// same amount.
template <typename T1, typename T2>
LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
                        T2 *__restrict &p2, size_t &count) {
  p1 += offset;
  p2 += offset;
  count -= offset;
}
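
// For illustration (hypothetical call): adjust(16, dst, src, count) moves both
// pointers 16 bytes forward and shrinks the remaining count by 16.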

// Advances p1 and p2 so p1 gets aligned to the next SIZE-byte boundary
// and decreases count by the same amount.
// We make sure the compiler knows about the adjusted pointer alignment.
template <size_t SIZE, typename T1, typename T2>
void align_p1_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                               size_t &count) {
  adjust(distance_to_next_aligned<SIZE>(p1), p1, p2, count);
  p1 = assume_aligned<SIZE>(p1);
}

// Same as align_p1_to_next_boundary above but with a single pointer instead.
template <size_t SIZE, typename T>
LIBC_INLINE void align_to_next_boundary(T *&p1, size_t &count) {
  const T *dummy = p1;
  align_p1_to_next_boundary<SIZE>(p1, dummy, count);
}

// An enum class that discriminates between the first and second pointer.
enum class Arg { P1, P2, Dst = P1, Src = P2 };

// Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
// Precondition: &p1 != &p2
template <size_t SIZE, Arg AlignOn, typename T1, typename T2>
LIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                                        size_t &count) {
  if constexpr (AlignOn == Arg::P1)
    align_p1_to_next_boundary<SIZE>(p1, p2, count);
  else if constexpr (AlignOn == Arg::P2)
    align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
  else
    static_assert(cpp::always_false<T1>,
                  "AlignOn must be either Arg::P1 or Arg::P2");
}
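
// For illustration, in a copy loop one might align on the destination
// (hypothetical pointers and count):
//   align_to_next_boundary<32, Arg::Dst>(dst, src, count);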

template <size_t SIZE> struct AlignHelper {
  LIBC_INLINE AlignHelper(CPtr ptr)
      : offset(distance_to_next_aligned<SIZE>(ptr)) {}

  LIBC_INLINE bool not_aligned() const { return offset != SIZE; }
  uintptr_t offset;
};
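
// For illustration (hypothetical pointer): AlignHelper<16> helper(ptr);
// helper.offset is the distance to the next 16-byte boundary and
// helper.not_aligned() is true unless ptr is already 16-byte aligned.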

LIBC_INLINE void prefetch_for_write(CPtr dst) {
  __builtin_prefetch(dst, /*write*/ 1, /*max locality*/ 3);
}

LIBC_INLINE void prefetch_to_local_cache(CPtr dst) {
  __builtin_prefetch(dst, /*read*/ 0, /*max locality*/ 3);
}

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H