xref: /aosp_15_r20/external/llvm-libc/src/__support/integer_to_string.h (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===-- Utilities to convert integral values to string ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Converts an integer to a string.
10 //
11 // By default, the string is written as decimal to an internal buffer and
12 // accessed via the 'view' method.
13 //
14 //   IntegerToString<int> buffer(42);
15 //   cpp::string_view view = buffer.view();
16 //
17 // The buffer is allocated on the stack and its size is so that the conversion
18 // always succeeds.
19 //
20 // It is also possible to write the data to a preallocated buffer, but this may
21 // fail.
22 //
23 //   char buffer[8];
24 //   if (auto maybe_view = IntegerToString<int>::write_to_span(buffer, 42)) {
25 //     cpp::string_view view = *maybe_view;
26 //   }
27 //
28 // The first template parameter is the type of the integer.
29 // The second template parameter defines how the integer is formatted.
30 // Available default are 'radix::Bin', 'radix::Oct', 'radix::Dec' and
31 // 'radix::Hex'.
32 //
33 // For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always
34 // interpreted as a positive type but 'radix::Dec' will honor negative values.
35 // e.g.,
36 //
37 //   IntegerToString<int8_t>(-1)             // "-1"
38 //   IntegerToString<int8_t, radix::Dec>(-1) // "-1"
39 //   IntegerToString<int8_t, radix::Bin>(-1) // "11111111"
40 //   IntegerToString<int8_t, radix::Oct>(-1) // "377"
41 //   IntegerToString<int8_t, radix::Hex>(-1) // "ff"
42 //
43 // Additionnally, the format can be changed by navigating the subtypes:
44 //  - WithPrefix    : Adds "0b", "0", "0x" for binary, octal and hexadecimal
45 //  - WithWidth<XX> : Pad string to XX characters filling leading digits with 0
46 //  - Uppercase     : Use uppercase letters (only for HexString)
47 //  - WithSign      : Prepend '+' for positive values (only for DecString)
48 //
49 // Examples
50 // --------
51 //   IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(0)     : "+00"
52 //   IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(-1)    : "-01"
53 //   IntegerToString<uint8_t, radix::Hex::WithPrefix::Uppercase>(255)   : "0xFF"
54 //   IntegerToString<uint8_t, radix::Hex::WithWidth<4>::Uppercase>(255) : "00FF"
55 //===----------------------------------------------------------------------===//
56 
57 #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
58 #define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
59 
60 #include <stdint.h>
61 
62 #include "src/__support/CPP/algorithm.h" // max
63 #include "src/__support/CPP/array.h"
64 #include "src/__support/CPP/bit.h"
65 #include "src/__support/CPP/limits.h"
66 #include "src/__support/CPP/optional.h"
67 #include "src/__support/CPP/span.h"
68 #include "src/__support/CPP/string_view.h"
69 #include "src/__support/CPP/type_traits.h"
70 #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
71 #include "src/__support/common.h"
72 #include "src/__support/macros/config.h"
73 
74 namespace LIBC_NAMESPACE_DECL {
75 
76 namespace details {
77 
78 template <uint8_t base, bool prefix = false, bool force_sign = false,
79           bool is_uppercase = false, size_t min_digits = 1>
80 struct Fmt {
81   static constexpr uint8_t BASE = base;
82   static constexpr size_t MIN_DIGITS = min_digits;
83   static constexpr bool IS_UPPERCASE = is_uppercase;
84   static constexpr bool PREFIX = prefix;
85   static constexpr char FORCE_SIGN = force_sign;
86 
87   using WithPrefix = Fmt<BASE, true, FORCE_SIGN, IS_UPPERCASE, MIN_DIGITS>;
88   using WithSign = Fmt<BASE, PREFIX, true, IS_UPPERCASE, MIN_DIGITS>;
89   using Uppercase = Fmt<BASE, PREFIX, FORCE_SIGN, true, MIN_DIGITS>;
90   template <size_t value>
91   using WithWidth = Fmt<BASE, PREFIX, FORCE_SIGN, IS_UPPERCASE, value>;
92 
93   // Invariants
94   static constexpr uint8_t NUMERICAL_DIGITS = 10;
95   static constexpr uint8_t ALPHA_DIGITS = 26;
96   static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS;
97   static_assert(BASE > 1 && BASE <= MAX_DIGIT);
98   static_assert(!IS_UPPERCASE || BASE > 10, "Uppercase is only for radix > 10");
99   static_assert(!FORCE_SIGN || BASE == 10, "WithSign is only for radix == 10");
100   static_assert(!PREFIX || (BASE == 2 || BASE == 8 || BASE == 16),
101                 "WithPrefix is only for radix == 2, 8 or 16");
102 };
103 
104 // Move this to a separate header since it might be useful elsewhere.
105 template <bool forward> class StringBufferWriterImpl {
106   cpp::span<char> buffer;
107   size_t index = 0;
108   bool out_of_range = false;
109 
location()110   LIBC_INLINE size_t location() const {
111     return forward ? index : buffer.size() - 1 - index;
112   }
113 
114 public:
115   StringBufferWriterImpl(const StringBufferWriterImpl &) = delete;
StringBufferWriterImpl(cpp::span<char> buffer)116   StringBufferWriterImpl(cpp::span<char> buffer) : buffer(buffer) {}
117 
size()118   LIBC_INLINE size_t size() const { return index; }
remainder_size()119   LIBC_INLINE size_t remainder_size() const { return buffer.size() - size(); }
empty()120   LIBC_INLINE bool empty() const { return size() == 0; }
full()121   LIBC_INLINE bool full() const { return size() == buffer.size(); }
ok()122   LIBC_INLINE bool ok() const { return !out_of_range; }
123 
push(char c)124   LIBC_INLINE StringBufferWriterImpl &push(char c) {
125     if (ok()) {
126       if (!full()) {
127         buffer[location()] = c;
128         ++index;
129       } else {
130         out_of_range = true;
131       }
132     }
133     return *this;
134   }
135 
remainder_span()136   LIBC_INLINE cpp::span<char> remainder_span() const {
137     return forward ? buffer.last(remainder_size())
138                    : buffer.first(remainder_size());
139   }
140 
buffer_span()141   LIBC_INLINE cpp::span<char> buffer_span() const {
142     return forward ? buffer.first(size()) : buffer.last(size());
143   }
144 
buffer_view()145   LIBC_INLINE cpp::string_view buffer_view() const {
146     const auto s = buffer_span();
147     return {s.data(), s.size()};
148   }
149 };
150 
151 using StringBufferWriter = StringBufferWriterImpl<true>;
152 using BackwardStringBufferWriter = StringBufferWriterImpl<false>;
153 
154 } // namespace details
155 
156 namespace radix {
157 
158 using Bin = details::Fmt<2>;
159 using Oct = details::Fmt<8>;
160 using Dec = details::Fmt<10>;
161 using Hex = details::Fmt<16>;
162 template <size_t radix> using Custom = details::Fmt<radix>;
163 
164 } // namespace radix
165 
166 // See file header for documentation.
167 template <typename T, typename Fmt = radix::Dec> class IntegerToString {
168   static_assert(cpp::is_integral_v<T> || is_big_int_v<T>);
169 
compute_buffer_size()170   LIBC_INLINE static constexpr size_t compute_buffer_size() {
171     constexpr auto MAX_DIGITS = []() -> size_t {
172       // We size the string buffer for base 10 using an approximation algorithm:
173       //
174       //   size = ceil(sizeof(T) * 5 / 2)
175       //
176       // If sizeof(T) is 1, then size is 3 (actually need 3)
177       // If sizeof(T) is 2, then size is 5 (actually need 5)
178       // If sizeof(T) is 4, then size is 10 (actually need 10)
179       // If sizeof(T) is 8, then size is 20 (actually need 20)
180       // If sizeof(T) is 16, then size is 40 (actually need 39)
181       //
182       // NOTE: The ceil operation is actually implemented as
183       //     floor(((sizeof(T) * 5) + 1) / 2)
184       // where floor operation is just integer division.
185       //
186       // This estimation grows slightly faster than the actual value, but the
187       // overhead is small enough to tolerate.
188       if constexpr (Fmt::BASE == 10)
189         return ((sizeof(T) * 5) + 1) / 2;
190       // For other bases, we approximate by rounding down to the nearest power
191       // of two base, since the space needed is easy to calculate and it won't
192       // overestimate by too much.
193       constexpr auto FLOOR_LOG_2 = [](size_t num) -> size_t {
194         size_t i = 0;
195         for (; num > 1; num /= 2)
196           ++i;
197         return i;
198       };
199       constexpr size_t BITS_PER_DIGIT = FLOOR_LOG_2(Fmt::BASE);
200       return ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT);
201     };
202     constexpr size_t DIGIT_SIZE = cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS);
203     constexpr size_t SIGN_SIZE = Fmt::BASE == 10 ? 1 : 0;
204     constexpr size_t PREFIX_SIZE = Fmt::PREFIX ? 2 : 0;
205     return DIGIT_SIZE + SIGN_SIZE + PREFIX_SIZE;
206   }
207 
208   static constexpr size_t BUFFER_SIZE = compute_buffer_size();
209   static_assert(BUFFER_SIZE > 0);
210 
211   // An internal stateless structure that handles the number formatting logic.
212   struct IntegerWriter {
213     static_assert(cpp::is_integral_v<T> || is_big_int_v<T>);
214     using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
215 
digit_charIntegerWriter216     LIBC_INLINE static char digit_char(uint8_t digit) {
217       if (digit < 10)
218         return '0' + static_cast<char>(digit);
219       return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
220     }
221 
222     LIBC_INLINE static void
write_unsigned_numberIntegerWriter223     write_unsigned_number(UNSIGNED_T value,
224                           details::BackwardStringBufferWriter &sink) {
225       for (; sink.ok() && value != 0; value /= Fmt::BASE) {
226         const uint8_t digit(static_cast<uint8_t>(value % Fmt::BASE));
227         sink.push(digit_char(digit));
228       }
229     }
230 
231     // Returns the absolute value of 'value' as 'UNSIGNED_T'.
absIntegerWriter232     LIBC_INLINE static UNSIGNED_T abs(T value) {
233       if (cpp::is_unsigned_v<T> || value >= 0)
234         return value; // already of the right sign.
235 
236       // Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]).
237       // Thus negating the type's minimum value would overflow.
238       // From C++20 on, signed types are guaranteed to be represented as 2's
239       // complement. We take advantage of this representation and negate the
240       // value by using the exact same bit representation, e.g.,
241       // binary : 0b1000'0000
242       // int8_t : -128
243       // uint8_t:  128
244 
245       // Note: the compiler can completely optimize out the two branches and
246       // replace them by a simple negate instruction.
247       // https://godbolt.org/z/hE7zahT9W
248       if (value == cpp::numeric_limits<T>::min()) {
249         return cpp::bit_cast<UNSIGNED_T>(value);
250       } else {
251         return -value; // legal and representable both as T and UNSIGNED_T.`
252       }
253     }
254 
writeIntegerWriter255     LIBC_INLINE static void write(T value,
256                                   details::BackwardStringBufferWriter &sink) {
257       if constexpr (Fmt::BASE == 10) {
258         write_unsigned_number(abs(value), sink);
259       } else {
260         write_unsigned_number(static_cast<UNSIGNED_T>(value), sink);
261       }
262       // width
263       while (sink.ok() && sink.size() < Fmt::MIN_DIGITS)
264         sink.push('0');
265       // sign
266       if constexpr (Fmt::BASE == 10) {
267         if (value < 0)
268           sink.push('-');
269         else if (Fmt::FORCE_SIGN)
270           sink.push('+');
271       }
272       // prefix
273       if constexpr (Fmt::PREFIX) {
274         if constexpr (Fmt::BASE == 2) {
275           sink.push('b');
276           sink.push('0');
277         }
278         if constexpr (Fmt::BASE == 16) {
279           sink.push('x');
280           sink.push('0');
281         }
282         if constexpr (Fmt::BASE == 8) {
283           const cpp::string_view written = sink.buffer_view();
284           if (written.empty() || written.front() != '0')
285             sink.push('0');
286         }
287       }
288     }
289   };
290 
291   cpp::array<char, BUFFER_SIZE> array;
292   size_t written = 0;
293 
294 public:
295   IntegerToString(const IntegerToString &) = delete;
IntegerToString(T value)296   IntegerToString(T value) {
297     details::BackwardStringBufferWriter writer(array);
298     IntegerWriter::write(value, writer);
299     written = writer.size();
300   }
301 
302   [[nodiscard]] LIBC_INLINE static cpp::optional<cpp::string_view>
format_to(cpp::span<char> buffer,T value)303   format_to(cpp::span<char> buffer, T value) {
304     details::BackwardStringBufferWriter writer(buffer);
305     IntegerWriter::write(value, writer);
306     if (writer.ok())
307       return cpp::string_view(buffer.data() + buffer.size() - writer.size(),
308                               writer.size());
309     return cpp::nullopt;
310   }
311 
buffer_size()312   LIBC_INLINE static constexpr size_t buffer_size() { return BUFFER_SIZE; }
313 
size()314   LIBC_INLINE size_t size() const { return written; }
315   LIBC_INLINE cpp::string_view view() && = delete;
view()316   LIBC_INLINE cpp::string_view view() const & {
317     return cpp::string_view(array.data() + array.size() - size(), size());
318   }
319 };
320 
321 } // namespace LIBC_NAMESPACE_DECL
322 
323 #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
324