1 //===-- Utilities to convert integral values to string ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Converts an integer to a string. 10 // 11 // By default, the string is written as decimal to an internal buffer and 12 // accessed via the 'view' method. 13 // 14 // IntegerToString<int> buffer(42); 15 // cpp::string_view view = buffer.view(); 16 // 17 // The buffer is allocated on the stack and its size is so that the conversion 18 // always succeeds. 19 // 20 // It is also possible to write the data to a preallocated buffer, but this may 21 // fail. 22 // 23 // char buffer[8]; 24 // if (auto maybe_view = IntegerToString<int>::write_to_span(buffer, 42)) { 25 // cpp::string_view view = *maybe_view; 26 // } 27 // 28 // The first template parameter is the type of the integer. 29 // The second template parameter defines how the integer is formatted. 30 // Available default are 'radix::Bin', 'radix::Oct', 'radix::Dec' and 31 // 'radix::Hex'. 32 // 33 // For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always 34 // interpreted as a positive type but 'radix::Dec' will honor negative values. 35 // e.g., 36 // 37 // IntegerToString<int8_t>(-1) // "-1" 38 // IntegerToString<int8_t, radix::Dec>(-1) // "-1" 39 // IntegerToString<int8_t, radix::Bin>(-1) // "11111111" 40 // IntegerToString<int8_t, radix::Oct>(-1) // "377" 41 // IntegerToString<int8_t, radix::Hex>(-1) // "ff" 42 // 43 // Additionnally, the format can be changed by navigating the subtypes: 44 // - WithPrefix : Adds "0b", "0", "0x" for binary, octal and hexadecimal 45 // - WithWidth<XX> : Pad string to XX characters filling leading digits with 0 46 // - Uppercase : Use uppercase letters (only for HexString) 47 // - WithSign : Prepend '+' for positive values (only for DecString) 48 // 49 // Examples 50 // -------- 51 // IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(0) : "+00" 52 // IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(-1) : "-01" 53 // IntegerToString<uint8_t, radix::Hex::WithPrefix::Uppercase>(255) : "0xFF" 54 // IntegerToString<uint8_t, radix::Hex::WithWidth<4>::Uppercase>(255) : "00FF" 55 //===----------------------------------------------------------------------===// 56 57 #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H 58 #define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H 59 60 #include <stdint.h> 61 62 #include "src/__support/CPP/algorithm.h" // max 63 #include "src/__support/CPP/array.h" 64 #include "src/__support/CPP/bit.h" 65 #include "src/__support/CPP/limits.h" 66 #include "src/__support/CPP/optional.h" 67 #include "src/__support/CPP/span.h" 68 #include "src/__support/CPP/string_view.h" 69 #include "src/__support/CPP/type_traits.h" 70 #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t 71 #include "src/__support/common.h" 72 #include "src/__support/macros/config.h" 73 74 namespace LIBC_NAMESPACE_DECL { 75 76 namespace details { 77 78 template <uint8_t base, bool prefix = false, bool force_sign = false, 79 bool is_uppercase = false, size_t min_digits = 1> 80 struct Fmt { 81 static constexpr uint8_t BASE = base; 82 static constexpr size_t MIN_DIGITS = min_digits; 83 static constexpr bool IS_UPPERCASE = is_uppercase; 84 static constexpr bool PREFIX = prefix; 85 static constexpr char FORCE_SIGN = force_sign; 86 87 using WithPrefix = Fmt<BASE, true, FORCE_SIGN, IS_UPPERCASE, MIN_DIGITS>; 88 using WithSign = Fmt<BASE, PREFIX, true, IS_UPPERCASE, MIN_DIGITS>; 89 using Uppercase = Fmt<BASE, PREFIX, FORCE_SIGN, true, MIN_DIGITS>; 90 template <size_t value> 91 using WithWidth = Fmt<BASE, PREFIX, FORCE_SIGN, IS_UPPERCASE, value>; 92 93 // Invariants 94 static constexpr uint8_t NUMERICAL_DIGITS = 10; 95 static constexpr uint8_t ALPHA_DIGITS = 26; 96 static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS; 97 static_assert(BASE > 1 && BASE <= MAX_DIGIT); 98 static_assert(!IS_UPPERCASE || BASE > 10, "Uppercase is only for radix > 10"); 99 static_assert(!FORCE_SIGN || BASE == 10, "WithSign is only for radix == 10"); 100 static_assert(!PREFIX || (BASE == 2 || BASE == 8 || BASE == 16), 101 "WithPrefix is only for radix == 2, 8 or 16"); 102 }; 103 104 // Move this to a separate header since it might be useful elsewhere. 105 template <bool forward> class StringBufferWriterImpl { 106 cpp::span<char> buffer; 107 size_t index = 0; 108 bool out_of_range = false; 109 location()110 LIBC_INLINE size_t location() const { 111 return forward ? index : buffer.size() - 1 - index; 112 } 113 114 public: 115 StringBufferWriterImpl(const StringBufferWriterImpl &) = delete; StringBufferWriterImpl(cpp::span<char> buffer)116 StringBufferWriterImpl(cpp::span<char> buffer) : buffer(buffer) {} 117 size()118 LIBC_INLINE size_t size() const { return index; } remainder_size()119 LIBC_INLINE size_t remainder_size() const { return buffer.size() - size(); } empty()120 LIBC_INLINE bool empty() const { return size() == 0; } full()121 LIBC_INLINE bool full() const { return size() == buffer.size(); } ok()122 LIBC_INLINE bool ok() const { return !out_of_range; } 123 push(char c)124 LIBC_INLINE StringBufferWriterImpl &push(char c) { 125 if (ok()) { 126 if (!full()) { 127 buffer[location()] = c; 128 ++index; 129 } else { 130 out_of_range = true; 131 } 132 } 133 return *this; 134 } 135 remainder_span()136 LIBC_INLINE cpp::span<char> remainder_span() const { 137 return forward ? buffer.last(remainder_size()) 138 : buffer.first(remainder_size()); 139 } 140 buffer_span()141 LIBC_INLINE cpp::span<char> buffer_span() const { 142 return forward ? buffer.first(size()) : buffer.last(size()); 143 } 144 buffer_view()145 LIBC_INLINE cpp::string_view buffer_view() const { 146 const auto s = buffer_span(); 147 return {s.data(), s.size()}; 148 } 149 }; 150 151 using StringBufferWriter = StringBufferWriterImpl<true>; 152 using BackwardStringBufferWriter = StringBufferWriterImpl<false>; 153 154 } // namespace details 155 156 namespace radix { 157 158 using Bin = details::Fmt<2>; 159 using Oct = details::Fmt<8>; 160 using Dec = details::Fmt<10>; 161 using Hex = details::Fmt<16>; 162 template <size_t radix> using Custom = details::Fmt<radix>; 163 164 } // namespace radix 165 166 // See file header for documentation. 167 template <typename T, typename Fmt = radix::Dec> class IntegerToString { 168 static_assert(cpp::is_integral_v<T> || is_big_int_v<T>); 169 compute_buffer_size()170 LIBC_INLINE static constexpr size_t compute_buffer_size() { 171 constexpr auto MAX_DIGITS = []() -> size_t { 172 // We size the string buffer for base 10 using an approximation algorithm: 173 // 174 // size = ceil(sizeof(T) * 5 / 2) 175 // 176 // If sizeof(T) is 1, then size is 3 (actually need 3) 177 // If sizeof(T) is 2, then size is 5 (actually need 5) 178 // If sizeof(T) is 4, then size is 10 (actually need 10) 179 // If sizeof(T) is 8, then size is 20 (actually need 20) 180 // If sizeof(T) is 16, then size is 40 (actually need 39) 181 // 182 // NOTE: The ceil operation is actually implemented as 183 // floor(((sizeof(T) * 5) + 1) / 2) 184 // where floor operation is just integer division. 185 // 186 // This estimation grows slightly faster than the actual value, but the 187 // overhead is small enough to tolerate. 188 if constexpr (Fmt::BASE == 10) 189 return ((sizeof(T) * 5) + 1) / 2; 190 // For other bases, we approximate by rounding down to the nearest power 191 // of two base, since the space needed is easy to calculate and it won't 192 // overestimate by too much. 193 constexpr auto FLOOR_LOG_2 = [](size_t num) -> size_t { 194 size_t i = 0; 195 for (; num > 1; num /= 2) 196 ++i; 197 return i; 198 }; 199 constexpr size_t BITS_PER_DIGIT = FLOOR_LOG_2(Fmt::BASE); 200 return ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT); 201 }; 202 constexpr size_t DIGIT_SIZE = cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS); 203 constexpr size_t SIGN_SIZE = Fmt::BASE == 10 ? 1 : 0; 204 constexpr size_t PREFIX_SIZE = Fmt::PREFIX ? 2 : 0; 205 return DIGIT_SIZE + SIGN_SIZE + PREFIX_SIZE; 206 } 207 208 static constexpr size_t BUFFER_SIZE = compute_buffer_size(); 209 static_assert(BUFFER_SIZE > 0); 210 211 // An internal stateless structure that handles the number formatting logic. 212 struct IntegerWriter { 213 static_assert(cpp::is_integral_v<T> || is_big_int_v<T>); 214 using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>; 215 digit_charIntegerWriter216 LIBC_INLINE static char digit_char(uint8_t digit) { 217 if (digit < 10) 218 return '0' + static_cast<char>(digit); 219 return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10); 220 } 221 222 LIBC_INLINE static void write_unsigned_numberIntegerWriter223 write_unsigned_number(UNSIGNED_T value, 224 details::BackwardStringBufferWriter &sink) { 225 for (; sink.ok() && value != 0; value /= Fmt::BASE) { 226 const uint8_t digit(static_cast<uint8_t>(value % Fmt::BASE)); 227 sink.push(digit_char(digit)); 228 } 229 } 230 231 // Returns the absolute value of 'value' as 'UNSIGNED_T'. absIntegerWriter232 LIBC_INLINE static UNSIGNED_T abs(T value) { 233 if (cpp::is_unsigned_v<T> || value >= 0) 234 return value; // already of the right sign. 235 236 // Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]). 237 // Thus negating the type's minimum value would overflow. 238 // From C++20 on, signed types are guaranteed to be represented as 2's 239 // complement. We take advantage of this representation and negate the 240 // value by using the exact same bit representation, e.g., 241 // binary : 0b1000'0000 242 // int8_t : -128 243 // uint8_t: 128 244 245 // Note: the compiler can completely optimize out the two branches and 246 // replace them by a simple negate instruction. 247 // https://godbolt.org/z/hE7zahT9W 248 if (value == cpp::numeric_limits<T>::min()) { 249 return cpp::bit_cast<UNSIGNED_T>(value); 250 } else { 251 return -value; // legal and representable both as T and UNSIGNED_T.` 252 } 253 } 254 writeIntegerWriter255 LIBC_INLINE static void write(T value, 256 details::BackwardStringBufferWriter &sink) { 257 if constexpr (Fmt::BASE == 10) { 258 write_unsigned_number(abs(value), sink); 259 } else { 260 write_unsigned_number(static_cast<UNSIGNED_T>(value), sink); 261 } 262 // width 263 while (sink.ok() && sink.size() < Fmt::MIN_DIGITS) 264 sink.push('0'); 265 // sign 266 if constexpr (Fmt::BASE == 10) { 267 if (value < 0) 268 sink.push('-'); 269 else if (Fmt::FORCE_SIGN) 270 sink.push('+'); 271 } 272 // prefix 273 if constexpr (Fmt::PREFIX) { 274 if constexpr (Fmt::BASE == 2) { 275 sink.push('b'); 276 sink.push('0'); 277 } 278 if constexpr (Fmt::BASE == 16) { 279 sink.push('x'); 280 sink.push('0'); 281 } 282 if constexpr (Fmt::BASE == 8) { 283 const cpp::string_view written = sink.buffer_view(); 284 if (written.empty() || written.front() != '0') 285 sink.push('0'); 286 } 287 } 288 } 289 }; 290 291 cpp::array<char, BUFFER_SIZE> array; 292 size_t written = 0; 293 294 public: 295 IntegerToString(const IntegerToString &) = delete; IntegerToString(T value)296 IntegerToString(T value) { 297 details::BackwardStringBufferWriter writer(array); 298 IntegerWriter::write(value, writer); 299 written = writer.size(); 300 } 301 302 [[nodiscard]] LIBC_INLINE static cpp::optional<cpp::string_view> format_to(cpp::span<char> buffer,T value)303 format_to(cpp::span<char> buffer, T value) { 304 details::BackwardStringBufferWriter writer(buffer); 305 IntegerWriter::write(value, writer); 306 if (writer.ok()) 307 return cpp::string_view(buffer.data() + buffer.size() - writer.size(), 308 writer.size()); 309 return cpp::nullopt; 310 } 311 buffer_size()312 LIBC_INLINE static constexpr size_t buffer_size() { return BUFFER_SIZE; } 313 size()314 LIBC_INLINE size_t size() const { return written; } 315 LIBC_INLINE cpp::string_view view() && = delete; view()316 LIBC_INLINE cpp::string_view view() const & { 317 return cpp::string_view(array.data() + array.size() - size(), size()); 318 } 319 }; 320 321 } // namespace LIBC_NAMESPACE_DECL 322 323 #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H 324