xref: /aosp_15_r20/external/llvm-libc/src/__support/str_to_float.h (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===-- String to float conversion utils ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // -----------------------------------------------------------------------------
10 //                               **** WARNING ****
11 // This file is shared with libc++. You should also be careful when adding
12 // dependencies to this file, since it needs to build for all libc++ targets.
13 // -----------------------------------------------------------------------------
14 
15 #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H
16 #define LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H
17 
18 #include "src/__support/CPP/bit.h"
19 #include "src/__support/CPP/limits.h"
20 #include "src/__support/CPP/optional.h"
21 #include "src/__support/CPP/string_view.h"
22 #include "src/__support/FPUtil/FPBits.h"
23 #include "src/__support/FPUtil/rounding_mode.h"
24 #include "src/__support/common.h"
25 #include "src/__support/ctype_utils.h"
26 #include "src/__support/detailed_powers_of_ten.h"
27 #include "src/__support/high_precision_decimal.h"
28 #include "src/__support/macros/config.h"
29 #include "src/__support/macros/null_check.h"
30 #include "src/__support/macros/optimization.h"
31 #include "src/__support/str_to_integer.h"
32 #include "src/__support/str_to_num_result.h"
33 #include "src/__support/uint128.h"
34 #include "src/errno/libc_errno.h" // For ERANGE
35 
36 #include <stdint.h>
37 
38 namespace LIBC_NAMESPACE_DECL {
39 namespace internal {
40 
41 // -----------------------------------------------------------------------------
42 //                               **** WARNING ****
43 // This interface is shared with libc++, if you change this interface you need
44 // to update it in both libc and libc++.
45 // -----------------------------------------------------------------------------
46 template <class T> struct ExpandedFloat {
47   typename fputil::FPBits<T>::StorageType mantissa;
48   int32_t exponent;
49 };
50 
51 // -----------------------------------------------------------------------------
52 //                               **** WARNING ****
53 // This interface is shared with libc++, if you change this interface you need
54 // to update it in both libc and libc++.
55 // -----------------------------------------------------------------------------
56 template <class T> struct FloatConvertReturn {
57   ExpandedFloat<T> num = {0, 0};
58   int error = 0;
59 };
60 
low64(const UInt128 & num)61 LIBC_INLINE uint64_t low64(const UInt128 &num) {
62   return static_cast<uint64_t>(num & 0xffffffffffffffff);
63 }
64 
high64(const UInt128 & num)65 LIBC_INLINE uint64_t high64(const UInt128 &num) {
66   return static_cast<uint64_t>(num >> 64);
67 }
68 
set_implicit_bit(fputil::FPBits<T> &)69 template <class T> LIBC_INLINE void set_implicit_bit(fputil::FPBits<T> &) {
70   return;
71 }
72 
73 #if defined(LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80)
74 template <>
75 LIBC_INLINE void
76 set_implicit_bit<long double>(fputil::FPBits<long double> &result) {
77   result.set_implicit_bit(result.get_biased_exponent() != 0);
78 }
79 #endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
80 
81 // This Eisel-Lemire implementation is based on the algorithm described in the
82 // paper Number Parsing at a Gigabyte per Second, Software: Practice and
83 // Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the
84 // description by Nigel Tao
85 // (https://nigeltao.github.io/blog/2020/eisel-lemire.html) and the golang
86 // implementation, also by Nigel Tao
87 // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25)
88 // for some optimizations as well as handling 32 bit floats.
89 template <class T>
90 LIBC_INLINE cpp::optional<ExpandedFloat<T>>
91 eisel_lemire(ExpandedFloat<T> init_num,
92              RoundDirection round = RoundDirection::Nearest) {
93   using FPBits = typename fputil::FPBits<T>;
94   using StorageType = typename FPBits::StorageType;
95 
96   StorageType mantissa = init_num.mantissa;
97   int32_t exp10 = init_num.exponent;
98 
99   if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a
100                        // double, so we skip straight to the fallback.
101     return cpp::nullopt;
102   }
103 
104   // Exp10 Range
105   if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 ||
106       exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) {
107     return cpp::nullopt;
108   }
109 
110   // Normalization
111   uint32_t clz = cpp::countl_zero<StorageType>(mantissa);
112   mantissa <<= clz;
113 
114   int32_t exp2 =
115       exp10_to_exp2(exp10) + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - clz;
116 
117   // Multiplication
118   const uint64_t *power_of_ten =
119       DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10];
120 
121   UInt128 first_approx =
122       static_cast<UInt128>(mantissa) * static_cast<UInt128>(power_of_ten[1]);
123 
124   // Wider Approximation
125   UInt128 final_approx;
126   // The halfway constant is used to check if the bits that will be shifted away
127   // intially are all 1. For doubles this is 64 (bitstype size) - 52 (final
128   // mantissa size) - 3 (we shift away the last two bits separately for
129   // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
130   // it's 6 bits for floats in this case.
131   const uint64_t halfway_constant =
132       (uint64_t(1) << (FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))) - 1;
133   if ((high64(first_approx) & halfway_constant) == halfway_constant &&
134       low64(first_approx) + mantissa < mantissa) {
135     UInt128 low_bits =
136         static_cast<UInt128>(mantissa) * static_cast<UInt128>(power_of_ten[0]);
137     UInt128 second_approx =
138         first_approx + static_cast<UInt128>(high64(low_bits));
139 
140     if ((high64(second_approx) & halfway_constant) == halfway_constant &&
141         low64(second_approx) + 1 == 0 &&
142         low64(low_bits) + mantissa < mantissa) {
143       return cpp::nullopt;
144     }
145     final_approx = second_approx;
146   } else {
147     final_approx = first_approx;
148   }
149 
150   // Shifting to 54 bits for doubles and 25 bits for floats
151   StorageType msb = static_cast<StorageType>(high64(final_approx) >>
152                                              (FPBits::STORAGE_LEN - 1));
153   StorageType final_mantissa = static_cast<StorageType>(
154       high64(final_approx) >>
155       (msb + FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3)));
156   exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
157 
158   if (round == RoundDirection::Nearest) {
159     // Half-way ambiguity
160     if (low64(final_approx) == 0 &&
161         (high64(final_approx) & halfway_constant) == 0 &&
162         (final_mantissa & 3) == 1) {
163       return cpp::nullopt;
164     }
165 
166     // Round to even.
167     final_mantissa += final_mantissa & 1;
168 
169   } else if (round == RoundDirection::Up) {
170     // If any of the bits being rounded away are non-zero, then round up.
171     if (low64(final_approx) > 0 ||
172         (high64(final_approx) & halfway_constant) > 0) {
173       // Add two since the last current lowest bit is about to be shifted away.
174       final_mantissa += 2;
175     }
176   }
177   // else round down, which has no effect.
178 
179   // From 54 to 53 bits for doubles and 25 to 24 bits for floats
180   final_mantissa >>= 1;
181   if ((final_mantissa >> (FPBits::FRACTION_LEN + 1)) > 0) {
182     final_mantissa >>= 1;
183     ++exp2;
184   }
185 
186   // The if block is equivalent to (but has fewer branches than):
187   //   if exp2 <= 0 || exp2 >= 0x7FF { etc }
188   if (static_cast<uint32_t>(exp2) - 1 >= (1 << FPBits::EXP_LEN) - 2) {
189     return cpp::nullopt;
190   }
191 
192   ExpandedFloat<T> output;
193   output.mantissa = final_mantissa;
194   output.exponent = exp2;
195   return output;
196 }
197 
198 // TODO: Re-enable eisel-lemire for long double is double double once it's
199 // properly supported.
200 #if !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) &&                             \
201     !defined(LIBC_TYPES_LONG_DOUBLE_IS_DOUBLE_DOUBLE)
202 template <>
203 LIBC_INLINE cpp::optional<ExpandedFloat<long double>>
204 eisel_lemire<long double>(ExpandedFloat<long double> init_num,
205                           RoundDirection round) {
206   using FPBits = typename fputil::FPBits<long double>;
207   using StorageType = typename FPBits::StorageType;
208 
209   UInt128 mantissa = init_num.mantissa;
210   int32_t exp10 = init_num.exponent;
211 
212   // Exp10 Range
213   // This doesn't reach very far into the range for long doubles, since it's
214   // sized for doubles and their 11 exponent bits, and not for long doubles and
215   // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long
216   // double). This is a known tradeoff, and was made because a proper long
217   // double table would be approximately 16 times larger. This would have
218   // significant memory and storage costs all the time to speed up a relatively
219   // uncommon path. In addition the exp10_to_exp2 function only approximates
220   // multiplying by log(10)/log(2), and that approximation may not be accurate
221   // out to the full long double range.
222   if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 ||
223       exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) {
224     return cpp::nullopt;
225   }
226 
227   // Normalization
228   uint32_t clz = cpp::countl_zero(mantissa) -
229                  ((sizeof(UInt128) - sizeof(StorageType)) * CHAR_BIT);
230   mantissa <<= clz;
231 
232   int32_t exp2 =
233       exp10_to_exp2(exp10) + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - clz;
234 
235   // Multiplication
236   const uint64_t *power_of_ten =
237       DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10];
238 
239   // Since the input mantissa is more than 64 bits, we have to multiply with the
240   // full 128 bits of the power of ten to get an approximation with the same
241   // number of significant bits. This means that we only get the one
242   // approximation, and that approximation is 256 bits long.
243   UInt128 approx_upper = static_cast<UInt128>(high64(mantissa)) *
244                          static_cast<UInt128>(power_of_ten[1]);
245 
246   UInt128 approx_middle_a = static_cast<UInt128>(high64(mantissa)) *
247                             static_cast<UInt128>(power_of_ten[0]);
248   UInt128 approx_middle_b = static_cast<UInt128>(low64(mantissa)) *
249                             static_cast<UInt128>(power_of_ten[1]);
250 
251   UInt128 approx_middle = approx_middle_a + approx_middle_b;
252 
253   // Handle overflow in the middle
254   approx_upper += (approx_middle < approx_middle_a) ? UInt128(1) << 64 : 0;
255 
256   UInt128 approx_lower = static_cast<UInt128>(low64(mantissa)) *
257                          static_cast<UInt128>(power_of_ten[0]);
258 
259   UInt128 final_approx_lower =
260       approx_lower + (static_cast<UInt128>(low64(approx_middle)) << 64);
261   UInt128 final_approx_upper = approx_upper + high64(approx_middle) +
262                                (final_approx_lower < approx_lower ? 1 : 0);
263 
264   // The halfway constant is used to check if the bits that will be shifted away
265   // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64
266   // (final mantissa size) - 3 (we shift away the last two bits separately for
267   // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
268   // it's 12 bits for 128 bit floats in this case.
269   constexpr UInt128 HALFWAY_CONSTANT =
270       (UInt128(1) << (FPBits::STORAGE_LEN - (FPBits::FRACTION_LEN + 3))) - 1;
271 
272   if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
273       final_approx_lower + mantissa < mantissa) {
274     return cpp::nullopt;
275   }
276 
277   // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
278   uint32_t msb =
279       static_cast<uint32_t>(final_approx_upper >> (FPBits::STORAGE_LEN - 1));
280   UInt128 final_mantissa = final_approx_upper >> (msb + FPBits::STORAGE_LEN -
281                                                   (FPBits::FRACTION_LEN + 3));
282   exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
283 
284   if (round == RoundDirection::Nearest) {
285     // Half-way ambiguity
286     if (final_approx_lower == 0 &&
287         (final_approx_upper & HALFWAY_CONSTANT) == 0 &&
288         (final_mantissa & 3) == 1) {
289       return cpp::nullopt;
290     }
291     // Round to even.
292     final_mantissa += final_mantissa & 1;
293 
294   } else if (round == RoundDirection::Up) {
295     // If any of the bits being rounded away are non-zero, then round up.
296     if (final_approx_lower > 0 || (final_approx_upper & HALFWAY_CONSTANT) > 0) {
297       // Add two since the last current lowest bit is about to be shifted away.
298       final_mantissa += 2;
299     }
300   }
301   // else round down, which has no effect.
302 
303   // From 65 to 64 bits for 80 bit floats and 113  to 112 bits for 128 bit
304   // floats
305   final_mantissa >>= 1;
306   if ((final_mantissa >> (FPBits::FRACTION_LEN + 1)) > 0) {
307     final_mantissa >>= 1;
308     ++exp2;
309   }
310 
311   // The if block is equivalent to (but has fewer branches than):
312   //   if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc }
313   if (exp2 - 1 >= (1 << FPBits::EXP_LEN) - 2) {
314     return cpp::nullopt;
315   }
316 
317   ExpandedFloat<long double> output;
318   output.mantissa = static_cast<StorageType>(final_mantissa);
319   output.exponent = exp2;
320   return output;
321 }
322 #endif // !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) &&
323        // !defined(LIBC_TYPES_LONG_DOUBLE_IS_DOUBLE_DOUBLE)
324 
325 // The nth item in POWERS_OF_TWO represents the greatest power of two less than
326 // 10^n. This tells us how much we can safely shift without overshooting.
327 constexpr uint8_t POWERS_OF_TWO[19] = {
328     0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59,
329 };
330 constexpr int32_t NUM_POWERS_OF_TWO =
331     sizeof(POWERS_OF_TWO) / sizeof(POWERS_OF_TWO[0]);
332 
333 // Takes a mantissa and base 10 exponent and converts it into its closest
334 // floating point type T equivalent. This is the fallback algorithm used when
335 // the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based
336 // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this
337 // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
338 template <class T>
339 LIBC_INLINE FloatConvertReturn<T> simple_decimal_conversion(
340     const char *__restrict numStart,
341     const size_t num_len = cpp::numeric_limits<size_t>::max(),
342     RoundDirection round = RoundDirection::Nearest) {
343   using FPBits = typename fputil::FPBits<T>;
344   using StorageType = typename FPBits::StorageType;
345 
346   int32_t exp2 = 0;
347   HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart, num_len);
348 
349   FloatConvertReturn<T> output;
350 
351   if (hpd.get_num_digits() == 0) {
352     output.num = {0, 0};
353     return output;
354   }
355 
356   // If the exponent is too large and can't be represented in this size of
357   // float, return inf.
358   if (hpd.get_decimal_point() > 0 &&
359       exp10_to_exp2(hpd.get_decimal_point() - 1) > FPBits::EXP_BIAS) {
360     output.num = {0, fputil::FPBits<T>::MAX_BIASED_EXPONENT};
361     output.error = ERANGE;
362     return output;
363   }
364   // If the exponent is too small even for a subnormal, return 0.
365   if (hpd.get_decimal_point() < 0 &&
366       exp10_to_exp2(-hpd.get_decimal_point()) >
367           (FPBits::EXP_BIAS + static_cast<int32_t>(FPBits::FRACTION_LEN))) {
368     output.num = {0, 0};
369     output.error = ERANGE;
370     return output;
371   }
372 
373   // Right shift until the number is smaller than 1.
374   while (hpd.get_decimal_point() > 0) {
375     int32_t shift_amount = 0;
376     if (hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) {
377       shift_amount = 60;
378     } else {
379       shift_amount = POWERS_OF_TWO[hpd.get_decimal_point()];
380     }
381     exp2 += shift_amount;
382     hpd.shift(-shift_amount);
383   }
384 
385   // Left shift until the number is between 1/2 and 1
386   while (hpd.get_decimal_point() < 0 ||
387          (hpd.get_decimal_point() == 0 && hpd.get_digits()[0] < 5)) {
388     int32_t shift_amount = 0;
389 
390     if (-hpd.get_decimal_point() >= NUM_POWERS_OF_TWO) {
391       shift_amount = 60;
392     } else if (hpd.get_decimal_point() != 0) {
393       shift_amount = POWERS_OF_TWO[-hpd.get_decimal_point()];
394     } else { // This handles the case of the number being between .1 and .5
395       shift_amount = 1;
396     }
397     exp2 -= shift_amount;
398     hpd.shift(shift_amount);
399   }
400 
401   // Left shift once so that the number is between 1 and 2
402   --exp2;
403   hpd.shift(1);
404 
405   // Get the biased exponent
406   exp2 += FPBits::EXP_BIAS;
407 
408   // Handle the exponent being too large (and return inf).
409   if (exp2 >= FPBits::MAX_BIASED_EXPONENT) {
410     output.num = {0, FPBits::MAX_BIASED_EXPONENT};
411     output.error = ERANGE;
412     return output;
413   }
414 
415   // Shift left to fill the mantissa
416   hpd.shift(FPBits::FRACTION_LEN);
417   StorageType final_mantissa = hpd.round_to_integer_type<StorageType>();
418 
419   // Handle subnormals
420   if (exp2 <= 0) {
421     // Shift right until there is a valid exponent
422     while (exp2 < 0) {
423       hpd.shift(-1);
424       ++exp2;
425     }
426     // Shift right one more time to compensate for the left shift to get it
427     // between 1 and 2.
428     hpd.shift(-1);
429     final_mantissa = hpd.round_to_integer_type<StorageType>(round);
430 
431     // Check if by shifting right we've caused this to round to a normal number.
432     if ((final_mantissa >> FPBits::FRACTION_LEN) != 0) {
433       ++exp2;
434     }
435   }
436 
437   // Check if rounding added a bit, and shift down if that's the case.
438   if (final_mantissa == StorageType(2) << FPBits::FRACTION_LEN) {
439     final_mantissa >>= 1;
440     ++exp2;
441 
442     // Check if this rounding causes exp2 to go out of range and make the result
443     // INF. If this is the case, then finalMantissa and exp2 are already the
444     // correct values for an INF result.
445     if (exp2 >= FPBits::MAX_BIASED_EXPONENT) {
446       output.error = ERANGE;
447     }
448   }
449 
450   if (exp2 == 0) {
451     output.error = ERANGE;
452   }
453 
454   output.num = {final_mantissa, exp2};
455   return output;
456 }
457 
458 // This class is used for templating the constants for Clinger's Fast Path,
459 // described as a method of approximation in
460 // Clinger WD. How to Read Floating Point Numbers Accurately. SIGPLAN Not 1990
461 // Jun;25(6):92–101. https://doi.org/10.1145/93548.93557.
462 // As well as the additions by Gay that extend the useful range by the number of
463 // exact digits stored by the float type, described in
464 // Gay DM, Correctly rounded binary-decimal and decimal-binary conversions;
465 // 1990. AT&T Bell Laboratories Numerical Analysis Manuscript 90-10.
466 template <class T> class ClingerConsts;
467 
468 template <> class ClingerConsts<float> {
469 public:
470   static constexpr float POWERS_OF_TEN_ARRAY[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
471                                                   1e6, 1e7, 1e8, 1e9, 1e10};
472   static constexpr int32_t EXACT_POWERS_OF_TEN = 10;
473   static constexpr int32_t DIGITS_IN_MANTISSA = 7;
474   static constexpr float MAX_EXACT_INT = 16777215.0;
475 };
476 
477 template <> class ClingerConsts<double> {
478 public:
479   static constexpr double POWERS_OF_TEN_ARRAY[] = {
480       1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
481       1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
482   static constexpr int32_t EXACT_POWERS_OF_TEN = 22;
483   static constexpr int32_t DIGITS_IN_MANTISSA = 15;
484   static constexpr double MAX_EXACT_INT = 9007199254740991.0;
485 };
486 
487 #if defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64)
488 template <> class ClingerConsts<long double> {
489 public:
490   static constexpr long double POWERS_OF_TEN_ARRAY[] = {
491       1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
492       1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
493   static constexpr int32_t EXACT_POWERS_OF_TEN =
494       ClingerConsts<double>::EXACT_POWERS_OF_TEN;
495   static constexpr int32_t DIGITS_IN_MANTISSA =
496       ClingerConsts<double>::DIGITS_IN_MANTISSA;
497   static constexpr long double MAX_EXACT_INT =
498       ClingerConsts<double>::MAX_EXACT_INT;
499 };
500 #elif defined(LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80)
501 template <> class ClingerConsts<long double> {
502 public:
503   static constexpr long double POWERS_OF_TEN_ARRAY[] = {
504       1e0L,  1e1L,  1e2L,  1e3L,  1e4L,  1e5L,  1e6L,  1e7L,  1e8L,  1e9L,
505       1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L,
506       1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L};
507   static constexpr int32_t EXACT_POWERS_OF_TEN = 27;
508   static constexpr int32_t DIGITS_IN_MANTISSA = 21;
509   static constexpr long double MAX_EXACT_INT = 18446744073709551615.0L;
510 };
511 #elif defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT128)
512 template <> class ClingerConsts<long double> {
513 public:
514   static constexpr long double POWERS_OF_TEN_ARRAY[] = {
515       1e0L,  1e1L,  1e2L,  1e3L,  1e4L,  1e5L,  1e6L,  1e7L,  1e8L,  1e9L,
516       1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L,
517       1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L,
518       1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L,
519       1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L};
520   static constexpr int32_t EXACT_POWERS_OF_TEN = 48;
521   static constexpr int32_t DIGITS_IN_MANTISSA = 33;
522   static constexpr long double MAX_EXACT_INT =
523       10384593717069655257060992658440191.0L;
524 };
525 #elif defined(LIBC_TYPES_LONG_DOUBLE_IS_DOUBLE_DOUBLE)
526 // TODO: Add proper double double type support here, currently using constants
527 // for double since it should be safe.
528 template <> class ClingerConsts<long double> {
529 public:
530   static constexpr double POWERS_OF_TEN_ARRAY[] = {
531       1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
532       1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
533   static constexpr int32_t EXACT_POWERS_OF_TEN = 22;
534   static constexpr int32_t DIGITS_IN_MANTISSA = 15;
535   static constexpr double MAX_EXACT_INT = 9007199254740991.0;
536 };
537 #else
538 #error "Unknown long double type"
539 #endif
540 
541 // Take an exact mantissa and exponent and attempt to convert it using only
542 // exact floating point arithmetic. This only handles numbers with low
543 // exponents, but handles them quickly. This is an implementation of Clinger's
544 // Fast Path, as described above.
545 template <class T>
546 LIBC_INLINE cpp::optional<ExpandedFloat<T>>
547 clinger_fast_path(ExpandedFloat<T> init_num,
548                   RoundDirection round = RoundDirection::Nearest) {
549   using FPBits = typename fputil::FPBits<T>;
550   using StorageType = typename FPBits::StorageType;
551 
552   StorageType mantissa = init_num.mantissa;
553   int32_t exp10 = init_num.exponent;
554 
555   if ((mantissa >> FPBits::FRACTION_LEN) > 0) {
556     return cpp::nullopt;
557   }
558 
559   FPBits result;
560   T float_mantissa;
561   if constexpr (is_big_int_v<StorageType> || sizeof(T) > sizeof(uint64_t)) {
562     float_mantissa =
563         (static_cast<T>(uint64_t(mantissa >> 64)) * static_cast<T>(0x1.0p64)) +
564         static_cast<T>(uint64_t(mantissa));
565   } else {
566     float_mantissa = static_cast<T>(mantissa);
567   }
568 
569   if (exp10 == 0) {
570     result = FPBits(float_mantissa);
571   }
572   if (exp10 > 0) {
573     if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN +
574                     ClingerConsts<T>::DIGITS_IN_MANTISSA) {
575       return cpp::nullopt;
576     }
577     if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) {
578       float_mantissa = float_mantissa *
579                        ClingerConsts<T>::POWERS_OF_TEN_ARRAY
580                            [exp10 - ClingerConsts<T>::EXACT_POWERS_OF_TEN];
581       exp10 = ClingerConsts<T>::EXACT_POWERS_OF_TEN;
582     }
583     if (float_mantissa > ClingerConsts<T>::MAX_EXACT_INT) {
584       return cpp::nullopt;
585     }
586     result =
587         FPBits(float_mantissa * ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]);
588   } else if (exp10 < 0) {
589     if (-exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) {
590       return cpp::nullopt;
591     }
592     result =
593         FPBits(float_mantissa / ClingerConsts<T>::POWERS_OF_TEN_ARRAY[-exp10]);
594   }
595 
596   // If the rounding mode is not nearest, then the sign of the number may affect
597   // the result. To make sure the rounding mode is respected properly, the
598   // calculation is redone with a negative result, and the rounding mode is used
599   // to select the correct result.
600   if (round != RoundDirection::Nearest) {
601     FPBits negative_result;
602     // I'm 99% sure this will break under fast math optimizations.
603     negative_result = FPBits((-float_mantissa) *
604                              ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]);
605 
606     // If the results are equal, then we don't need to use the rounding mode.
607     if (result.get_val() != -negative_result.get_val()) {
608       FPBits lower_result;
609       FPBits higher_result;
610 
611       if (result.get_val() < -negative_result.get_val()) {
612         lower_result = result;
613         higher_result = negative_result;
614       } else {
615         lower_result = negative_result;
616         higher_result = result;
617       }
618 
619       if (round == RoundDirection::Up) {
620         result = higher_result;
621       } else {
622         result = lower_result;
623       }
624     }
625   }
626 
627   ExpandedFloat<T> output;
628   output.mantissa = result.get_explicit_mantissa();
629   output.exponent = result.get_biased_exponent();
630   return output;
631 }
632 
633 // The upper bound is the highest base-10 exponent that could possibly give a
634 // non-inf result for this size of float. The value is
635 // log10(2^(exponent bias)).
636 // The generic approximation uses the fact that log10(2^x) ~= x/3
get_upper_bound()637 template <typename T> LIBC_INLINE constexpr int32_t get_upper_bound() {
638   return fputil::FPBits<T>::EXP_BIAS / 3;
639 }
640 
641 template <> LIBC_INLINE constexpr int32_t get_upper_bound<float>() {
642   return 39;
643 }
644 
645 template <> LIBC_INLINE constexpr int32_t get_upper_bound<double>() {
646   return 309;
647 }
648 
649 // The lower bound is the largest negative base-10 exponent that could possibly
650 // give a non-zero result for this size of float. The value is
651 // log10(2^(exponent bias + final mantissa width + intermediate mantissa width))
652 // The intermediate mantissa is the integer that's been parsed from the string,
653 // and the final mantissa is the fractional part of the output number. A very
654 // low base 10 exponent with a very high intermediate mantissa can cancel each
655 // other out, and subnormal numbers allow for the result to be at the very low
656 // end of the final mantissa.
get_lower_bound()657 template <typename T> LIBC_INLINE constexpr int32_t get_lower_bound() {
658   using FPBits = typename fputil::FPBits<T>;
659   return -((FPBits::EXP_BIAS +
660             static_cast<int32_t>(FPBits::FRACTION_LEN + FPBits::STORAGE_LEN)) /
661            3);
662 }
663 
664 template <> LIBC_INLINE constexpr int32_t get_lower_bound<float>() {
665   return -(39 + 6 + 10);
666 }
667 
668 template <> LIBC_INLINE constexpr int32_t get_lower_bound<double>() {
669   return -(309 + 15 + 20);
670 }
671 
672 // -----------------------------------------------------------------------------
673 //                               **** WARNING ****
674 // This interface is shared with libc++, if you change this interface you need
675 // to update it in both libc and libc++.
676 // -----------------------------------------------------------------------------
677 // Takes a mantissa and base 10 exponent and converts it into its closest
678 // floating point type T equivalient. First we try the Eisel-Lemire algorithm,
679 // then if that fails then we fall back to a more accurate algorithm for
680 // accuracy. The resulting mantissa and exponent are placed in outputMantissa
681 // and outputExp2.
682 template <class T>
683 LIBC_INLINE FloatConvertReturn<T> decimal_exp_to_float(
684     ExpandedFloat<T> init_num, bool truncated, RoundDirection round,
685     const char *__restrict numStart,
686     const size_t num_len = cpp::numeric_limits<size_t>::max()) {
687   using FPBits = typename fputil::FPBits<T>;
688   using StorageType = typename FPBits::StorageType;
689 
690   StorageType mantissa = init_num.mantissa;
691   int32_t exp10 = init_num.exponent;
692 
693   FloatConvertReturn<T> output;
694   cpp::optional<ExpandedFloat<T>> opt_output;
695 
696   // If the exponent is too large and can't be represented in this size of
697   // float, return inf. These bounds are relatively loose, but are mostly
698   // serving as a first pass. Some close numbers getting through is okay.
699   if (exp10 > get_upper_bound<T>()) {
700     output.num = {0, FPBits::MAX_BIASED_EXPONENT};
701     output.error = ERANGE;
702     return output;
703   }
704   // If the exponent is too small even for a subnormal, return 0.
705   if (exp10 < get_lower_bound<T>()) {
706     output.num = {0, 0};
707     output.error = ERANGE;
708     return output;
709   }
710 
711   // Clinger's Fast Path and Eisel-Lemire can't set errno, but they can fail.
712   // For this reason the "error" field in their return values is used to
713   // represent whether they've failed as opposed to the errno value. Any
714   // non-zero value represents a failure.
715 
716 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH
717   if (!truncated) {
718     opt_output = clinger_fast_path<T>(init_num, round);
719     // If the algorithm succeeded the error will be 0, else it will be a
720     // non-zero number.
721     if (opt_output.has_value()) {
722       return {opt_output.value(), 0};
723     }
724   }
725 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH
726 
727 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE
728   // Try Eisel-Lemire
729   opt_output = eisel_lemire<T>(init_num, round);
730   if (opt_output.has_value()) {
731     if (!truncated) {
732       return {opt_output.value(), 0};
733     }
734     // If the mantissa is truncated, then the result may be off by the LSB, so
735     // check if rounding the mantissa up changes the result. If not, then it's
736     // safe, else use the fallback.
737     auto second_output = eisel_lemire<T>({mantissa + 1, exp10}, round);
738     if (second_output.has_value()) {
739       if (opt_output->mantissa == second_output->mantissa &&
740           opt_output->exponent == second_output->exponent) {
741         return {opt_output.value(), 0};
742       }
743     }
744   }
745 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE
746 
747 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION
748   output = simple_decimal_conversion<T>(numStart, num_len, round);
749 #else
750 #warning "Simple decimal conversion is disabled, result may not be correct."
751 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION
752 
753   return output;
754 }
755 
756 // -----------------------------------------------------------------------------
757 //                               **** WARNING ****
758 // This interface is shared with libc++, if you change this interface you need
759 // to update it in both libc and libc++.
760 // -----------------------------------------------------------------------------
761 // Takes a mantissa and base 2 exponent and converts it into its closest
762 // floating point type T equivalient. Since the exponent is already in the right
763 // form, this is mostly just shifting and rounding. This is used for hexadecimal
764 // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent.
765 template <class T>
binary_exp_to_float(ExpandedFloat<T> init_num,bool truncated,RoundDirection round)766 LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
767                                                       bool truncated,
768                                                       RoundDirection round) {
769   using FPBits = typename fputil::FPBits<T>;
770   using StorageType = typename FPBits::StorageType;
771 
772   StorageType mantissa = init_num.mantissa;
773   int32_t exp2 = init_num.exponent;
774 
775   FloatConvertReturn<T> output;
776 
777   // This is the number of leading zeroes a properly normalized float of type T
778   // should have.
779   constexpr int32_t INF_EXP = (1 << FPBits::EXP_LEN) - 1;
780 
781   // Normalization step 1: Bring the leading bit to the highest bit of
782   // StorageType.
783   uint32_t amount_to_shift_left = cpp::countl_zero<StorageType>(mantissa);
784   mantissa <<= amount_to_shift_left;
785 
786   // Keep exp2 representing the exponent of the lowest bit of StorageType.
787   exp2 -= amount_to_shift_left;
788 
789   // biased_exponent represents the biased exponent of the most significant bit.
790   int32_t biased_exponent = exp2 + FPBits::STORAGE_LEN + FPBits::EXP_BIAS - 1;
791 
792   // Handle numbers that're too large and get squashed to inf
793   if (biased_exponent >= INF_EXP) {
794     // This indicates an overflow, so we make the result INF and set errno.
795     output.num = {0, (1 << FPBits::EXP_LEN) - 1};
796     output.error = ERANGE;
797     return output;
798   }
799 
800   uint32_t amount_to_shift_right =
801       FPBits::STORAGE_LEN - FPBits::FRACTION_LEN - 1;
802 
803   // Handle subnormals.
804   if (biased_exponent <= 0) {
805     amount_to_shift_right += 1 - biased_exponent;
806     biased_exponent = 0;
807 
808     if (amount_to_shift_right > FPBits::STORAGE_LEN) {
809       // Return 0 if the exponent is too small.
810       output.num = {0, 0};
811       output.error = ERANGE;
812       return output;
813     }
814   }
815 
816   StorageType round_bit_mask = StorageType(1) << (amount_to_shift_right - 1);
817   StorageType sticky_mask = round_bit_mask - 1;
818   bool round_bit = static_cast<bool>(mantissa & round_bit_mask);
819   bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated;
820 
821   if (amount_to_shift_right < FPBits::STORAGE_LEN) {
822     // Shift the mantissa and clear the implicit bit.
823     mantissa >>= amount_to_shift_right;
824     mantissa &= FPBits::FRACTION_MASK;
825   } else {
826     mantissa = 0;
827   }
828   bool least_significant_bit = static_cast<bool>(mantissa & StorageType(1));
829 
830   // TODO: check that this rounding behavior is correct.
831 
832   if (round == RoundDirection::Nearest) {
833     // Perform rounding-to-nearest, tie-to-even.
834     if (round_bit && (least_significant_bit || sticky_bit)) {
835       ++mantissa;
836     }
837   } else if (round == RoundDirection::Up) {
838     if (round_bit || sticky_bit) {
839       ++mantissa;
840     }
841   } else /* (round == RoundDirection::Down)*/ {
842     if (round_bit && sticky_bit) {
843       ++mantissa;
844     }
845   }
846 
847   if (mantissa > FPBits::FRACTION_MASK) {
848     // Rounding causes the exponent to increase.
849     ++biased_exponent;
850 
851     if (biased_exponent == INF_EXP) {
852       output.error = ERANGE;
853     }
854   }
855 
856   if (biased_exponent == 0) {
857     output.error = ERANGE;
858   }
859 
860   output.num = {mantissa & FPBits::FRACTION_MASK, biased_exponent};
861   return output;
862 }
863 
864 // checks if the next 4 characters of the string pointer are the start of a
865 // hexadecimal floating point number. Does not advance the string pointer.
is_float_hex_start(const char * __restrict src,const char decimalPoint)866 LIBC_INLINE bool is_float_hex_start(const char *__restrict src,
867                                     const char decimalPoint) {
868   if (!(src[0] == '0' && tolower(src[1]) == 'x')) {
869     return false;
870   }
871   size_t first_digit = 2;
872   if (src[2] == decimalPoint) {
873     ++first_digit;
874   }
875   return isalnum(src[first_digit]) && b36_char_to_int(src[first_digit]) < 16;
876 }
877 
878 // Takes the start of a string representing a decimal float, as well as the
879 // local decimalPoint. It returns if it suceeded in parsing any digits, and if
880 // the return value is true then the outputs are pointer to the end of the
881 // number, and the mantissa and exponent for the closest float T representation.
882 // If the return value is false, then it is assumed that there is no number
883 // here.
884 template <class T>
885 LIBC_INLINE StrToNumResult<ExpandedFloat<T>>
decimal_string_to_float(const char * __restrict src,const char DECIMAL_POINT,RoundDirection round)886 decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
887                         RoundDirection round) {
888   using FPBits = typename fputil::FPBits<T>;
889   using StorageType = typename FPBits::StorageType;
890 
891   constexpr uint32_t BASE = 10;
892   constexpr char EXPONENT_MARKER = 'e';
893 
894   bool truncated = false;
895   bool seen_digit = false;
896   bool after_decimal = false;
897   StorageType mantissa = 0;
898   int32_t exponent = 0;
899 
900   size_t index = 0;
901 
902   StrToNumResult<ExpandedFloat<T>> output({0, 0});
903 
904   // The goal for the first step of parsing is to convert the number in src to
905   // the format mantissa * (base ^ exponent)
906 
907   // The loop fills the mantissa with as many digits as it can hold
908   const StorageType bitstype_max_div_by_base =
909       cpp::numeric_limits<StorageType>::max() / BASE;
910   while (true) {
911     if (isdigit(src[index])) {
912       uint32_t digit = src[index] - '0';
913       seen_digit = true;
914 
915       if (mantissa < bitstype_max_div_by_base) {
916         mantissa = (mantissa * BASE) + digit;
917         if (after_decimal) {
918           --exponent;
919         }
920       } else {
921         if (digit > 0)
922           truncated = true;
923         if (!after_decimal)
924           ++exponent;
925       }
926 
927       ++index;
928       continue;
929     }
930     if (src[index] == DECIMAL_POINT) {
931       if (after_decimal) {
932         break; // this means that src[index] points to a second decimal point,
933                // ending the number.
934       }
935       after_decimal = true;
936       ++index;
937       continue;
938     }
939     // The character is neither a digit nor a decimal point.
940     break;
941   }
942 
943   if (!seen_digit)
944     return output;
945 
946   // TODO: When adding max length argument, handle the case of a trailing
947   // EXPONENT MARKER, see scanf for more details.
948   if (tolower(src[index]) == EXPONENT_MARKER) {
949     bool has_sign = false;
950     if (src[index + 1] == '+' || src[index + 1] == '-') {
951       has_sign = true;
952     }
953     if (isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
954       ++index;
955       auto result = strtointeger<int32_t>(src + index, 10);
956       if (result.has_error())
957         output.error = result.error;
958       int32_t add_to_exponent = result.value;
959       index += result.parsed_len;
960 
961       // Here we do this operation as int64 to avoid overflow.
962       int64_t temp_exponent = static_cast<int64_t>(exponent) +
963                               static_cast<int64_t>(add_to_exponent);
964 
965       // If the result is in the valid range, then we use it. The valid range is
966       // also within the int32 range, so this prevents overflow issues.
967       if (temp_exponent > FPBits::MAX_BIASED_EXPONENT) {
968         exponent = FPBits::MAX_BIASED_EXPONENT;
969       } else if (temp_exponent < -FPBits::MAX_BIASED_EXPONENT) {
970         exponent = -FPBits::MAX_BIASED_EXPONENT;
971       } else {
972         exponent = static_cast<int32_t>(temp_exponent);
973       }
974     }
975   }
976 
977   output.parsed_len = index;
978   if (mantissa == 0) { // if we have a 0, then also 0 the exponent.
979     output.value = {0, 0};
980   } else {
981     auto temp =
982         decimal_exp_to_float<T>({mantissa, exponent}, truncated, round, src);
983     output.value = temp.num;
984     output.error = temp.error;
985   }
986   return output;
987 }
988 
989 // Takes the start of a string representing a hexadecimal float, as well as the
990 // local decimal point. It returns if it suceeded in parsing any digits, and if
991 // the return value is true then the outputs are pointer to the end of the
992 // number, and the mantissa and exponent for the closest float T representation.
993 // If the return value is false, then it is assumed that there is no number
994 // here.
995 template <class T>
996 LIBC_INLINE StrToNumResult<ExpandedFloat<T>>
hexadecimal_string_to_float(const char * __restrict src,const char DECIMAL_POINT,RoundDirection round)997 hexadecimal_string_to_float(const char *__restrict src,
998                             const char DECIMAL_POINT, RoundDirection round) {
999   using FPBits = typename fputil::FPBits<T>;
1000   using StorageType = typename FPBits::StorageType;
1001 
1002   constexpr uint32_t BASE = 16;
1003   constexpr char EXPONENT_MARKER = 'p';
1004 
1005   bool truncated = false;
1006   bool seen_digit = false;
1007   bool after_decimal = false;
1008   StorageType mantissa = 0;
1009   int32_t exponent = 0;
1010 
1011   size_t index = 0;
1012 
1013   StrToNumResult<ExpandedFloat<T>> output({0, 0});
1014 
1015   // The goal for the first step of parsing is to convert the number in src to
1016   // the format mantissa * (base ^ exponent)
1017 
1018   // The loop fills the mantissa with as many digits as it can hold
1019   const StorageType bitstype_max_div_by_base =
1020       cpp::numeric_limits<StorageType>::max() / BASE;
1021   while (true) {
1022     if (isalnum(src[index])) {
1023       uint32_t digit = b36_char_to_int(src[index]);
1024       if (digit < BASE)
1025         seen_digit = true;
1026       else
1027         break;
1028 
1029       if (mantissa < bitstype_max_div_by_base) {
1030         mantissa = (mantissa * BASE) + digit;
1031         if (after_decimal)
1032           --exponent;
1033       } else {
1034         if (digit > 0)
1035           truncated = true;
1036         if (!after_decimal)
1037           ++exponent;
1038       }
1039       ++index;
1040       continue;
1041     }
1042     if (src[index] == DECIMAL_POINT) {
1043       if (after_decimal) {
1044         break; // this means that src[index] points to a second decimal point,
1045                // ending the number.
1046       }
1047       after_decimal = true;
1048       ++index;
1049       continue;
1050     }
1051     // The character is neither a hexadecimal digit nor a decimal point.
1052     break;
1053   }
1054 
1055   if (!seen_digit)
1056     return output;
1057 
1058   // Convert the exponent from having a base of 16 to having a base of 2.
1059   exponent *= 4;
1060 
1061   if (tolower(src[index]) == EXPONENT_MARKER) {
1062     bool has_sign = false;
1063     if (src[index + 1] == '+' || src[index + 1] == '-') {
1064       has_sign = true;
1065     }
1066     if (isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
1067       ++index;
1068       auto result = strtointeger<int32_t>(src + index, 10);
1069       if (result.has_error())
1070         output.error = result.error;
1071 
1072       int32_t add_to_exponent = result.value;
1073       index += result.parsed_len;
1074 
1075       // Here we do this operation as int64 to avoid overflow.
1076       int64_t temp_exponent = static_cast<int64_t>(exponent) +
1077                               static_cast<int64_t>(add_to_exponent);
1078 
1079       // If the result is in the valid range, then we use it. The valid range is
1080       // also within the int32 range, so this prevents overflow issues.
1081       if (temp_exponent > FPBits::MAX_BIASED_EXPONENT) {
1082         exponent = FPBits::MAX_BIASED_EXPONENT;
1083       } else if (temp_exponent < -FPBits::MAX_BIASED_EXPONENT) {
1084         exponent = -FPBits::MAX_BIASED_EXPONENT;
1085       } else {
1086         exponent = static_cast<int32_t>(temp_exponent);
1087       }
1088     }
1089   }
1090   output.parsed_len = index;
1091   if (mantissa == 0) { // if we have a 0, then also 0 the exponent.
1092     output.value.exponent = 0;
1093     output.value.mantissa = 0;
1094   } else {
1095     auto temp = binary_exp_to_float<T>({mantissa, exponent}, truncated, round);
1096     output.error = temp.error;
1097     output.value = temp.num;
1098   }
1099   return output;
1100 }
1101 
1102 template <class T>
1103 LIBC_INLINE typename fputil::FPBits<T>::StorageType
nan_mantissa_from_ncharseq(const cpp::string_view ncharseq)1104 nan_mantissa_from_ncharseq(const cpp::string_view ncharseq) {
1105   using FPBits = typename fputil::FPBits<T>;
1106   using StorageType = typename FPBits::StorageType;
1107 
1108   StorageType nan_mantissa = 0;
1109 
1110   if (ncharseq.data() != nullptr && isdigit(ncharseq[0])) {
1111     StrToNumResult<StorageType> strtoint_result =
1112         strtointeger<StorageType>(ncharseq.data(), 0);
1113     if (!strtoint_result.has_error())
1114       nan_mantissa = strtoint_result.value;
1115 
1116     if (strtoint_result.parsed_len != static_cast<ptrdiff_t>(ncharseq.size()))
1117       nan_mantissa = 0;
1118   }
1119 
1120   return nan_mantissa;
1121 }
1122 
1123 // Takes a pointer to a string and a pointer to a string pointer. This function
1124 // is used as the backend for all of the string to float functions.
1125 // TODO: Add src_len member to match strtointeger.
1126 // TODO: Next, move from char* and length to string_view
1127 template <class T>
strtofloatingpoint(const char * __restrict src)1128 LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) {
1129   using FPBits = typename fputil::FPBits<T>;
1130   using StorageType = typename FPBits::StorageType;
1131 
1132   FPBits result = FPBits();
1133   bool seen_digit = false;
1134   char sign = '+';
1135 
1136   int error = 0;
1137 
1138   ptrdiff_t index = first_non_whitespace(src) - src;
1139 
1140   if (src[index] == '+' || src[index] == '-') {
1141     sign = src[index];
1142     ++index;
1143   }
1144 
1145   if (sign == '-') {
1146     result.set_sign(Sign::NEG);
1147   }
1148 
1149   static constexpr char DECIMAL_POINT = '.';
1150   static const char *inf_string = "infinity";
1151   static const char *nan_string = "nan";
1152 
1153   if (isdigit(src[index]) || src[index] == DECIMAL_POINT) { // regular number
1154     int base = 10;
1155     if (is_float_hex_start(src + index, DECIMAL_POINT)) {
1156       base = 16;
1157       index += 2;
1158       seen_digit = true;
1159     }
1160 
1161     RoundDirection round_direction = RoundDirection::Nearest;
1162 
1163     switch (fputil::quick_get_round()) {
1164     case FE_TONEAREST:
1165       round_direction = RoundDirection::Nearest;
1166       break;
1167     case FE_UPWARD:
1168       if (sign == '+') {
1169         round_direction = RoundDirection::Up;
1170       } else {
1171         round_direction = RoundDirection::Down;
1172       }
1173       break;
1174     case FE_DOWNWARD:
1175       if (sign == '+') {
1176         round_direction = RoundDirection::Down;
1177       } else {
1178         round_direction = RoundDirection::Up;
1179       }
1180       break;
1181     case FE_TOWARDZERO:
1182       round_direction = RoundDirection::Down;
1183       break;
1184     }
1185 
1186     StrToNumResult<ExpandedFloat<T>> parse_result({0, 0});
1187     if (base == 16) {
1188       parse_result = hexadecimal_string_to_float<T>(src + index, DECIMAL_POINT,
1189                                                     round_direction);
1190     } else { // base is 10
1191       parse_result = decimal_string_to_float<T>(src + index, DECIMAL_POINT,
1192                                                 round_direction);
1193     }
1194     seen_digit = parse_result.parsed_len != 0;
1195     result.set_mantissa(parse_result.value.mantissa);
1196     result.set_biased_exponent(parse_result.value.exponent);
1197     index += parse_result.parsed_len;
1198     error = parse_result.error;
1199   } else if (tolower(src[index]) == 'n') { // NaN
1200     if (tolower(src[index + 1]) == nan_string[1] &&
1201         tolower(src[index + 2]) == nan_string[2]) {
1202       seen_digit = true;
1203       index += 3;
1204       StorageType nan_mantissa = 0;
1205       // this handles the case of `NaN(n-character-sequence)`, where the
1206       // n-character-sequence is made of 0 or more letters, numbers, or
1207       // underscore characters in any order.
1208       if (src[index] == '(') {
1209         size_t left_paren = index;
1210         ++index;
1211         while (isalnum(src[index]) || src[index] == '_')
1212           ++index;
1213         if (src[index] == ')') {
1214           ++index;
1215           nan_mantissa = nan_mantissa_from_ncharseq<T>(
1216               cpp::string_view(src + (left_paren + 1), index - left_paren - 2));
1217         } else {
1218           index = left_paren;
1219         }
1220       }
1221       result = FPBits(result.quiet_nan(result.sign(), nan_mantissa));
1222     }
1223   } else if (tolower(src[index]) == 'i') { // INF
1224     if (tolower(src[index + 1]) == inf_string[1] &&
1225         tolower(src[index + 2]) == inf_string[2]) {
1226       seen_digit = true;
1227       result = FPBits(result.inf(result.sign()));
1228       if (tolower(src[index + 3]) == inf_string[3] &&
1229           tolower(src[index + 4]) == inf_string[4] &&
1230           tolower(src[index + 5]) == inf_string[5] &&
1231           tolower(src[index + 6]) == inf_string[6] &&
1232           tolower(src[index + 7]) == inf_string[7]) {
1233         // if the string is "INFINITY" then consume 8 characters.
1234         index += 8;
1235       } else {
1236         index += 3;
1237       }
1238     }
1239   }
1240   if (!seen_digit) { // If there is nothing to actually parse, then return 0.
1241     return {T(0), 0, error};
1242   }
1243 
1244   // This function only does something if T is long double and the platform uses
1245   // special 80 bit long doubles. Otherwise it should be inlined out.
1246   set_implicit_bit<T>(result);
1247 
1248   return {result.get_val(), index, error};
1249 }
1250 
strtonan(const char * arg)1251 template <class T> LIBC_INLINE StrToNumResult<T> strtonan(const char *arg) {
1252   using FPBits = typename fputil::FPBits<T>;
1253   using StorageType = typename FPBits::StorageType;
1254 
1255   LIBC_CRASH_ON_NULLPTR(arg);
1256 
1257   FPBits result;
1258   int error = 0;
1259   StorageType nan_mantissa = 0;
1260 
1261   ptrdiff_t index = 0;
1262   while (isalnum(arg[index]) || arg[index] == '_')
1263     ++index;
1264 
1265   if (arg[index] == '\0')
1266     nan_mantissa = nan_mantissa_from_ncharseq<T>(cpp::string_view(arg, index));
1267 
1268   result = FPBits::quiet_nan(Sign::POS, nan_mantissa);
1269   return {result.get_val(), 0, error};
1270 }
1271 
1272 } // namespace internal
1273 } // namespace LIBC_NAMESPACE_DECL
1274 
1275 #endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H
1276