// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*9356374aSAndroid Build Coastguard Worker
15*9356374aSAndroid Build Coastguard Worker #include "absl/strings/internal/charconv_parse.h"
16*9356374aSAndroid Build Coastguard Worker #include "absl/strings/charconv.h"
17*9356374aSAndroid Build Coastguard Worker
18*9356374aSAndroid Build Coastguard Worker #include <cassert>
19*9356374aSAndroid Build Coastguard Worker #include <cstdint>
20*9356374aSAndroid Build Coastguard Worker #include <limits>
21*9356374aSAndroid Build Coastguard Worker
22*9356374aSAndroid Build Coastguard Worker #include "absl/strings/internal/memutil.h"
23*9356374aSAndroid Build Coastguard Worker
24*9356374aSAndroid Build Coastguard Worker namespace absl {
25*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_BEGIN
26*9356374aSAndroid Build Coastguard Worker namespace {
27*9356374aSAndroid Build Coastguard Worker
// ParseFloat<10> will read the first 19 significant digits of the mantissa.
// This number was chosen for multiple reasons.
//
// (a) First, for whatever integer type we choose to represent the mantissa, we
// want to choose the largest possible number of decimal digits for that integer
// type.  We are using uint64_t, which can express any 19-digit unsigned
// integer.
//
// (b) Second, we need to parse enough digits that the binary value of any
// mantissa we capture has more bits of resolution than the mantissa
// representation in the target float.  Our algorithm requires at least 3 bits
// of headway, but 19 decimal digits give a little more than that.
//
// The following static assertions verify the above comments:
constexpr int kDecimalMantissaDigitsMax = 19;

static_assert(std::numeric_limits<uint64_t>::digits10 ==
                  kDecimalMantissaDigitsMax,
              "(a) above");

// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa.
static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed");
static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact");
static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact");

// The lowest valued 19-digit decimal mantissa we can read (10^18) still
// contains sufficient information to reconstruct a binary mantissa: it exceeds
// 2^(53 + 3), i.e. it has more than 53 bits plus the 3 bits of headway.
static_assert(1000000000000000000u > (uint64_t{1} << (53 + 3)), "(b) above");
56*9356374aSAndroid Build Coastguard Worker
// ParseFloat<16> will read the first 15 significant digits of the mantissa.
//
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
// to maximize the number of scanned hex digits to improve our conversion.  What
// is required is to scan two more bits than the mantissa can represent, so that
// we always round correctly.
//
// (One extra bit does not suffice to perform correct rounding, since a number
// exactly halfway between two representable floats has unique rounding rules,
// so we need to differentiate between a "halfway between" number and a "closer
// to the larger value" number.)
constexpr int kHexadecimalMantissaDigitsMax = 15;

// The minimum number of significant bits that will be read from
// kHexadecimalMantissaDigitsMax hex digits.  We must subtract by three, since
// the most significant digit can be a "1", which only contributes a single
// significant bit.
constexpr int kGuaranteedHexadecimalMantissaBitPrecision =
    4 * kHexadecimalMantissaDigitsMax - 3;

// Verifies that the guaranteed precision exceeds the double mantissa width
// plus the two extra bits described above.
static_assert(kGuaranteedHexadecimalMantissaBitPrecision >
                  std::numeric_limits<double>::digits + 2,
              "kHexadecimalMantissaDigitsMax too small");
80*9356374aSAndroid Build Coastguard Worker
// We also impose a limit on the number of significant digits we will read from
// an exponent, to avoid having to deal with integer overflow.  We use 9 for
// this purpose.
//
// If we read a 9 digit exponent, the end result of the conversion will
// necessarily be infinity or zero, depending on the sign of the exponent.
// Therefore we can just drop extra digits on the floor without any extra
// logic.
constexpr int kDecimalExponentDigitsMax = 9;
static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax,
              "int type too small");
92*9356374aSAndroid Build Coastguard Worker
// To avoid incredibly large inputs causing integer overflow for our exponent,
// we impose an arbitrary but very large limit on the number of significant
// digits we will accept.  The implementation refuses to match a string with
// more consecutive significant mantissa digits than this.
constexpr int kDecimalDigitLimit = 50000000;

// Corresponding limit for hexadecimal digit inputs.  This is one fourth the
// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires
// a binary exponent adjustment of 4.
constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4;

// The largest exponent we can read is 999999999 (a nine-digit value), and the
// largest exponent adjustment we can get from dropped mantissa digits is
// 2 * kDecimalDigitLimit, and the sum of these comfortably fits in an integer.
//
// We count kDecimalDigitLimit twice because there are independent limits for
// numbers before and after the decimal point.  (In the case where there are no
// significant digits before the decimal point, there are independent limits for
// post-decimal-point leading zeroes and for significant digits.)
static_assert(999999999 + 2 * kDecimalDigitLimit <
                  std::numeric_limits<int>::max(),
              "int type too small");
static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) <
                  std::numeric_limits<int>::max(),
              "int type too small");
119*9356374aSAndroid Build Coastguard Worker
120*9356374aSAndroid Build Coastguard Worker // Returns true if the provided bitfield allows parsing an exponent value
121*9356374aSAndroid Build Coastguard Worker // (e.g., "1.5e100").
AllowExponent(chars_format flags)122*9356374aSAndroid Build Coastguard Worker bool AllowExponent(chars_format flags) {
123*9356374aSAndroid Build Coastguard Worker bool fixed = (flags & chars_format::fixed) == chars_format::fixed;
124*9356374aSAndroid Build Coastguard Worker bool scientific =
125*9356374aSAndroid Build Coastguard Worker (flags & chars_format::scientific) == chars_format::scientific;
126*9356374aSAndroid Build Coastguard Worker return scientific || !fixed;
127*9356374aSAndroid Build Coastguard Worker }
128*9356374aSAndroid Build Coastguard Worker
129*9356374aSAndroid Build Coastguard Worker // Returns true if the provided bitfield requires an exponent value be present.
RequireExponent(chars_format flags)130*9356374aSAndroid Build Coastguard Worker bool RequireExponent(chars_format flags) {
131*9356374aSAndroid Build Coastguard Worker bool fixed = (flags & chars_format::fixed) == chars_format::fixed;
132*9356374aSAndroid Build Coastguard Worker bool scientific =
133*9356374aSAndroid Build Coastguard Worker (flags & chars_format::scientific) == chars_format::scientific;
134*9356374aSAndroid Build Coastguard Worker return scientific && !fixed;
135*9356374aSAndroid Build Coastguard Worker }
136*9356374aSAndroid Build Coastguard Worker
// Lookup table indexed by an unsigned character value.  Entries for the
// characters '0'-'9', 'A'-'F' and 'a'-'f' hold the corresponding hexadecimal
// digit value (0-15); every other entry is -1.  Laid out 16 entries per row so
// that each row corresponds to one high nibble of the character code.
const int8_t kAsciiToInt[256] = {
    // 0x00 - 0x0F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x30 - 0x3F: '0'..'9' occupy 0x30..0x39
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4F: 'A'..'F' occupy 0x41..0x46
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x50 - 0x5F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x60 - 0x6F: 'a'..'f' occupy 0x61..0x66
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x70 - 0x7F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x80 - 0x8F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x90 - 0x9F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xA0 - 0xAF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xB0 - 0xBF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xC0 - 0xCF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xD0 - 0xDF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xE0 - 0xEF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xF0 - 0xFF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
152*9356374aSAndroid Build Coastguard Worker
// Primary declarations of the per-base helper templates.  Only the explicit
// specializations for base 10 and base 16 are defined in this file.

// Returns true if `ch` is a digit in the given base
template <int base>
bool IsDigit(char ch);

// Converts a valid `ch` to its digit value in the given base.
template <int base>
unsigned ToDigit(char ch);

// Returns true if `ch` is the exponent delimiter for the given base.
template <int base>
bool IsExponentCharacter(char ch);

// Returns the maximum number of significant digits we will read for a float
// in the given base.
template <int base>
constexpr int MantissaDigitsMax();

// Returns the largest consecutive run of digits we will accept when parsing a
// number in the given base.
template <int base>
constexpr int DigitLimit();

// Returns the amount the exponent must be adjusted by for each dropped digit.
// (For decimal this is 1, since the digits are in base 10 and the exponent base
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
// the exponent base is 2.)
template <int base>
constexpr int DigitMagnitude();
181*9356374aSAndroid Build Coastguard Worker
182*9356374aSAndroid Build Coastguard Worker template <>
IsDigit(char ch)183*9356374aSAndroid Build Coastguard Worker bool IsDigit<10>(char ch) {
184*9356374aSAndroid Build Coastguard Worker return ch >= '0' && ch <= '9';
185*9356374aSAndroid Build Coastguard Worker }
186*9356374aSAndroid Build Coastguard Worker template <>
IsDigit(char ch)187*9356374aSAndroid Build Coastguard Worker bool IsDigit<16>(char ch) {
188*9356374aSAndroid Build Coastguard Worker return kAsciiToInt[static_cast<unsigned char>(ch)] >= 0;
189*9356374aSAndroid Build Coastguard Worker }
190*9356374aSAndroid Build Coastguard Worker
191*9356374aSAndroid Build Coastguard Worker template <>
ToDigit(char ch)192*9356374aSAndroid Build Coastguard Worker unsigned ToDigit<10>(char ch) {
193*9356374aSAndroid Build Coastguard Worker return static_cast<unsigned>(ch - '0');
194*9356374aSAndroid Build Coastguard Worker }
195*9356374aSAndroid Build Coastguard Worker template <>
ToDigit(char ch)196*9356374aSAndroid Build Coastguard Worker unsigned ToDigit<16>(char ch) {
197*9356374aSAndroid Build Coastguard Worker return static_cast<unsigned>(kAsciiToInt[static_cast<unsigned char>(ch)]);
198*9356374aSAndroid Build Coastguard Worker }
199*9356374aSAndroid Build Coastguard Worker
200*9356374aSAndroid Build Coastguard Worker template <>
IsExponentCharacter(char ch)201*9356374aSAndroid Build Coastguard Worker bool IsExponentCharacter<10>(char ch) {
202*9356374aSAndroid Build Coastguard Worker return ch == 'e' || ch == 'E';
203*9356374aSAndroid Build Coastguard Worker }
204*9356374aSAndroid Build Coastguard Worker
205*9356374aSAndroid Build Coastguard Worker template <>
IsExponentCharacter(char ch)206*9356374aSAndroid Build Coastguard Worker bool IsExponentCharacter<16>(char ch) {
207*9356374aSAndroid Build Coastguard Worker return ch == 'p' || ch == 'P';
208*9356374aSAndroid Build Coastguard Worker }
209*9356374aSAndroid Build Coastguard Worker
210*9356374aSAndroid Build Coastguard Worker template <>
MantissaDigitsMax()211*9356374aSAndroid Build Coastguard Worker constexpr int MantissaDigitsMax<10>() {
212*9356374aSAndroid Build Coastguard Worker return kDecimalMantissaDigitsMax;
213*9356374aSAndroid Build Coastguard Worker }
214*9356374aSAndroid Build Coastguard Worker template <>
MantissaDigitsMax()215*9356374aSAndroid Build Coastguard Worker constexpr int MantissaDigitsMax<16>() {
216*9356374aSAndroid Build Coastguard Worker return kHexadecimalMantissaDigitsMax;
217*9356374aSAndroid Build Coastguard Worker }
218*9356374aSAndroid Build Coastguard Worker
219*9356374aSAndroid Build Coastguard Worker template <>
DigitLimit()220*9356374aSAndroid Build Coastguard Worker constexpr int DigitLimit<10>() {
221*9356374aSAndroid Build Coastguard Worker return kDecimalDigitLimit;
222*9356374aSAndroid Build Coastguard Worker }
223*9356374aSAndroid Build Coastguard Worker template <>
DigitLimit()224*9356374aSAndroid Build Coastguard Worker constexpr int DigitLimit<16>() {
225*9356374aSAndroid Build Coastguard Worker return kHexadecimalDigitLimit;
226*9356374aSAndroid Build Coastguard Worker }
227*9356374aSAndroid Build Coastguard Worker
228*9356374aSAndroid Build Coastguard Worker template <>
DigitMagnitude()229*9356374aSAndroid Build Coastguard Worker constexpr int DigitMagnitude<10>() {
230*9356374aSAndroid Build Coastguard Worker return 1;
231*9356374aSAndroid Build Coastguard Worker }
232*9356374aSAndroid Build Coastguard Worker template <>
DigitMagnitude()233*9356374aSAndroid Build Coastguard Worker constexpr int DigitMagnitude<16>() {
234*9356374aSAndroid Build Coastguard Worker return 4;
235*9356374aSAndroid Build Coastguard Worker }
236*9356374aSAndroid Build Coastguard Worker
237*9356374aSAndroid Build Coastguard Worker // Reads decimal digits from [begin, end) into *out. Returns the number of
238*9356374aSAndroid Build Coastguard Worker // digits consumed.
239*9356374aSAndroid Build Coastguard Worker //
240*9356374aSAndroid Build Coastguard Worker // After max_digits has been read, keeps consuming characters, but no longer
241*9356374aSAndroid Build Coastguard Worker // adjusts *out. If a nonzero digit is dropped this way, *dropped_nonzero_digit
242*9356374aSAndroid Build Coastguard Worker // is set; otherwise, it is left unmodified.
243*9356374aSAndroid Build Coastguard Worker //
244*9356374aSAndroid Build Coastguard Worker // If no digits are matched, returns 0 and leaves *out unchanged.
245*9356374aSAndroid Build Coastguard Worker //
246*9356374aSAndroid Build Coastguard Worker // ConsumeDigits does not protect against overflow on *out; max_digits must
247*9356374aSAndroid Build Coastguard Worker // be chosen with respect to type T to avoid the possibility of overflow.
248*9356374aSAndroid Build Coastguard Worker template <int base, typename T>
ConsumeDigits(const char * begin,const char * end,int max_digits,T * out,bool * dropped_nonzero_digit)249*9356374aSAndroid Build Coastguard Worker int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out,
250*9356374aSAndroid Build Coastguard Worker bool* dropped_nonzero_digit) {
251*9356374aSAndroid Build Coastguard Worker if (base == 10) {
252*9356374aSAndroid Build Coastguard Worker assert(max_digits <= std::numeric_limits<T>::digits10);
253*9356374aSAndroid Build Coastguard Worker } else if (base == 16) {
254*9356374aSAndroid Build Coastguard Worker assert(max_digits * 4 <= std::numeric_limits<T>::digits);
255*9356374aSAndroid Build Coastguard Worker }
256*9356374aSAndroid Build Coastguard Worker const char* const original_begin = begin;
257*9356374aSAndroid Build Coastguard Worker
258*9356374aSAndroid Build Coastguard Worker // Skip leading zeros, but only if *out is zero.
259*9356374aSAndroid Build Coastguard Worker // They don't cause an overflow so we don't have to count them for
260*9356374aSAndroid Build Coastguard Worker // `max_digits`.
261*9356374aSAndroid Build Coastguard Worker while (!*out && end != begin && *begin == '0') ++begin;
262*9356374aSAndroid Build Coastguard Worker
263*9356374aSAndroid Build Coastguard Worker T accumulator = *out;
264*9356374aSAndroid Build Coastguard Worker const char* significant_digits_end =
265*9356374aSAndroid Build Coastguard Worker (end - begin > max_digits) ? begin + max_digits : end;
266*9356374aSAndroid Build Coastguard Worker while (begin < significant_digits_end && IsDigit<base>(*begin)) {
267*9356374aSAndroid Build Coastguard Worker // Do not guard against *out overflow; max_digits was chosen to avoid this.
268*9356374aSAndroid Build Coastguard Worker // Do assert against it, to detect problems in debug builds.
269*9356374aSAndroid Build Coastguard Worker auto digit = static_cast<T>(ToDigit<base>(*begin));
270*9356374aSAndroid Build Coastguard Worker assert(accumulator * base >= accumulator);
271*9356374aSAndroid Build Coastguard Worker accumulator *= base;
272*9356374aSAndroid Build Coastguard Worker assert(accumulator + digit >= accumulator);
273*9356374aSAndroid Build Coastguard Worker accumulator += digit;
274*9356374aSAndroid Build Coastguard Worker ++begin;
275*9356374aSAndroid Build Coastguard Worker }
276*9356374aSAndroid Build Coastguard Worker bool dropped_nonzero = false;
277*9356374aSAndroid Build Coastguard Worker while (begin < end && IsDigit<base>(*begin)) {
278*9356374aSAndroid Build Coastguard Worker dropped_nonzero = dropped_nonzero || (*begin != '0');
279*9356374aSAndroid Build Coastguard Worker ++begin;
280*9356374aSAndroid Build Coastguard Worker }
281*9356374aSAndroid Build Coastguard Worker if (dropped_nonzero && dropped_nonzero_digit != nullptr) {
282*9356374aSAndroid Build Coastguard Worker *dropped_nonzero_digit = true;
283*9356374aSAndroid Build Coastguard Worker }
284*9356374aSAndroid Build Coastguard Worker *out = accumulator;
285*9356374aSAndroid Build Coastguard Worker return static_cast<int>(begin - original_begin);
286*9356374aSAndroid Build Coastguard Worker }
287*9356374aSAndroid Build Coastguard Worker
// Returns true if `v` is one of the chars allowed inside parentheses following
// a NaN, i.e. an underscore, a decimal digit, or a letter 'a'-'z' / 'A'-'Z'.
bool IsNanChar(char v) {
  return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') ||
         (v >= 'A' && v <= 'Z');
}
294*9356374aSAndroid Build Coastguard Worker
295*9356374aSAndroid Build Coastguard Worker // Checks the range [begin, end) for a strtod()-formatted infinity or NaN. If
296*9356374aSAndroid Build Coastguard Worker // one is found, sets `out` appropriately and returns true.
ParseInfinityOrNan(const char * begin,const char * end,strings_internal::ParsedFloat * out)297*9356374aSAndroid Build Coastguard Worker bool ParseInfinityOrNan(const char* begin, const char* end,
298*9356374aSAndroid Build Coastguard Worker strings_internal::ParsedFloat* out) {
299*9356374aSAndroid Build Coastguard Worker if (end - begin < 3) {
300*9356374aSAndroid Build Coastguard Worker return false;
301*9356374aSAndroid Build Coastguard Worker }
302*9356374aSAndroid Build Coastguard Worker switch (*begin) {
303*9356374aSAndroid Build Coastguard Worker case 'i':
304*9356374aSAndroid Build Coastguard Worker case 'I': {
305*9356374aSAndroid Build Coastguard Worker // An infinity string consists of the characters "inf" or "infinity",
306*9356374aSAndroid Build Coastguard Worker // case insensitive.
307*9356374aSAndroid Build Coastguard Worker if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) {
308*9356374aSAndroid Build Coastguard Worker return false;
309*9356374aSAndroid Build Coastguard Worker }
310*9356374aSAndroid Build Coastguard Worker out->type = strings_internal::FloatType::kInfinity;
311*9356374aSAndroid Build Coastguard Worker if (end - begin >= 8 &&
312*9356374aSAndroid Build Coastguard Worker strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) {
313*9356374aSAndroid Build Coastguard Worker out->end = begin + 8;
314*9356374aSAndroid Build Coastguard Worker } else {
315*9356374aSAndroid Build Coastguard Worker out->end = begin + 3;
316*9356374aSAndroid Build Coastguard Worker }
317*9356374aSAndroid Build Coastguard Worker return true;
318*9356374aSAndroid Build Coastguard Worker }
319*9356374aSAndroid Build Coastguard Worker case 'n':
320*9356374aSAndroid Build Coastguard Worker case 'N': {
321*9356374aSAndroid Build Coastguard Worker // A NaN consists of the characters "nan", case insensitive, optionally
322*9356374aSAndroid Build Coastguard Worker // followed by a parenthesized sequence of zero or more alphanumeric
323*9356374aSAndroid Build Coastguard Worker // characters and/or underscores.
324*9356374aSAndroid Build Coastguard Worker if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) {
325*9356374aSAndroid Build Coastguard Worker return false;
326*9356374aSAndroid Build Coastguard Worker }
327*9356374aSAndroid Build Coastguard Worker out->type = strings_internal::FloatType::kNan;
328*9356374aSAndroid Build Coastguard Worker out->end = begin + 3;
329*9356374aSAndroid Build Coastguard Worker // NaN is allowed to be followed by a parenthesized string, consisting of
330*9356374aSAndroid Build Coastguard Worker // only the characters [a-zA-Z0-9_]. Match that if it's present.
331*9356374aSAndroid Build Coastguard Worker begin += 3;
332*9356374aSAndroid Build Coastguard Worker if (begin < end && *begin == '(') {
333*9356374aSAndroid Build Coastguard Worker const char* nan_begin = begin + 1;
334*9356374aSAndroid Build Coastguard Worker while (nan_begin < end && IsNanChar(*nan_begin)) {
335*9356374aSAndroid Build Coastguard Worker ++nan_begin;
336*9356374aSAndroid Build Coastguard Worker }
337*9356374aSAndroid Build Coastguard Worker if (nan_begin < end && *nan_begin == ')') {
338*9356374aSAndroid Build Coastguard Worker // We found an extra NaN specifier range
339*9356374aSAndroid Build Coastguard Worker out->subrange_begin = begin + 1;
340*9356374aSAndroid Build Coastguard Worker out->subrange_end = nan_begin;
341*9356374aSAndroid Build Coastguard Worker out->end = nan_begin + 1;
342*9356374aSAndroid Build Coastguard Worker }
343*9356374aSAndroid Build Coastguard Worker }
344*9356374aSAndroid Build Coastguard Worker return true;
345*9356374aSAndroid Build Coastguard Worker }
346*9356374aSAndroid Build Coastguard Worker default:
347*9356374aSAndroid Build Coastguard Worker return false;
348*9356374aSAndroid Build Coastguard Worker }
349*9356374aSAndroid Build Coastguard Worker }
350*9356374aSAndroid Build Coastguard Worker } // namespace
351*9356374aSAndroid Build Coastguard Worker
352*9356374aSAndroid Build Coastguard Worker namespace strings_internal {
353*9356374aSAndroid Build Coastguard Worker
354*9356374aSAndroid Build Coastguard Worker template <int base>
ParseFloat(const char * begin,const char * end,chars_format format_flags)355*9356374aSAndroid Build Coastguard Worker strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
356*9356374aSAndroid Build Coastguard Worker chars_format format_flags) {
357*9356374aSAndroid Build Coastguard Worker strings_internal::ParsedFloat result;
358*9356374aSAndroid Build Coastguard Worker
359*9356374aSAndroid Build Coastguard Worker // Exit early if we're given an empty range.
360*9356374aSAndroid Build Coastguard Worker if (begin == end) return result;
361*9356374aSAndroid Build Coastguard Worker
362*9356374aSAndroid Build Coastguard Worker // Handle the infinity and NaN cases.
363*9356374aSAndroid Build Coastguard Worker if (ParseInfinityOrNan(begin, end, &result)) {
364*9356374aSAndroid Build Coastguard Worker return result;
365*9356374aSAndroid Build Coastguard Worker }
366*9356374aSAndroid Build Coastguard Worker
367*9356374aSAndroid Build Coastguard Worker const char* const mantissa_begin = begin;
368*9356374aSAndroid Build Coastguard Worker while (begin < end && *begin == '0') {
369*9356374aSAndroid Build Coastguard Worker ++begin; // skip leading zeros
370*9356374aSAndroid Build Coastguard Worker }
371*9356374aSAndroid Build Coastguard Worker uint64_t mantissa = 0;
372*9356374aSAndroid Build Coastguard Worker
373*9356374aSAndroid Build Coastguard Worker int exponent_adjustment = 0;
374*9356374aSAndroid Build Coastguard Worker bool mantissa_is_inexact = false;
375*9356374aSAndroid Build Coastguard Worker int pre_decimal_digits = ConsumeDigits<base>(
376*9356374aSAndroid Build Coastguard Worker begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
377*9356374aSAndroid Build Coastguard Worker begin += pre_decimal_digits;
378*9356374aSAndroid Build Coastguard Worker int digits_left;
379*9356374aSAndroid Build Coastguard Worker if (pre_decimal_digits >= DigitLimit<base>()) {
380*9356374aSAndroid Build Coastguard Worker // refuse to parse pathological inputs
381*9356374aSAndroid Build Coastguard Worker return result;
382*9356374aSAndroid Build Coastguard Worker } else if (pre_decimal_digits > MantissaDigitsMax<base>()) {
383*9356374aSAndroid Build Coastguard Worker // We dropped some non-fraction digits on the floor. Adjust our exponent
384*9356374aSAndroid Build Coastguard Worker // to compensate.
385*9356374aSAndroid Build Coastguard Worker exponent_adjustment =
386*9356374aSAndroid Build Coastguard Worker static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>());
387*9356374aSAndroid Build Coastguard Worker digits_left = 0;
388*9356374aSAndroid Build Coastguard Worker } else {
389*9356374aSAndroid Build Coastguard Worker digits_left =
390*9356374aSAndroid Build Coastguard Worker static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits);
391*9356374aSAndroid Build Coastguard Worker }
392*9356374aSAndroid Build Coastguard Worker if (begin < end && *begin == '.') {
393*9356374aSAndroid Build Coastguard Worker ++begin;
394*9356374aSAndroid Build Coastguard Worker if (mantissa == 0) {
395*9356374aSAndroid Build Coastguard Worker // If we haven't seen any nonzero digits yet, keep skipping zeros. We
396*9356374aSAndroid Build Coastguard Worker // have to adjust the exponent to reflect the changed place value.
397*9356374aSAndroid Build Coastguard Worker const char* begin_zeros = begin;
398*9356374aSAndroid Build Coastguard Worker while (begin < end && *begin == '0') {
399*9356374aSAndroid Build Coastguard Worker ++begin;
400*9356374aSAndroid Build Coastguard Worker }
401*9356374aSAndroid Build Coastguard Worker int zeros_skipped = static_cast<int>(begin - begin_zeros);
402*9356374aSAndroid Build Coastguard Worker if (zeros_skipped >= DigitLimit<base>()) {
403*9356374aSAndroid Build Coastguard Worker // refuse to parse pathological inputs
404*9356374aSAndroid Build Coastguard Worker return result;
405*9356374aSAndroid Build Coastguard Worker }
406*9356374aSAndroid Build Coastguard Worker exponent_adjustment -= static_cast<int>(zeros_skipped);
407*9356374aSAndroid Build Coastguard Worker }
408*9356374aSAndroid Build Coastguard Worker int post_decimal_digits = ConsumeDigits<base>(
409*9356374aSAndroid Build Coastguard Worker begin, end, digits_left, &mantissa, &mantissa_is_inexact);
410*9356374aSAndroid Build Coastguard Worker begin += post_decimal_digits;
411*9356374aSAndroid Build Coastguard Worker
412*9356374aSAndroid Build Coastguard Worker // Since `mantissa` is an integer, each significant digit we read after
413*9356374aSAndroid Build Coastguard Worker // the decimal point requires an adjustment to the exponent. "1.23e0" will
414*9356374aSAndroid Build Coastguard Worker // be stored as `mantissa` == 123 and `exponent` == -2 (that is,
415*9356374aSAndroid Build Coastguard Worker // "123e-2").
416*9356374aSAndroid Build Coastguard Worker if (post_decimal_digits >= DigitLimit<base>()) {
417*9356374aSAndroid Build Coastguard Worker // refuse to parse pathological inputs
418*9356374aSAndroid Build Coastguard Worker return result;
419*9356374aSAndroid Build Coastguard Worker } else if (post_decimal_digits > digits_left) {
420*9356374aSAndroid Build Coastguard Worker exponent_adjustment -= digits_left;
421*9356374aSAndroid Build Coastguard Worker } else {
422*9356374aSAndroid Build Coastguard Worker exponent_adjustment -= post_decimal_digits;
423*9356374aSAndroid Build Coastguard Worker }
424*9356374aSAndroid Build Coastguard Worker }
425*9356374aSAndroid Build Coastguard Worker // If we've found no mantissa whatsoever, this isn't a number.
426*9356374aSAndroid Build Coastguard Worker if (mantissa_begin == begin) {
427*9356374aSAndroid Build Coastguard Worker return result;
428*9356374aSAndroid Build Coastguard Worker }
429*9356374aSAndroid Build Coastguard Worker // A bare "." doesn't count as a mantissa either.
430*9356374aSAndroid Build Coastguard Worker if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
431*9356374aSAndroid Build Coastguard Worker return result;
432*9356374aSAndroid Build Coastguard Worker }
433*9356374aSAndroid Build Coastguard Worker
434*9356374aSAndroid Build Coastguard Worker if (mantissa_is_inexact) {
435*9356374aSAndroid Build Coastguard Worker // We dropped significant digits on the floor. Handle this appropriately.
436*9356374aSAndroid Build Coastguard Worker if (base == 10) {
437*9356374aSAndroid Build Coastguard Worker // If we truncated significant decimal digits, store the full range of the
438*9356374aSAndroid Build Coastguard Worker // mantissa for future big integer math for exact rounding.
439*9356374aSAndroid Build Coastguard Worker result.subrange_begin = mantissa_begin;
440*9356374aSAndroid Build Coastguard Worker result.subrange_end = begin;
441*9356374aSAndroid Build Coastguard Worker } else if (base == 16) {
442*9356374aSAndroid Build Coastguard Worker // If we truncated hex digits, reflect this fact by setting the low
443*9356374aSAndroid Build Coastguard Worker // ("sticky") bit. This allows for correct rounding in all cases.
444*9356374aSAndroid Build Coastguard Worker mantissa |= 1;
445*9356374aSAndroid Build Coastguard Worker }
446*9356374aSAndroid Build Coastguard Worker }
447*9356374aSAndroid Build Coastguard Worker result.mantissa = mantissa;
448*9356374aSAndroid Build Coastguard Worker
449*9356374aSAndroid Build Coastguard Worker const char* const exponent_begin = begin;
450*9356374aSAndroid Build Coastguard Worker result.literal_exponent = 0;
451*9356374aSAndroid Build Coastguard Worker bool found_exponent = false;
452*9356374aSAndroid Build Coastguard Worker if (AllowExponent(format_flags) && begin < end &&
453*9356374aSAndroid Build Coastguard Worker IsExponentCharacter<base>(*begin)) {
454*9356374aSAndroid Build Coastguard Worker bool negative_exponent = false;
455*9356374aSAndroid Build Coastguard Worker ++begin;
456*9356374aSAndroid Build Coastguard Worker if (begin < end && *begin == '-') {
457*9356374aSAndroid Build Coastguard Worker negative_exponent = true;
458*9356374aSAndroid Build Coastguard Worker ++begin;
459*9356374aSAndroid Build Coastguard Worker } else if (begin < end && *begin == '+') {
460*9356374aSAndroid Build Coastguard Worker ++begin;
461*9356374aSAndroid Build Coastguard Worker }
462*9356374aSAndroid Build Coastguard Worker const char* const exponent_digits_begin = begin;
463*9356374aSAndroid Build Coastguard Worker // Exponent is always expressed in decimal, even for hexadecimal floats.
464*9356374aSAndroid Build Coastguard Worker begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
465*9356374aSAndroid Build Coastguard Worker &result.literal_exponent, nullptr);
466*9356374aSAndroid Build Coastguard Worker if (begin == exponent_digits_begin) {
467*9356374aSAndroid Build Coastguard Worker // there were no digits where we expected an exponent. We failed to read
468*9356374aSAndroid Build Coastguard Worker // an exponent and should not consume the 'e' after all. Rewind 'begin'.
469*9356374aSAndroid Build Coastguard Worker found_exponent = false;
470*9356374aSAndroid Build Coastguard Worker begin = exponent_begin;
471*9356374aSAndroid Build Coastguard Worker } else {
472*9356374aSAndroid Build Coastguard Worker found_exponent = true;
473*9356374aSAndroid Build Coastguard Worker if (negative_exponent) {
474*9356374aSAndroid Build Coastguard Worker result.literal_exponent = -result.literal_exponent;
475*9356374aSAndroid Build Coastguard Worker }
476*9356374aSAndroid Build Coastguard Worker }
477*9356374aSAndroid Build Coastguard Worker }
478*9356374aSAndroid Build Coastguard Worker
479*9356374aSAndroid Build Coastguard Worker if (!found_exponent && RequireExponent(format_flags)) {
480*9356374aSAndroid Build Coastguard Worker // Provided flags required an exponent, but none was found. This results
481*9356374aSAndroid Build Coastguard Worker // in a failure to scan.
482*9356374aSAndroid Build Coastguard Worker return result;
483*9356374aSAndroid Build Coastguard Worker }
484*9356374aSAndroid Build Coastguard Worker
485*9356374aSAndroid Build Coastguard Worker // Success!
486*9356374aSAndroid Build Coastguard Worker result.type = strings_internal::FloatType::kNumber;
487*9356374aSAndroid Build Coastguard Worker if (result.mantissa > 0) {
488*9356374aSAndroid Build Coastguard Worker result.exponent = result.literal_exponent +
489*9356374aSAndroid Build Coastguard Worker (DigitMagnitude<base>() * exponent_adjustment);
490*9356374aSAndroid Build Coastguard Worker } else {
491*9356374aSAndroid Build Coastguard Worker result.exponent = 0;
492*9356374aSAndroid Build Coastguard Worker }
493*9356374aSAndroid Build Coastguard Worker result.end = begin;
494*9356374aSAndroid Build Coastguard Worker return result;
495*9356374aSAndroid Build Coastguard Worker }
496*9356374aSAndroid Build Coastguard Worker
// Explicit instantiations of the ParseFloat template for base 10 (decimal)
// and base 16 (hexadecimal). These are the two bases that the function body
// above branches on when handling a truncated mantissa (`base == 10` and
// `base == 16`).
// NOTE(review): presumably these exist so that callers in other translation
// units can link against ParseFloat without seeing the template definition;
// confirm against the declaration in charconv_parse.h.
497*9356374aSAndroid Build Coastguard Worker template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
498*9356374aSAndroid Build Coastguard Worker chars_format format_flags);
499*9356374aSAndroid Build Coastguard Worker template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
500*9356374aSAndroid Build Coastguard Worker chars_format format_flags);
501*9356374aSAndroid Build Coastguard Worker
502*9356374aSAndroid Build Coastguard Worker } // namespace strings_internal
503*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_END
504*9356374aSAndroid Build Coastguard Worker } // namespace absl
505