1 // Copyright 2018 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
16 #define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
17
18 #include <algorithm>
19 #include <cstdint>
20 #include <iostream>
21 #include <string>
22
23 #include "absl/base/config.h"
24 #include "absl/strings/ascii.h"
25 #include "absl/strings/internal/charconv_parse.h"
26 #include "absl/strings/string_view.h"
27
28 namespace absl {
29 ABSL_NAMESPACE_BEGIN
30 namespace strings_internal {
31
32 // The largest power that 5 that can be raised to, and still fit in a uint32_t.
33 constexpr int kMaxSmallPowerOfFive = 13;
34 // The largest power that 10 that can be raised to, and still fit in a uint32_t.
35 constexpr int kMaxSmallPowerOfTen = 9;
36
37 ABSL_DLL extern const uint32_t
38 kFiveToNth[kMaxSmallPowerOfFive + 1];
39 ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
40
41 // Large, fixed-width unsigned integer.
42 //
43 // Exact rounding for decimal-to-binary floating point conversion requires very
44 // large integer math, but a design goal of absl::from_chars is to avoid
45 // allocating memory. The integer precision needed for decimal-to-binary
46 // conversions is large but bounded, so a huge fixed-width integer class
47 // suffices.
48 //
49 // This is an intentionally limited big integer class. Only needed operations
50 // are implemented. All storage lives in an array data member, and all
51 // arithmetic is done in-place, to avoid requiring separate storage for operand
52 // and result.
53 //
54 // This is an internal class. Some methods live in the .cc file, and are
55 // instantiated only for the values of max_words we need.
56 template <int max_words>
57 class BigUnsigned {
58 public:
59 static_assert(max_words == 4 || max_words == 84,
60 "unsupported max_words value");
61
BigUnsigned()62 BigUnsigned() : size_(0), words_{} {}
BigUnsigned(uint64_t v)63 explicit constexpr BigUnsigned(uint64_t v)
64 : size_((v >> 32) ? 2 : v ? 1 : 0),
65 words_{static_cast<uint32_t>(v & 0xffffffffu),
66 static_cast<uint32_t>(v >> 32)} {}
67
68 // Constructs a BigUnsigned from the given string_view containing a decimal
69 // value. If the input string is not a decimal integer, constructs a 0
70 // instead.
BigUnsigned(absl::string_view sv)71 explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
72 // Check for valid input, returning a 0 otherwise. This is reasonable
73 // behavior only because this constructor is for unit tests.
74 if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||
75 sv.empty()) {
76 return;
77 }
78 int exponent_adjust =
79 ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
80 if (exponent_adjust > 0) {
81 MultiplyByTenToTheNth(exponent_adjust);
82 }
83 }
84
85 // Loads the mantissa value of a previously-parsed float.
86 //
87 // Returns the associated decimal exponent. The value of the parsed float is
88 // exactly *this * 10**exponent.
89 int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
90
91 // Returns the number of decimal digits of precision this type provides. All
92 // numbers with this many decimal digits or fewer are representable by this
93 // type.
94 //
95 // Analogous to std::numeric_limits<BigUnsigned>::digits10.
Digits10()96 static constexpr int Digits10() {
97 // 9975007/1035508 is very slightly less than log10(2**32).
98 return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
99 }
100
101 // Shifts left by the given number of bits.
ShiftLeft(int count)102 void ShiftLeft(int count) {
103 if (count > 0) {
104 const int word_shift = count / 32;
105 if (word_shift >= max_words) {
106 SetToZero();
107 return;
108 }
109 size_ = (std::min)(size_ + word_shift, max_words);
110 count %= 32;
111 if (count == 0) {
112 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=warray-bounds
113 // shows a lot of bogus -Warray-bounds warnings under GCC.
114 // This is not the only one in Abseil.
115 #if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(14, 0)
116 #pragma GCC diagnostic push
117 #pragma GCC diagnostic ignored "-Warray-bounds"
118 #endif
119 std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
120 #if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(14, 0)
121 #pragma GCC diagnostic pop
122 #endif
123 } else {
124 for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {
125 words_[i] = (words_[i - word_shift] << count) |
126 (words_[i - word_shift - 1] >> (32 - count));
127 }
128 words_[word_shift] = words_[0] << count;
129 // Grow size_ if necessary.
130 if (size_ < max_words && words_[size_]) {
131 ++size_;
132 }
133 }
134 std::fill_n(words_, word_shift, 0u);
135 }
136 }
137
138
139 // Multiplies by v in-place.
MultiplyBy(uint32_t v)140 void MultiplyBy(uint32_t v) {
141 if (size_ == 0 || v == 1) {
142 return;
143 }
144 if (v == 0) {
145 SetToZero();
146 return;
147 }
148 const uint64_t factor = v;
149 uint64_t window = 0;
150 for (int i = 0; i < size_; ++i) {
151 window += factor * words_[i];
152 words_[i] = window & 0xffffffff;
153 window >>= 32;
154 }
155 // If carry bits remain and there's space for them, grow size_.
156 if (window && size_ < max_words) {
157 words_[size_] = window & 0xffffffff;
158 ++size_;
159 }
160 }
161
MultiplyBy(uint64_t v)162 void MultiplyBy(uint64_t v) {
163 uint32_t words[2];
164 words[0] = static_cast<uint32_t>(v);
165 words[1] = static_cast<uint32_t>(v >> 32);
166 if (words[1] == 0) {
167 MultiplyBy(words[0]);
168 } else {
169 MultiplyBy(2, words);
170 }
171 }
172
173 // Multiplies in place by 5 to the power of n. n must be non-negative.
MultiplyByFiveToTheNth(int n)174 void MultiplyByFiveToTheNth(int n) {
175 while (n >= kMaxSmallPowerOfFive) {
176 MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
177 n -= kMaxSmallPowerOfFive;
178 }
179 if (n > 0) {
180 MultiplyBy(kFiveToNth[n]);
181 }
182 }
183
184 // Multiplies in place by 10 to the power of n. n must be non-negative.
MultiplyByTenToTheNth(int n)185 void MultiplyByTenToTheNth(int n) {
186 if (n > kMaxSmallPowerOfTen) {
187 // For large n, raise to a power of 5, then shift left by the same amount.
188 // (10**n == 5**n * 2**n.) This requires fewer multiplications overall.
189 MultiplyByFiveToTheNth(n);
190 ShiftLeft(n);
191 } else if (n > 0) {
192 // We can do this more quickly for very small N by using a single
193 // multiplication.
194 MultiplyBy(kTenToNth[n]);
195 }
196 }
197
198 // Returns the value of 5**n, for non-negative n. This implementation uses
199 // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling
200 // MultiplyByFiveToTheNth().
201 static BigUnsigned FiveToTheNth(int n);
202
203 // Multiplies by another BigUnsigned, in-place.
204 template <int M>
MultiplyBy(const BigUnsigned<M> & other)205 void MultiplyBy(const BigUnsigned<M>& other) {
206 MultiplyBy(other.size(), other.words());
207 }
208
SetToZero()209 void SetToZero() {
210 std::fill_n(words_, size_, 0u);
211 size_ = 0;
212 }
213
214 // Returns the value of the nth word of this BigUnsigned. This is
215 // range-checked, and returns 0 on out-of-bounds accesses.
GetWord(int index)216 uint32_t GetWord(int index) const {
217 if (index < 0 || index >= size_) {
218 return 0;
219 }
220 return words_[index];
221 }
222
223 // Returns this integer as a decimal string. This is not used in the decimal-
224 // to-binary conversion; it is intended to aid in testing.
225 std::string ToString() const;
226
size()227 int size() const { return size_; }
words()228 const uint32_t* words() const { return words_; }
229
230 private:
231 // Reads the number between [begin, end), possibly containing a decimal point,
232 // into this BigUnsigned.
233 //
234 // Callers are required to ensure [begin, end) contains a valid number, with
235 // one or more decimal digits and at most one decimal point. This routine
236 // will behave unpredictably if these preconditions are not met.
237 //
238 // Only the first `significant_digits` digits are read. Digits beyond this
239 // limit are "sticky": If the final significant digit is 0 or 5, and if any
240 // dropped digit is nonzero, then that final significant digit is adjusted up
241 // to 1 or 6. This adjustment allows for precise rounding.
242 //
243 // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
244 // account for the decimal point and for dropped significant digits. After
245 // this function returns,
246 // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
247 int ReadDigits(const char* begin, const char* end, int significant_digits);
248
249 // Performs a step of big integer multiplication. This computes the full
250 // (64-bit-wide) values that should be added at the given index (step), and
251 // adds to that location in-place.
252 //
253 // Because our math all occurs in place, we must multiply starting from the
254 // highest word working downward. (This is a bit more expensive due to the
255 // extra carries involved.)
256 //
257 // This must be called in steps, for each word to be calculated, starting from
258 // the high end and working down to 0. The first value of `step` should be
259 // `std::min(original_size + other.size_ - 2, max_words - 1)`.
260 // The reason for this expression is that multiplying the i'th word from one
261 // multiplicand and the j'th word of another multiplicand creates a
262 // two-word-wide value to be stored at the (i+j)'th element. The highest
263 // word indices we will access are `original_size - 1` from this object, and
264 // `other.size_ - 1` from our operand. Therefore,
265 // `original_size + other.size_ - 2` is the first step we should calculate,
266 // but limited on an upper bound by max_words.
267
268 // Working from high-to-low ensures that we do not overwrite the portions of
269 // the initial value of *this which are still needed for later steps.
270 //
271 // Once called with step == 0, *this contains the result of the
272 // multiplication.
273 //
274 // `original_size` is the size_ of *this before the first call to
275 // MultiplyStep(). `other_words` and `other_size` are the contents of our
276 // operand. `step` is the step to perform, as described above.
277 void MultiplyStep(int original_size, const uint32_t* other_words,
278 int other_size, int step);
279
MultiplyBy(int other_size,const uint32_t * other_words)280 void MultiplyBy(int other_size, const uint32_t* other_words) {
281 const int original_size = size_;
282 const int first_step =
283 (std::min)(original_size + other_size - 2, max_words - 1);
284 for (int step = first_step; step >= 0; --step) {
285 MultiplyStep(original_size, other_words, other_size, step);
286 }
287 }
288
289 // Adds a 32-bit value to the index'th word, with carry.
AddWithCarry(int index,uint32_t value)290 void AddWithCarry(int index, uint32_t value) {
291 if (value) {
292 while (index < max_words && value > 0) {
293 words_[index] += value;
294 // carry if we overflowed in this word:
295 if (value > words_[index]) {
296 value = 1;
297 ++index;
298 } else {
299 value = 0;
300 }
301 }
302 size_ = (std::min)(max_words, (std::max)(index + 1, size_));
303 }
304 }
305
AddWithCarry(int index,uint64_t value)306 void AddWithCarry(int index, uint64_t value) {
307 if (value && index < max_words) {
308 uint32_t high = value >> 32;
309 uint32_t low = value & 0xffffffff;
310 words_[index] += low;
311 if (words_[index] < low) {
312 ++high;
313 if (high == 0) {
314 // Carry from the low word caused our high word to overflow.
315 // Short circuit here to do the right thing.
316 AddWithCarry(index + 2, static_cast<uint32_t>(1));
317 return;
318 }
319 }
320 if (high > 0) {
321 AddWithCarry(index + 1, high);
322 } else {
323 // Normally 32-bit AddWithCarry() sets size_, but since we don't call
324 // it when `high` is 0, do it ourselves here.
325 size_ = (std::min)(max_words, (std::max)(index + 1, size_));
326 }
327 }
328 }
329
330 // Divide this in place by a constant divisor. Returns the remainder of the
331 // division.
332 template <uint32_t divisor>
DivMod()333 uint32_t DivMod() {
334 uint64_t accumulator = 0;
335 for (int i = size_ - 1; i >= 0; --i) {
336 accumulator <<= 32;
337 accumulator += words_[i];
338 // accumulator / divisor will never overflow an int32_t in this loop
339 words_[i] = static_cast<uint32_t>(accumulator / divisor);
340 accumulator = accumulator % divisor;
341 }
342 while (size_ > 0 && words_[size_ - 1] == 0) {
343 --size_;
344 }
345 return static_cast<uint32_t>(accumulator);
346 }
347
348 // The number of elements in words_ that may carry significant values.
349 // All elements beyond this point are 0.
350 //
351 // When size_ is 0, this BigUnsigned stores the value 0.
352 // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is
353 // nonzero. This can occur due to overflow truncation.
354 // In particular, x.size_ != y.size_ does *not* imply x != y.
355 int size_;
356 uint32_t words_[max_words];
357 };
358
359 // Compares two big integer instances.
360 //
361 // Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
362 template <int N, int M>
Compare(const BigUnsigned<N> & lhs,const BigUnsigned<M> & rhs)363 int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
364 int limit = (std::max)(lhs.size(), rhs.size());
365 for (int i = limit - 1; i >= 0; --i) {
366 const uint32_t lhs_word = lhs.GetWord(i);
367 const uint32_t rhs_word = rhs.GetWord(i);
368 if (lhs_word < rhs_word) {
369 return -1;
370 } else if (lhs_word > rhs_word) {
371 return 1;
372 }
373 }
374 return 0;
375 }
376
377 template <int N, int M>
378 bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
379 int limit = (std::max)(lhs.size(), rhs.size());
380 for (int i = 0; i < limit; ++i) {
381 if (lhs.GetWord(i) != rhs.GetWord(i)) {
382 return false;
383 }
384 }
385 return true;
386 }
387
388 template <int N, int M>
389 bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
390 return !(lhs == rhs);
391 }
392
393 template <int N, int M>
394 bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
395 return Compare(lhs, rhs) == -1;
396 }
397
398 template <int N, int M>
399 bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
400 return rhs < lhs;
401 }
402 template <int N, int M>
403 bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
404 return !(rhs < lhs);
405 }
406 template <int N, int M>
407 bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
408 return !(lhs < rhs);
409 }
410
411 // Output operator for BigUnsigned, for testing purposes only.
412 template <int N>
413 std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
414 return os << num.ToString();
415 }
416
417 // Explicit instantiation declarations for the sizes of BigUnsigned that we
418 // are using.
419 //
420 // For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
421 // still bigger than an int128, and 84 is a large value we will want to use
422 // in the from_chars implementation.
423 //
424 // Comments justifying the use of 84 belong in the from_chars implementation,
425 // and will be added in a follow-up CL.
426 extern template class BigUnsigned<4>;
427 extern template class BigUnsigned<84>;
428
429 } // namespace strings_internal
430 ABSL_NAMESPACE_END
431 } // namespace absl
432
433 #endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
434