xref: /aosp_15_r20/external/llvm-libc/src/__support/str_to_integer.h (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===-- String to integer conversion utils ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // -----------------------------------------------------------------------------
10 //                               **** WARNING ****
11 // This file is shared with libc++. You should also be careful when adding
12 // dependencies to this file, since it needs to build for all libc++ targets.
13 // -----------------------------------------------------------------------------
14 
15 #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
16 #define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
17 
18 #include "src/__support/CPP/limits.h"
19 #include "src/__support/CPP/type_traits.h"
20 #include "src/__support/CPP/type_traits/make_unsigned.h"
21 #include "src/__support/big_int.h"
22 #include "src/__support/common.h"
23 #include "src/__support/ctype_utils.h"
24 #include "src/__support/macros/config.h"
25 #include "src/__support/str_to_num_result.h"
26 #include "src/__support/uint128.h"
27 #include "src/errno/libc_errno.h" // For ERANGE
28 
29 namespace LIBC_NAMESPACE_DECL {
30 namespace internal {
31 
32 // Returns a pointer to the first character in src that is not a whitespace
33 // character (as determined by isspace())
34 // TODO: Change from returning a pointer to returning a length.
35 LIBC_INLINE const char *
36 first_non_whitespace(const char *__restrict src,
37                      size_t src_len = cpp::numeric_limits<size_t>::max()) {
38   size_t src_cur = 0;
39   while (src_cur < src_len && internal::isspace(src[src_cur])) {
40     ++src_cur;
41   }
42   return src + src_cur;
43 }
44 
b36_char_to_int(char input)45 LIBC_INLINE int b36_char_to_int(char input) {
46   if (isdigit(input))
47     return input - '0';
48   if (isalpha(input))
49     return (input | 32) + 10 - 'a';
50   return 0;
51 }
52 
53 // checks if the next 3 characters of the string pointer are the start of a
54 // hexadecimal number. Does not advance the string pointer.
55 LIBC_INLINE bool
56 is_hex_start(const char *__restrict src,
57              size_t src_len = cpp::numeric_limits<size_t>::max()) {
58   if (src_len < 3)
59     return false;
60   return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
61          b36_char_to_int(*(src + 2)) < 16;
62 }
63 
64 // Takes the address of the string pointer and parses the base from the start of
65 // it.
infer_base(const char * __restrict src,size_t src_len)66 LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
67   // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
68   // sequence of the decimal digits and the letters a (or A) through f (or F)
69   // with values 10 through 15 respectively." (C standard 6.4.4.1)
70   if (is_hex_start(src, src_len))
71     return 16;
72   // An octal number is defined as "the prefix 0 optionally followed by a
73   // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
74   // number that starts with 0, including just 0, is an octal number.
75   if (src_len > 0 && src[0] == '0')
76     return 8;
77   // A decimal number is defined as beginning "with a nonzero digit and
78   // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
79   return 10;
80 }
81 
82 // -----------------------------------------------------------------------------
83 //                               **** WARNING ****
84 // This interface is shared with libc++, if you change this interface you need
85 // to update it in both libc and libc++.
86 // -----------------------------------------------------------------------------
87 // Takes a pointer to a string and the base to convert to. This function is used
88 // as the backend for all of the string to int functions.
89 template <class T>
90 LIBC_INLINE StrToNumResult<T>
91 strtointeger(const char *__restrict src, int base,
92              const size_t src_len = cpp::numeric_limits<size_t>::max()) {
93   using ResultType = make_integral_or_big_int_unsigned_t<T>;
94 
95   ResultType result = 0;
96 
97   bool is_number = false;
98   size_t src_cur = 0;
99   int error_val = 0;
100 
101   if (src_len == 0)
102     return {0, 0, 0};
103 
104   if (base < 0 || base == 1 || base > 36)
105     return {0, 0, EINVAL};
106 
107   src_cur = first_non_whitespace(src, src_len) - src;
108 
109   char result_sign = '+';
110   if (src[src_cur] == '+' || src[src_cur] == '-') {
111     result_sign = src[src_cur];
112     ++src_cur;
113   }
114 
115   if (base == 0)
116     base = infer_base(src + src_cur, src_len - src_cur);
117 
118   if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
119     src_cur = src_cur + 2;
120 
121   constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
122   const bool is_positive = (result_sign == '+');
123 
124   ResultType constexpr NEGATIVE_MAX =
125       !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
126                    : cpp::numeric_limits<T>::max();
127   ResultType const abs_max =
128       (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
129   ResultType const abs_max_div_by_base =
130       static_cast<ResultType>(abs_max / base);
131 
132   while (src_cur < src_len && isalnum(src[src_cur])) {
133     int cur_digit = b36_char_to_int(src[src_cur]);
134     if (cur_digit >= base)
135       break;
136 
137     is_number = true;
138     ++src_cur;
139 
140     // If the number has already hit the maximum value for the current type then
141     // the result cannot change, but we still need to advance src to the end of
142     // the number.
143     if (result == abs_max) {
144       error_val = ERANGE;
145       continue;
146     }
147 
148     if (result > abs_max_div_by_base) {
149       result = abs_max;
150       error_val = ERANGE;
151     } else {
152       result = static_cast<ResultType>(result * base);
153     }
154     if (result > abs_max - cur_digit) {
155       result = abs_max;
156       error_val = ERANGE;
157     } else {
158       result = static_cast<ResultType>(result + cur_digit);
159     }
160   }
161 
162   ptrdiff_t str_len = is_number ? (src_cur) : 0;
163 
164   if (error_val == ERANGE) {
165     if (is_positive || IS_UNSIGNED)
166       return {cpp::numeric_limits<T>::max(), str_len, error_val};
167     else // T is signed and there is a negative overflow
168       return {cpp::numeric_limits<T>::min(), str_len, error_val};
169   }
170 
171   return {static_cast<T>(is_positive ? result : -result), str_len, error_val};
172 }
173 
174 } // namespace internal
175 } // namespace LIBC_NAMESPACE_DECL
176 
177 #endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
178