xref: /aosp_15_r20/external/angle/third_party/abseil-cpp/absl/strings/ascii.h (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 //
2 // Copyright 2017 The Abseil Authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      https://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // -----------------------------------------------------------------------------
17 // File: ascii.h
18 // -----------------------------------------------------------------------------
19 //
20 // This package contains functions operating on characters and strings
21 // restricted to standard ASCII. These include character classification
22 // functions analogous to those found in the ANSI C Standard Library <ctype.h>
23 // header file.
24 //
25 // C++ implementations provide <ctype.h> functionality based on their
26 // C environment locale. In general, reliance on such a locale is not ideal, as
27 // the locale standard is problematic (and may not return invariant information
28 // for the same character set, for example). These `ascii_*()` functions are
29 // hard-wired for standard ASCII, much faster, and guaranteed to behave
30 // consistently.  They will never be overloaded, nor will their function
31 // signature change.
32 //
33 // `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
34 // `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
35 // `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
36 // `ascii_isxdigit()`
37 //   Analogous to the <ctype.h> functions with similar names, these
38 //   functions take an unsigned char and return a bool, based on whether the
39 //   character matches the condition specified.
40 //
41 //   If the input character has a numerical value greater than 127, these
42 //   functions return `false`.
43 //
44 // `ascii_tolower()`, `ascii_toupper()`
45 //   Analogous to the <ctype.h> functions with similar names, these functions
46 //   take an unsigned char and return a char.
47 //
48 //   If the input character is not an ASCII {lower,upper}-case letter (including
49 //   numerical values greater than 127) then the functions return the same value
50 //   as the input character.
51 
52 #ifndef ABSL_STRINGS_ASCII_H_
53 #define ABSL_STRINGS_ASCII_H_
54 
55 #include <algorithm>
56 #include <cstddef>
57 #include <string>
58 #include <utility>
59 
60 #include "absl/base/attributes.h"
61 #include "absl/base/config.h"
62 #include "absl/base/nullability.h"
63 #include "absl/strings/internal/resize_uninitialized.h"
64 #include "absl/strings/string_view.h"
65 
66 namespace absl {
67 ABSL_NAMESPACE_BEGIN
68 namespace ascii_internal {
69 
70 // Declaration for an array of bitfields holding character information.
71 ABSL_DLL extern const unsigned char kPropertyBits[256];
72 
73 // Declaration for the array of characters to upper-case characters.
74 ABSL_DLL extern const char kToUpper[256];
75 
76 // Declaration for the array of characters to lower-case characters.
77 ABSL_DLL extern const char kToLower[256];
78 
79 void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
80                      size_t n);
81 
82 void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
83                      size_t n);
84 
85 }  // namespace ascii_internal
86 
87 // ascii_isalpha()
88 //
89 // Determines whether the given character is an alphabetic character.
ascii_isalpha(unsigned char c)90 inline bool ascii_isalpha(unsigned char c) {
91   return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
92 }
93 
94 // ascii_isalnum()
95 //
96 // Determines whether the given character is an alphanumeric character.
ascii_isalnum(unsigned char c)97 inline bool ascii_isalnum(unsigned char c) {
98   return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
99 }
100 
101 // ascii_isspace()
102 //
103 // Determines whether the given character is a whitespace character (space,
104 // tab, vertical tab, formfeed, linefeed, or carriage return).
ascii_isspace(unsigned char c)105 inline bool ascii_isspace(unsigned char c) {
106   return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
107 }
108 
109 // ascii_ispunct()
110 //
111 // Determines whether the given character is a punctuation character.
ascii_ispunct(unsigned char c)112 inline bool ascii_ispunct(unsigned char c) {
113   return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
114 }
115 
116 // ascii_isblank()
117 //
118 // Determines whether the given character is a blank character (tab or space).
ascii_isblank(unsigned char c)119 inline bool ascii_isblank(unsigned char c) {
120   return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
121 }
122 
123 // ascii_iscntrl()
124 //
125 // Determines whether the given character is a control character.
ascii_iscntrl(unsigned char c)126 inline bool ascii_iscntrl(unsigned char c) {
127   return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
128 }
129 
130 // ascii_isxdigit()
131 //
132 // Determines whether the given character can be represented as a hexadecimal
133 // digit character (i.e. {0-9} or {A-F}).
ascii_isxdigit(unsigned char c)134 inline bool ascii_isxdigit(unsigned char c) {
135   return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
136 }
137 
// ascii_isdigit()
//
// Determines whether the given character can be represented as a decimal
// digit character (i.e. {0-9}).
//
// Usable in constant expressions; performs a plain range comparison rather
// than a table lookup.
inline constexpr bool ascii_isdigit(unsigned char c) {
  return c >= '0' && c <= '9';
}
145 
// ascii_isprint()
//
// Determines whether the given character is printable, including spaces.
//
// True exactly for values in [32, 126]; DEL (127) and all values above it are
// rejected. Usable in constant expressions.
inline constexpr bool ascii_isprint(unsigned char c) {
  return c >= 32 && c < 127;
}
152 
// ascii_isgraph()
//
// Determines whether the given character has a graphical representation.
//
// True exactly for values in [33, 126], i.e. the printable range without the
// space character. Usable in constant expressions.
inline constexpr bool ascii_isgraph(unsigned char c) {
  return c > 32 && c < 127;
}
159 
// ascii_isupper()
//
// Determines whether the given character is uppercase.
//
// True exactly for 'A' through 'Z'. Usable in constant expressions.
inline constexpr bool ascii_isupper(unsigned char c) {
  return c >= 'A' && c <= 'Z';
}
166 
// ascii_islower()
//
// Determines whether the given character is lowercase.
//
// True exactly for 'a' through 'z'. Usable in constant expressions.
inline constexpr bool ascii_islower(unsigned char c) {
  return c >= 'a' && c <= 'z';
}
173 
// ascii_isascii()
//
// Determines whether the given character is ASCII, i.e. its value is below
// 128. Usable in constant expressions.
inline constexpr bool ascii_isascii(unsigned char c) { return c < 128; }
178 
179 // ascii_tolower()
180 //
181 // Returns an ASCII character, converting to lowercase if uppercase is
182 // passed. Note that character values > 127 are simply returned.
ascii_tolower(unsigned char c)183 inline char ascii_tolower(unsigned char c) {
184   return ascii_internal::kToLower[c];
185 }
186 
187 // Converts the characters in `s` to lowercase, changing the contents of `s`.
188 void AsciiStrToLower(absl::Nonnull<std::string*> s);
189 
190 // Creates a lowercase string from a given absl::string_view.
AsciiStrToLower(absl::string_view s)191 ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
192   std::string result;
193   strings_internal::STLStringResizeUninitialized(&result, s.size());
194   ascii_internal::AsciiStrToLower(&result[0], s.data(), s.size());
195   return result;
196 }
197 
198 // Creates a lowercase string from a given std::string&&.
199 //
200 // (Template is used to lower priority of this overload.)
201 template <int&... DoNotSpecify>
AsciiStrToLower(std::string && s)202 ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(std::string&& s) {
203   std::string result = std::move(s);
204   absl::AsciiStrToLower(&result);
205   return result;
206 }
207 
208 // ascii_toupper()
209 //
210 // Returns the ASCII character, converting to upper-case if lower-case is
211 // passed. Note that characters values > 127 are simply returned.
ascii_toupper(unsigned char c)212 inline char ascii_toupper(unsigned char c) {
213   return ascii_internal::kToUpper[c];
214 }
215 
216 // Converts the characters in `s` to uppercase, changing the contents of `s`.
217 void AsciiStrToUpper(absl::Nonnull<std::string*> s);
218 
219 // Creates an uppercase string from a given absl::string_view.
AsciiStrToUpper(absl::string_view s)220 ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
221   std::string result;
222   strings_internal::STLStringResizeUninitialized(&result, s.size());
223   ascii_internal::AsciiStrToUpper(&result[0], s.data(), s.size());
224   return result;
225 }
226 
227 // Creates an uppercase string from a given std::string&&.
228 //
229 // (Template is used to lower priority of this overload.)
230 template <int&... DoNotSpecify>
AsciiStrToUpper(std::string && s)231 ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(std::string&& s) {
232   std::string result = std::move(s);
233   absl::AsciiStrToUpper(&result);
234   return result;
235 }
236 
237 // Returns absl::string_view with whitespace stripped from the beginning of the
238 // given string_view.
StripLeadingAsciiWhitespace(absl::string_view str)239 ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
240     absl::string_view str) {
241   auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
242   return str.substr(static_cast<size_t>(it - str.begin()));
243 }
244 
245 // Strips in place whitespace from the beginning of the given string.
StripLeadingAsciiWhitespace(absl::Nonnull<std::string * > str)246 inline void StripLeadingAsciiWhitespace(absl::Nonnull<std::string*> str) {
247   auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
248   str->erase(str->begin(), it);
249 }
250 
251 // Returns absl::string_view with whitespace stripped from the end of the given
252 // string_view.
StripTrailingAsciiWhitespace(absl::string_view str)253 ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
254     absl::string_view str) {
255   auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
256   return str.substr(0, static_cast<size_t>(str.rend() - it));
257 }
258 
259 // Strips in place whitespace from the end of the given string
StripTrailingAsciiWhitespace(absl::Nonnull<std::string * > str)260 inline void StripTrailingAsciiWhitespace(absl::Nonnull<std::string*> str) {
261   auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
262   str->erase(static_cast<size_t>(str->rend() - it));
263 }
264 
265 // Returns absl::string_view with whitespace stripped from both ends of the
266 // given string_view.
StripAsciiWhitespace(absl::string_view str)267 ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
268     absl::string_view str) {
269   return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str));
270 }
271 
272 // Strips in place whitespace from both ends of the given string
StripAsciiWhitespace(absl::Nonnull<std::string * > str)273 inline void StripAsciiWhitespace(absl::Nonnull<std::string*> str) {
274   StripTrailingAsciiWhitespace(str);
275   StripLeadingAsciiWhitespace(str);
276 }
277 
278 // Removes leading, trailing, and consecutive internal whitespace.
279 void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str);
280 
281 ABSL_NAMESPACE_END
282 }  // namespace absl
283 
284 #endif  // ABSL_STRINGS_ASCII_H_
285