xref: /aosp_15_r20/external/abseil-cpp/absl/strings/charset.h (revision 9356374a3709195abf420251b3e825997ff56c0f)
1 // Copyright 2022 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // -----------------------------------------------------------------------------
16 // File: charset.h
17 // -----------------------------------------------------------------------------
18 //
19 // This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
20 // characters.
21 //
22 // Instances can be initialized as constexpr constants. For example:
23 //
24 //   constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
25 //   constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
26 //   constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
27 //
28 // Multiple instances can be combined, and the result is still a constexpr expression.
29 // For example:
30 //
31 //   constexpr absl::CharSet kLettersAndNumbers =
32 //       absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
33 //
34 // Several pre-defined character classes are available that mirror the methods
35 // from <cctype>. For example:
36 //
37 //   constexpr absl::CharSet kLettersAndWhitespace =
38 //       absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
39 //
40 // To check membership, use the .contains method, e.g.
41 //
42 //   absl::CharSet hex_letters("abcdef");
43 //   hex_letters.contains('a');  // true
44 //   hex_letters.contains('g');  // false
45 
46 #ifndef ABSL_STRINGS_CHARSET_H_
47 #define ABSL_STRINGS_CHARSET_H_
48 
49 #include <cstddef>
50 #include <cstdint>
51 #include <cstring>
52 
53 #include "absl/base/macros.h"
54 #include "absl/base/port.h"
55 #include "absl/strings/string_view.h"
56 
57 namespace absl {
58 
// A set of 8-bit characters stored as a 256-bit vector (four 64-bit words).
//
// A character is always converted to `unsigned char` before it is used as an
// index, so the result does not depend on whether plain `char` is signed. The
// character with unsigned value `v` is represented by bit `v % 64` of word
// `v / 64`.
//
// Every member function is `constexpr`.
class CharSet {
 public:
  // Creates an empty set.
  constexpr CharSet() : m_() {}

  // Creates a set containing every character that appears in `str`.
  // Characters that occur more than once are simply set again.
  constexpr explicit CharSet(absl::string_view str) : m_() {
    for (char c : str) {
      SetChar(static_cast<unsigned char>(c));
    }
  }

  // Returns true if `c` is a member of this set.
  constexpr bool contains(char c) const {
    return ((m_[static_cast<unsigned char>(c) / 64] >>
             (static_cast<unsigned char>(c) % 64)) &
            0x1) == 0x1;
  }

  // Returns true if no character is a member of this set.
  constexpr bool empty() const {
    for (uint64_t c : m_) {
      if (c != 0) return false;
    }
    return true;
  }

  // Returns a set containing only the single character `x`.
  static constexpr CharSet Char(char x) {
    return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
  }

  // Returns a set containing all the chars in the closed interval [lo, hi].
  // The bounds are compared as `unsigned char` values; if `lo` is greater
  // than `hi` the result is the empty set.
  static constexpr CharSet Range(char lo, char hi) {
    return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
  }

  // Set intersection: the characters contained in both `a` and `b`.
  friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) {
    return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
                   a.m_[3] & b.m_[3]);
  }

  // Set union: the characters contained in `a`, in `b`, or in both.
  friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) {
    return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
                   a.m_[3] | b.m_[3]);
  }

  // Set complement: each of the 256 possible characters not contained in `a`.
  friend constexpr CharSet operator~(const CharSet& a) {
    return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
  }

  // Mirrors the char-classifying predicates in <cctype>.
  static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); }
  static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); }
  static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); }
  static constexpr CharSet AsciiAlphabet() {
    return AsciiLowercase() | AsciiUppercase();
  }
  static constexpr CharSet AsciiAlphanumerics() {
    return AsciiDigits() | AsciiAlphabet();
  }
  static constexpr CharSet AsciiHexDigits() {
    return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f');
  }
  // The characters 0x20 (space) through 0x7e ('~'), inclusive.
  static constexpr CharSet AsciiPrintable() {
    return CharSet::Range(0x20, 0x7e);
  }
  static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
  // Printable characters that are neither whitespace nor alphanumeric.
  static constexpr CharSet AsciiPunctuation() {
    return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics();
  }

 private:
  // Builds a set directly from its four words; `b0` holds chars 0-63, `b1`
  // chars 64-127, `b2` chars 128-191 and `b3` chars 192-255.
  constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
      : m_{b0, b1, b2, b3} {}

  // Returns the bits of word number `word` that fall inside the closed
  // interval [lo, hi], computed as [0, hi + 1) with [0, lo) removed.
  static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
    return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
           ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
  }

  // All the chars in the specified word of the range [0, upper).
  //
  // The final branch is reached only when
  // 64 * word < upper < 64 * (word + 1), so `upper % 64` lies in [1, 63] and
  // the shift count `64 - upper % 64` is never 64.
  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
                                                     uint64_t word) {
    return (upper <= 64 * word) ? 0
           : (upper >= 64 * (word + 1))
               ? ~static_cast<uint64_t>(0)
               : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
  }

  // Returns the single-bit mask for `x` if `x` is stored in word number
  // `word`, and 0 otherwise.
  static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
    return (static_cast<unsigned char>(x) / 64 == word)
               ? (static_cast<uint64_t>(1)
                  << (static_cast<unsigned char>(x) % 64))
               : 0;
  }

  // Adds `c` to this set.
  constexpr void SetChar(unsigned char c) {
    m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
  }

  // Membership bits: word i covers the characters [64 * i, 64 * i + 63].
  uint64_t m_[4];
};
161 
162 }  // namespace absl
163 
164 #endif  // ABSL_STRINGS_CHARSET_H_
165