xref: /aosp_15_r20/external/cronet/base/i18n/char_iterator.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #ifndef BASE_I18N_CHAR_ITERATOR_H_
6*6777b538SAndroid Build Coastguard Worker #define BASE_I18N_CHAR_ITERATOR_H_
7*6777b538SAndroid Build Coastguard Worker 
8*6777b538SAndroid Build Coastguard Worker #include <stdint.h>
9*6777b538SAndroid Build Coastguard Worker 
10*6777b538SAndroid Build Coastguard Worker #include <string_view>
11*6777b538SAndroid Build Coastguard Worker 
12*6777b538SAndroid Build Coastguard Worker #include "base/i18n/base_i18n_export.h"
13*6777b538SAndroid Build Coastguard Worker 
// The CharIterator classes iterate through the characters in UTF-8 and
// UTF-16 strings.  Example usage:
//
//   for (UTF8CharIterator iter(str); !iter.end(); iter.Advance()) {
//     VLOG(1) << iter.get();
//   }
20*6777b538SAndroid Build Coastguard Worker 
21*6777b538SAndroid Build Coastguard Worker namespace base {
22*6777b538SAndroid Build Coastguard Worker namespace i18n {
23*6777b538SAndroid Build Coastguard Worker 
24*6777b538SAndroid Build Coastguard Worker class BASE_I18N_EXPORT UTF8CharIterator {
25*6777b538SAndroid Build Coastguard Worker  public:
26*6777b538SAndroid Build Coastguard Worker   // Requires |str| to live as long as the UTF8CharIterator does.
27*6777b538SAndroid Build Coastguard Worker   explicit UTF8CharIterator(std::string_view str);
28*6777b538SAndroid Build Coastguard Worker   UTF8CharIterator(const UTF8CharIterator&) = delete;
29*6777b538SAndroid Build Coastguard Worker   UTF8CharIterator& operator=(const UTF8CharIterator&) = delete;
30*6777b538SAndroid Build Coastguard Worker   ~UTF8CharIterator();
31*6777b538SAndroid Build Coastguard Worker 
32*6777b538SAndroid Build Coastguard Worker   // Return the starting array index of the current character within the
33*6777b538SAndroid Build Coastguard Worker   // string.
array_pos()34*6777b538SAndroid Build Coastguard Worker   size_t array_pos() const { return array_pos_; }
35*6777b538SAndroid Build Coastguard Worker 
36*6777b538SAndroid Build Coastguard Worker   // Return the logical index of the current character, independent of the
37*6777b538SAndroid Build Coastguard Worker   // number of bytes each character takes.
char_pos()38*6777b538SAndroid Build Coastguard Worker   size_t char_pos() const { return char_pos_; }
39*6777b538SAndroid Build Coastguard Worker 
40*6777b538SAndroid Build Coastguard Worker   // Return the current char.
get()41*6777b538SAndroid Build Coastguard Worker   int32_t get() const { return char_; }
42*6777b538SAndroid Build Coastguard Worker 
43*6777b538SAndroid Build Coastguard Worker   // Returns true if we're at the end of the string.
end()44*6777b538SAndroid Build Coastguard Worker   bool end() const { return array_pos_ == str_.length(); }
45*6777b538SAndroid Build Coastguard Worker 
46*6777b538SAndroid Build Coastguard Worker   // Advance to the next actual character.  Returns false if we're at the
47*6777b538SAndroid Build Coastguard Worker   // end of the string.
48*6777b538SAndroid Build Coastguard Worker   bool Advance();
49*6777b538SAndroid Build Coastguard Worker 
50*6777b538SAndroid Build Coastguard Worker  private:
51*6777b538SAndroid Build Coastguard Worker   // The string we're iterating over.
52*6777b538SAndroid Build Coastguard Worker   std::string_view str_;
53*6777b538SAndroid Build Coastguard Worker 
54*6777b538SAndroid Build Coastguard Worker   // Array index.
55*6777b538SAndroid Build Coastguard Worker   size_t array_pos_;
56*6777b538SAndroid Build Coastguard Worker 
57*6777b538SAndroid Build Coastguard Worker   // The next array index.
58*6777b538SAndroid Build Coastguard Worker   size_t next_pos_;
59*6777b538SAndroid Build Coastguard Worker 
60*6777b538SAndroid Build Coastguard Worker   // Character index.
61*6777b538SAndroid Build Coastguard Worker   size_t char_pos_;
62*6777b538SAndroid Build Coastguard Worker 
63*6777b538SAndroid Build Coastguard Worker   // The current character.
64*6777b538SAndroid Build Coastguard Worker   int32_t char_;
65*6777b538SAndroid Build Coastguard Worker };
66*6777b538SAndroid Build Coastguard Worker 
67*6777b538SAndroid Build Coastguard Worker class BASE_I18N_EXPORT UTF16CharIterator {
68*6777b538SAndroid Build Coastguard Worker  public:
69*6777b538SAndroid Build Coastguard Worker   // Requires |str| to live as long as the UTF16CharIterator does.
70*6777b538SAndroid Build Coastguard Worker   explicit UTF16CharIterator(std::u16string_view str);
71*6777b538SAndroid Build Coastguard Worker   UTF16CharIterator(UTF16CharIterator&& to_move);
72*6777b538SAndroid Build Coastguard Worker   UTF16CharIterator& operator=(UTF16CharIterator&& to_move);
73*6777b538SAndroid Build Coastguard Worker 
74*6777b538SAndroid Build Coastguard Worker   UTF16CharIterator(const UTF16CharIterator&) = delete;
75*6777b538SAndroid Build Coastguard Worker   UTF16CharIterator operator=(const UTF16CharIterator&) = delete;
76*6777b538SAndroid Build Coastguard Worker 
77*6777b538SAndroid Build Coastguard Worker   ~UTF16CharIterator();
78*6777b538SAndroid Build Coastguard Worker 
79*6777b538SAndroid Build Coastguard Worker   // Returns an iterator starting on the unicode character at offset
80*6777b538SAndroid Build Coastguard Worker   // |array_index| into the string, or the previous array offset if
81*6777b538SAndroid Build Coastguard Worker   // |array_index| is the second half of a surrogate pair.
82*6777b538SAndroid Build Coastguard Worker   static UTF16CharIterator LowerBound(std::u16string_view str,
83*6777b538SAndroid Build Coastguard Worker                                       size_t array_index);
84*6777b538SAndroid Build Coastguard Worker 
85*6777b538SAndroid Build Coastguard Worker   // Returns an iterator starting on the unicode character at offset
86*6777b538SAndroid Build Coastguard Worker   // |array_index| into the string, or the next offset if |array_index| is the
87*6777b538SAndroid Build Coastguard Worker   // second half of a surrogate pair.
88*6777b538SAndroid Build Coastguard Worker   static UTF16CharIterator UpperBound(std::u16string_view str,
89*6777b538SAndroid Build Coastguard Worker                                       size_t array_index);
90*6777b538SAndroid Build Coastguard Worker 
91*6777b538SAndroid Build Coastguard Worker   // Return the starting array index of the current character within the
92*6777b538SAndroid Build Coastguard Worker   // string.
array_pos()93*6777b538SAndroid Build Coastguard Worker   size_t array_pos() const { return array_pos_; }
94*6777b538SAndroid Build Coastguard Worker 
95*6777b538SAndroid Build Coastguard Worker   // Returns the offset in code points from the initial iterator position, which
96*6777b538SAndroid Build Coastguard Worker   // could be negative if Rewind() is called. The initial value is always zero,
97*6777b538SAndroid Build Coastguard Worker   // regardless of how the iterator is constructed.
char_offset()98*6777b538SAndroid Build Coastguard Worker   int32_t char_offset() const { return char_offset_; }
99*6777b538SAndroid Build Coastguard Worker 
100*6777b538SAndroid Build Coastguard Worker   // Returns the code point at the current position.
get()101*6777b538SAndroid Build Coastguard Worker   int32_t get() const { return char_; }
102*6777b538SAndroid Build Coastguard Worker 
103*6777b538SAndroid Build Coastguard Worker   // Returns the code point (i.e. the full Unicode character, not half of a
104*6777b538SAndroid Build Coastguard Worker   // surrogate pair) following the current one. Should not be called if end() is
105*6777b538SAndroid Build Coastguard Worker   // true. If the current code point is the last one in the string, returns
106*6777b538SAndroid Build Coastguard Worker   // zero.
107*6777b538SAndroid Build Coastguard Worker   int32_t NextCodePoint() const;
108*6777b538SAndroid Build Coastguard Worker 
109*6777b538SAndroid Build Coastguard Worker   // Returns the code point (i.e. the full Unicode character, not half of a
110*6777b538SAndroid Build Coastguard Worker   // surrogate pair) preceding the current one. Should not be called if start()
111*6777b538SAndroid Build Coastguard Worker   // is true.
112*6777b538SAndroid Build Coastguard Worker   int32_t PreviousCodePoint() const;
113*6777b538SAndroid Build Coastguard Worker 
114*6777b538SAndroid Build Coastguard Worker   // Returns true if we're at the start of the string.
start()115*6777b538SAndroid Build Coastguard Worker   bool start() const { return array_pos_ == 0; }
116*6777b538SAndroid Build Coastguard Worker 
117*6777b538SAndroid Build Coastguard Worker   // Returns true if we're at the end of the string.
end()118*6777b538SAndroid Build Coastguard Worker   bool end() const { return array_pos_ == str_.length(); }
119*6777b538SAndroid Build Coastguard Worker 
120*6777b538SAndroid Build Coastguard Worker   // Advances to the next actual character.  Returns false if we're at the
121*6777b538SAndroid Build Coastguard Worker   // end of the string.
122*6777b538SAndroid Build Coastguard Worker   bool Advance();
123*6777b538SAndroid Build Coastguard Worker 
124*6777b538SAndroid Build Coastguard Worker   // Moves to the previous actual character. Returns false if we're at the start
125*6777b538SAndroid Build Coastguard Worker   // of the string.
126*6777b538SAndroid Build Coastguard Worker   bool Rewind();
127*6777b538SAndroid Build Coastguard Worker 
128*6777b538SAndroid Build Coastguard Worker  private:
129*6777b538SAndroid Build Coastguard Worker   UTF16CharIterator(std::u16string_view str, size_t initial_pos);
130*6777b538SAndroid Build Coastguard Worker 
131*6777b538SAndroid Build Coastguard Worker   // Fills in the current character we found and advances to the next
132*6777b538SAndroid Build Coastguard Worker   // character, updating all flags as necessary.
133*6777b538SAndroid Build Coastguard Worker   void ReadChar();
134*6777b538SAndroid Build Coastguard Worker 
135*6777b538SAndroid Build Coastguard Worker   // The string we're iterating over.
136*6777b538SAndroid Build Coastguard Worker   std::u16string_view str_;
137*6777b538SAndroid Build Coastguard Worker 
138*6777b538SAndroid Build Coastguard Worker   // Array index.
139*6777b538SAndroid Build Coastguard Worker   size_t array_pos_;
140*6777b538SAndroid Build Coastguard Worker 
141*6777b538SAndroid Build Coastguard Worker   // The next array index.
142*6777b538SAndroid Build Coastguard Worker   size_t next_pos_;
143*6777b538SAndroid Build Coastguard Worker 
144*6777b538SAndroid Build Coastguard Worker   // Character offset from the initial position of the iterator.
145*6777b538SAndroid Build Coastguard Worker   int32_t char_offset_;
146*6777b538SAndroid Build Coastguard Worker 
147*6777b538SAndroid Build Coastguard Worker   // The current character.
148*6777b538SAndroid Build Coastguard Worker   int32_t char_;
149*6777b538SAndroid Build Coastguard Worker };
150*6777b538SAndroid Build Coastguard Worker 
151*6777b538SAndroid Build Coastguard Worker }  // namespace i18n
152*6777b538SAndroid Build Coastguard Worker }  // namespace base
153*6777b538SAndroid Build Coastguard Worker 
154*6777b538SAndroid Build Coastguard Worker #endif  // BASE_I18N_CHAR_ITERATOR_H_
155