xref: /aosp_15_r20/external/libchrome/base/i18n/char_iterator.h (revision 635a864187cb8b6c713ff48b7e790a6b21769273)
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*635a8641SAndroid Build Coastguard Worker 
5*635a8641SAndroid Build Coastguard Worker #ifndef BASE_I18N_CHAR_ITERATOR_H_
6*635a8641SAndroid Build Coastguard Worker #define BASE_I18N_CHAR_ITERATOR_H_
7*635a8641SAndroid Build Coastguard Worker 
8*635a8641SAndroid Build Coastguard Worker #include <stddef.h>
9*635a8641SAndroid Build Coastguard Worker #include <stdint.h>
10*635a8641SAndroid Build Coastguard Worker 
11*635a8641SAndroid Build Coastguard Worker #include <string>
12*635a8641SAndroid Build Coastguard Worker 
13*635a8641SAndroid Build Coastguard Worker #include "base/i18n/base_i18n_export.h"
14*635a8641SAndroid Build Coastguard Worker #include "base/macros.h"
15*635a8641SAndroid Build Coastguard Worker #include "base/strings/string16.h"
16*635a8641SAndroid Build Coastguard Worker #include "build/build_config.h"
17*635a8641SAndroid Build Coastguard Worker 
// The CharIterator classes iterate through the characters in UTF8 and
// UTF16 strings.  Example usage:
//
//   UTF8CharIterator iter(&str);
//   while (!iter.end()) {
//     VLOG(1) << iter.get();
//     iter.Advance();
//   }
26*635a8641SAndroid Build Coastguard Worker 
// On Windows builds, make sure |uint8_t| names |unsigned char|.
// NOTE(review): <stdint.h> is already included by this header, so this
// typedef looks redundant -- confirm before removing it.
#if defined(OS_WIN)
typedef unsigned char uint8_t;
#endif
30*635a8641SAndroid Build Coastguard Worker 
31*635a8641SAndroid Build Coastguard Worker namespace base {
32*635a8641SAndroid Build Coastguard Worker namespace i18n {
33*635a8641SAndroid Build Coastguard Worker 
34*635a8641SAndroid Build Coastguard Worker class BASE_I18N_EXPORT UTF8CharIterator {
35*635a8641SAndroid Build Coastguard Worker  public:
36*635a8641SAndroid Build Coastguard Worker   // Requires |str| to live as long as the UTF8CharIterator does.
37*635a8641SAndroid Build Coastguard Worker   explicit UTF8CharIterator(const std::string* str);
38*635a8641SAndroid Build Coastguard Worker   ~UTF8CharIterator();
39*635a8641SAndroid Build Coastguard Worker 
40*635a8641SAndroid Build Coastguard Worker   // Return the starting array index of the current character within the
41*635a8641SAndroid Build Coastguard Worker   // string.
array_pos()42*635a8641SAndroid Build Coastguard Worker   int32_t array_pos() const { return array_pos_; }
43*635a8641SAndroid Build Coastguard Worker 
44*635a8641SAndroid Build Coastguard Worker   // Return the logical index of the current character, independent of the
45*635a8641SAndroid Build Coastguard Worker   // number of bytes each character takes.
char_pos()46*635a8641SAndroid Build Coastguard Worker   int32_t char_pos() const { return char_pos_; }
47*635a8641SAndroid Build Coastguard Worker 
48*635a8641SAndroid Build Coastguard Worker   // Return the current char.
get()49*635a8641SAndroid Build Coastguard Worker   int32_t get() const { return char_; }
50*635a8641SAndroid Build Coastguard Worker 
51*635a8641SAndroid Build Coastguard Worker   // Returns true if we're at the end of the string.
end()52*635a8641SAndroid Build Coastguard Worker   bool end() const { return array_pos_ == len_; }
53*635a8641SAndroid Build Coastguard Worker 
54*635a8641SAndroid Build Coastguard Worker   // Advance to the next actual character.  Returns false if we're at the
55*635a8641SAndroid Build Coastguard Worker   // end of the string.
56*635a8641SAndroid Build Coastguard Worker   bool Advance();
57*635a8641SAndroid Build Coastguard Worker 
58*635a8641SAndroid Build Coastguard Worker  private:
59*635a8641SAndroid Build Coastguard Worker   // The string we're iterating over.
60*635a8641SAndroid Build Coastguard Worker   const uint8_t* str_;
61*635a8641SAndroid Build Coastguard Worker 
62*635a8641SAndroid Build Coastguard Worker   // The length of the encoded string.
63*635a8641SAndroid Build Coastguard Worker   int32_t len_;
64*635a8641SAndroid Build Coastguard Worker 
65*635a8641SAndroid Build Coastguard Worker   // Array index.
66*635a8641SAndroid Build Coastguard Worker   int32_t array_pos_;
67*635a8641SAndroid Build Coastguard Worker 
68*635a8641SAndroid Build Coastguard Worker   // The next array index.
69*635a8641SAndroid Build Coastguard Worker   int32_t next_pos_;
70*635a8641SAndroid Build Coastguard Worker 
71*635a8641SAndroid Build Coastguard Worker   // Character index.
72*635a8641SAndroid Build Coastguard Worker   int32_t char_pos_;
73*635a8641SAndroid Build Coastguard Worker 
74*635a8641SAndroid Build Coastguard Worker   // The current character.
75*635a8641SAndroid Build Coastguard Worker   int32_t char_;
76*635a8641SAndroid Build Coastguard Worker 
77*635a8641SAndroid Build Coastguard Worker   DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator);
78*635a8641SAndroid Build Coastguard Worker };
79*635a8641SAndroid Build Coastguard Worker 
80*635a8641SAndroid Build Coastguard Worker class BASE_I18N_EXPORT UTF16CharIterator {
81*635a8641SAndroid Build Coastguard Worker  public:
82*635a8641SAndroid Build Coastguard Worker   // Requires |str| to live as long as the UTF16CharIterator does.
83*635a8641SAndroid Build Coastguard Worker   explicit UTF16CharIterator(const string16* str);
84*635a8641SAndroid Build Coastguard Worker   UTF16CharIterator(const char16* str, size_t str_len);
85*635a8641SAndroid Build Coastguard Worker   ~UTF16CharIterator();
86*635a8641SAndroid Build Coastguard Worker 
87*635a8641SAndroid Build Coastguard Worker   // Return the starting array index of the current character within the
88*635a8641SAndroid Build Coastguard Worker   // string.
array_pos()89*635a8641SAndroid Build Coastguard Worker   int32_t array_pos() const { return array_pos_; }
90*635a8641SAndroid Build Coastguard Worker 
91*635a8641SAndroid Build Coastguard Worker   // Return the logical index of the current character, independent of the
92*635a8641SAndroid Build Coastguard Worker   // number of codewords each character takes.
char_pos()93*635a8641SAndroid Build Coastguard Worker   int32_t char_pos() const { return char_pos_; }
94*635a8641SAndroid Build Coastguard Worker 
95*635a8641SAndroid Build Coastguard Worker   // Return the current char.
get()96*635a8641SAndroid Build Coastguard Worker   int32_t get() const { return char_; }
97*635a8641SAndroid Build Coastguard Worker 
98*635a8641SAndroid Build Coastguard Worker   // Returns true if we're at the end of the string.
end()99*635a8641SAndroid Build Coastguard Worker   bool end() const { return array_pos_ == len_; }
100*635a8641SAndroid Build Coastguard Worker 
101*635a8641SAndroid Build Coastguard Worker   // Advance to the next actual character.  Returns false if we're at the
102*635a8641SAndroid Build Coastguard Worker   // end of the string.
103*635a8641SAndroid Build Coastguard Worker   bool Advance();
104*635a8641SAndroid Build Coastguard Worker 
105*635a8641SAndroid Build Coastguard Worker  private:
106*635a8641SAndroid Build Coastguard Worker   // Fills in the current character we found and advances to the next
107*635a8641SAndroid Build Coastguard Worker   // character, updating all flags as necessary.
108*635a8641SAndroid Build Coastguard Worker   void ReadChar();
109*635a8641SAndroid Build Coastguard Worker 
110*635a8641SAndroid Build Coastguard Worker   // The string we're iterating over.
111*635a8641SAndroid Build Coastguard Worker   const char16* str_;
112*635a8641SAndroid Build Coastguard Worker 
113*635a8641SAndroid Build Coastguard Worker   // The length of the encoded string.
114*635a8641SAndroid Build Coastguard Worker   int32_t len_;
115*635a8641SAndroid Build Coastguard Worker 
116*635a8641SAndroid Build Coastguard Worker   // Array index.
117*635a8641SAndroid Build Coastguard Worker   int32_t array_pos_;
118*635a8641SAndroid Build Coastguard Worker 
119*635a8641SAndroid Build Coastguard Worker   // The next array index.
120*635a8641SAndroid Build Coastguard Worker   int32_t next_pos_;
121*635a8641SAndroid Build Coastguard Worker 
122*635a8641SAndroid Build Coastguard Worker   // Character index.
123*635a8641SAndroid Build Coastguard Worker   int32_t char_pos_;
124*635a8641SAndroid Build Coastguard Worker 
125*635a8641SAndroid Build Coastguard Worker   // The current character.
126*635a8641SAndroid Build Coastguard Worker   int32_t char_;
127*635a8641SAndroid Build Coastguard Worker 
128*635a8641SAndroid Build Coastguard Worker   DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator);
129*635a8641SAndroid Build Coastguard Worker };
130*635a8641SAndroid Build Coastguard Worker 
131*635a8641SAndroid Build Coastguard Worker }  // namespace i18n
132*635a8641SAndroid Build Coastguard Worker }  // namespace base
133*635a8641SAndroid Build Coastguard Worker 
134*635a8641SAndroid Build Coastguard Worker #endif  // BASE_I18N_CHAR_ITERATOR_H_
135