1*635a8641SAndroid Build Coastguard Worker // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2*635a8641SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be 3*635a8641SAndroid Build Coastguard Worker // found in the LICENSE file. 4*635a8641SAndroid Build Coastguard Worker 5*635a8641SAndroid Build Coastguard Worker #ifndef BASE_I18N_CHAR_ITERATOR_H_ 6*635a8641SAndroid Build Coastguard Worker #define BASE_I18N_CHAR_ITERATOR_H_ 7*635a8641SAndroid Build Coastguard Worker 8*635a8641SAndroid Build Coastguard Worker #include <stddef.h> 9*635a8641SAndroid Build Coastguard Worker #include <stdint.h> 10*635a8641SAndroid Build Coastguard Worker 11*635a8641SAndroid Build Coastguard Worker #include <string> 12*635a8641SAndroid Build Coastguard Worker 13*635a8641SAndroid Build Coastguard Worker #include "base/i18n/base_i18n_export.h" 14*635a8641SAndroid Build Coastguard Worker #include "base/macros.h" 15*635a8641SAndroid Build Coastguard Worker #include "base/strings/string16.h" 16*635a8641SAndroid Build Coastguard Worker #include "build/build_config.h" 17*635a8641SAndroid Build Coastguard Worker 18*635a8641SAndroid Build Coastguard Worker // The CharIterator classes iterate through the characters in UTF8 and 19*635a8641SAndroid Build Coastguard Worker // UTF16 strings. Example usage: 20*635a8641SAndroid Build Coastguard Worker // 21*635a8641SAndroid Build Coastguard Worker // UTF8CharIterator iter(&str); 22*635a8641SAndroid Build Coastguard Worker // while (!iter.end()) { 23*635a8641SAndroid Build Coastguard Worker // VLOG(1) << iter.get(); 24*635a8641SAndroid Build Coastguard Worker // iter.Advance(); 25*635a8641SAndroid Build Coastguard Worker // } 26*635a8641SAndroid Build Coastguard Worker 27*635a8641SAndroid Build Coastguard Worker #if defined(OS_WIN) 28*635a8641SAndroid Build Coastguard Worker typedef unsigned char uint8_t; 29*635a8641SAndroid Build Coastguard Worker #endif 30*635a8641SAndroid Build Coastguard Worker 31*635a8641SAndroid Build Coastguard Worker namespace base { 32*635a8641SAndroid Build Coastguard Worker namespace i18n { 33*635a8641SAndroid Build Coastguard Worker 34*635a8641SAndroid Build Coastguard Worker class BASE_I18N_EXPORT UTF8CharIterator { 35*635a8641SAndroid Build Coastguard Worker public: 36*635a8641SAndroid Build Coastguard Worker // Requires |str| to live as long as the UTF8CharIterator does. 37*635a8641SAndroid Build Coastguard Worker explicit UTF8CharIterator(const std::string* str); 38*635a8641SAndroid Build Coastguard Worker ~UTF8CharIterator(); 39*635a8641SAndroid Build Coastguard Worker 40*635a8641SAndroid Build Coastguard Worker // Return the starting array index of the current character within the 41*635a8641SAndroid Build Coastguard Worker // string. array_pos()42*635a8641SAndroid Build Coastguard Worker int32_t array_pos() const { return array_pos_; } 43*635a8641SAndroid Build Coastguard Worker 44*635a8641SAndroid Build Coastguard Worker // Return the logical index of the current character, independent of the 45*635a8641SAndroid Build Coastguard Worker // number of bytes each character takes. char_pos()46*635a8641SAndroid Build Coastguard Worker int32_t char_pos() const { return char_pos_; } 47*635a8641SAndroid Build Coastguard Worker 48*635a8641SAndroid Build Coastguard Worker // Return the current char. get()49*635a8641SAndroid Build Coastguard Worker int32_t get() const { return char_; } 50*635a8641SAndroid Build Coastguard Worker 51*635a8641SAndroid Build Coastguard Worker // Returns true if we're at the end of the string. end()52*635a8641SAndroid Build Coastguard Worker bool end() const { return array_pos_ == len_; } 53*635a8641SAndroid Build Coastguard Worker 54*635a8641SAndroid Build Coastguard Worker // Advance to the next actual character. Returns false if we're at the 55*635a8641SAndroid Build Coastguard Worker // end of the string. 56*635a8641SAndroid Build Coastguard Worker bool Advance(); 57*635a8641SAndroid Build Coastguard Worker 58*635a8641SAndroid Build Coastguard Worker private: 59*635a8641SAndroid Build Coastguard Worker // The string we're iterating over. 60*635a8641SAndroid Build Coastguard Worker const uint8_t* str_; 61*635a8641SAndroid Build Coastguard Worker 62*635a8641SAndroid Build Coastguard Worker // The length of the encoded string. 63*635a8641SAndroid Build Coastguard Worker int32_t len_; 64*635a8641SAndroid Build Coastguard Worker 65*635a8641SAndroid Build Coastguard Worker // Array index. 66*635a8641SAndroid Build Coastguard Worker int32_t array_pos_; 67*635a8641SAndroid Build Coastguard Worker 68*635a8641SAndroid Build Coastguard Worker // The next array index. 69*635a8641SAndroid Build Coastguard Worker int32_t next_pos_; 70*635a8641SAndroid Build Coastguard Worker 71*635a8641SAndroid Build Coastguard Worker // Character index. 72*635a8641SAndroid Build Coastguard Worker int32_t char_pos_; 73*635a8641SAndroid Build Coastguard Worker 74*635a8641SAndroid Build Coastguard Worker // The current character. 75*635a8641SAndroid Build Coastguard Worker int32_t char_; 76*635a8641SAndroid Build Coastguard Worker 77*635a8641SAndroid Build Coastguard Worker DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator); 78*635a8641SAndroid Build Coastguard Worker }; 79*635a8641SAndroid Build Coastguard Worker 80*635a8641SAndroid Build Coastguard Worker class BASE_I18N_EXPORT UTF16CharIterator { 81*635a8641SAndroid Build Coastguard Worker public: 82*635a8641SAndroid Build Coastguard Worker // Requires |str| to live as long as the UTF16CharIterator does. 83*635a8641SAndroid Build Coastguard Worker explicit UTF16CharIterator(const string16* str); 84*635a8641SAndroid Build Coastguard Worker UTF16CharIterator(const char16* str, size_t str_len); 85*635a8641SAndroid Build Coastguard Worker ~UTF16CharIterator(); 86*635a8641SAndroid Build Coastguard Worker 87*635a8641SAndroid Build Coastguard Worker // Return the starting array index of the current character within the 88*635a8641SAndroid Build Coastguard Worker // string. array_pos()89*635a8641SAndroid Build Coastguard Worker int32_t array_pos() const { return array_pos_; } 90*635a8641SAndroid Build Coastguard Worker 91*635a8641SAndroid Build Coastguard Worker // Return the logical index of the current character, independent of the 92*635a8641SAndroid Build Coastguard Worker // number of codewords each character takes. char_pos()93*635a8641SAndroid Build Coastguard Worker int32_t char_pos() const { return char_pos_; } 94*635a8641SAndroid Build Coastguard Worker 95*635a8641SAndroid Build Coastguard Worker // Return the current char. get()96*635a8641SAndroid Build Coastguard Worker int32_t get() const { return char_; } 97*635a8641SAndroid Build Coastguard Worker 98*635a8641SAndroid Build Coastguard Worker // Returns true if we're at the end of the string. end()99*635a8641SAndroid Build Coastguard Worker bool end() const { return array_pos_ == len_; } 100*635a8641SAndroid Build Coastguard Worker 101*635a8641SAndroid Build Coastguard Worker // Advance to the next actual character. Returns false if we're at the 102*635a8641SAndroid Build Coastguard Worker // end of the string. 103*635a8641SAndroid Build Coastguard Worker bool Advance(); 104*635a8641SAndroid Build Coastguard Worker 105*635a8641SAndroid Build Coastguard Worker private: 106*635a8641SAndroid Build Coastguard Worker // Fills in the current character we found and advances to the next 107*635a8641SAndroid Build Coastguard Worker // character, updating all flags as necessary. 108*635a8641SAndroid Build Coastguard Worker void ReadChar(); 109*635a8641SAndroid Build Coastguard Worker 110*635a8641SAndroid Build Coastguard Worker // The string we're iterating over. 111*635a8641SAndroid Build Coastguard Worker const char16* str_; 112*635a8641SAndroid Build Coastguard Worker 113*635a8641SAndroid Build Coastguard Worker // The length of the encoded string. 114*635a8641SAndroid Build Coastguard Worker int32_t len_; 115*635a8641SAndroid Build Coastguard Worker 116*635a8641SAndroid Build Coastguard Worker // Array index. 117*635a8641SAndroid Build Coastguard Worker int32_t array_pos_; 118*635a8641SAndroid Build Coastguard Worker 119*635a8641SAndroid Build Coastguard Worker // The next array index. 120*635a8641SAndroid Build Coastguard Worker int32_t next_pos_; 121*635a8641SAndroid Build Coastguard Worker 122*635a8641SAndroid Build Coastguard Worker // Character index. 123*635a8641SAndroid Build Coastguard Worker int32_t char_pos_; 124*635a8641SAndroid Build Coastguard Worker 125*635a8641SAndroid Build Coastguard Worker // The current character. 126*635a8641SAndroid Build Coastguard Worker int32_t char_; 127*635a8641SAndroid Build Coastguard Worker 128*635a8641SAndroid Build Coastguard Worker DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator); 129*635a8641SAndroid Build Coastguard Worker }; 130*635a8641SAndroid Build Coastguard Worker 131*635a8641SAndroid Build Coastguard Worker } // namespace i18n 132*635a8641SAndroid Build Coastguard Worker } // namespace base 133*635a8641SAndroid Build Coastguard Worker 134*635a8641SAndroid Build Coastguard Worker #endif // BASE_I18N_CHAR_ITERATOR_H_ 135