1 // Copyright 2023 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CORE_FXCRT_CODE_POINT_VIEW_H_ 6 #define CORE_FXCRT_CODE_POINT_VIEW_H_ 7 8 #include "build/build_config.h" 9 #include "core/fxcrt/string_view_template.h" 10 #include "core/fxcrt/utf16.h" 11 #include "third_party/base/check_op.h" 12 13 namespace pdfium { 14 15 #if defined(WCHAR_T_IS_UTF16) 16 // A view over a UTF-16 `WideStringView` suitable for iterating by code point 17 // using a range-based `for` loop. 18 class CodePointView final { 19 public: 20 class Iterator { 21 public: 22 bool operator==(const Iterator& other) const { 23 return current_ == other.current_; 24 } 25 26 bool operator!=(const Iterator& other) const { 27 return current_ != other.current_; 28 } 29 30 Iterator& operator++() { 31 DCHECK_LT(current_, end_); 32 current_ += IsSupplementary(code_point_) ? 2 : 1; 33 code_point_ = Decode(); 34 return *this; 35 } 36 37 char32_t operator*() const { 38 DCHECK_NE(kSentinel, code_point_); 39 return code_point_; 40 } 41 42 private: 43 friend class CodePointView; 44 45 static constexpr char32_t kSentinel = -1; 46 Iterator(const wchar_t * begin,const wchar_t * end)47 Iterator(const wchar_t* begin, const wchar_t* end) 48 : current_(begin), end_(end), code_point_(Decode()) {} 49 Decode()50 char32_t Decode() { 51 if (current_ >= end_) { 52 return kSentinel; 53 } 54 55 char32_t code_point = *current_; 56 if (IsHighSurrogate(code_point)) { 57 const wchar_t* next = current_ + 1; 58 if (next < end_ && IsLowSurrogate(*next)) { 59 code_point = SurrogatePair(code_point, *next).ToCodePoint(); 60 } 61 } 62 63 return code_point; 64 } 65 66 const wchar_t* current_; 67 const wchar_t* end_; 68 char32_t code_point_; 69 }; 70 CodePointView(WideStringView backing)71 explicit CodePointView(WideStringView backing) 72 : begin_(backing.begin()), end_(backing.end()) { 73 DCHECK_LE(begin_, end_); 74 } 75 begin()76 Iterator begin() const { return Iterator(begin_, end_); } 77 end()78 Iterator end() const { return Iterator(end_, end_); } 79 80 private: 81 // Note that a `WideStringView` member would make the constructor too complex. 82 const wchar_t* begin_; 83 const wchar_t* end_; 84 }; 85 #else 86 using CodePointView = WideStringView; 87 #endif // defined(WCHAR_T_IS_UTF16) 88 89 } // namespace pdfium 90 91 #endif // CORE_FXCRT_CODE_POINT_VIEW_H_ 92