xref: /aosp_15_r20/external/pdfium/core/fxcrt/code_point_view.h (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2023 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CORE_FXCRT_CODE_POINT_VIEW_H_
6 #define CORE_FXCRT_CODE_POINT_VIEW_H_
7 
#include <stddef.h>

#include <iterator>

#include "build/build_config.h"
#include "core/fxcrt/string_view_template.h"
#include "core/fxcrt/utf16.h"
#include "third_party/base/check_op.h"
12 
13 namespace pdfium {
14 
15 #if defined(WCHAR_T_IS_UTF16)
16 // A view over a UTF-16 `WideStringView` suitable for iterating by code point
17 // using a range-based `for` loop.
18 class CodePointView final {
19  public:
20   class Iterator {
21    public:
22     bool operator==(const Iterator& other) const {
23       return current_ == other.current_;
24     }
25 
26     bool operator!=(const Iterator& other) const {
27       return current_ != other.current_;
28     }
29 
30     Iterator& operator++() {
31       DCHECK_LT(current_, end_);
32       current_ += IsSupplementary(code_point_) ? 2 : 1;
33       code_point_ = Decode();
34       return *this;
35     }
36 
37     char32_t operator*() const {
38       DCHECK_NE(kSentinel, code_point_);
39       return code_point_;
40     }
41 
42    private:
43     friend class CodePointView;
44 
45     static constexpr char32_t kSentinel = -1;
46 
Iterator(const wchar_t * begin,const wchar_t * end)47     Iterator(const wchar_t* begin, const wchar_t* end)
48         : current_(begin), end_(end), code_point_(Decode()) {}
49 
Decode()50     char32_t Decode() {
51       if (current_ >= end_) {
52         return kSentinel;
53       }
54 
55       char32_t code_point = *current_;
56       if (IsHighSurrogate(code_point)) {
57         const wchar_t* next = current_ + 1;
58         if (next < end_ && IsLowSurrogate(*next)) {
59           code_point = SurrogatePair(code_point, *next).ToCodePoint();
60         }
61       }
62 
63       return code_point;
64     }
65 
66     const wchar_t* current_;
67     const wchar_t* end_;
68     char32_t code_point_;
69   };
70 
CodePointView(WideStringView backing)71   explicit CodePointView(WideStringView backing)
72       : begin_(backing.begin()), end_(backing.end()) {
73     DCHECK_LE(begin_, end_);
74   }
75 
begin()76   Iterator begin() const { return Iterator(begin_, end_); }
77 
end()78   Iterator end() const { return Iterator(end_, end_); }
79 
80  private:
81   // Note that a `WideStringView` member would make the constructor too complex.
82   const wchar_t* begin_;
83   const wchar_t* end_;
84 };
85 #else
// When `WCHAR_T_IS_UTF16` is not defined, no surrogate decoding is performed:
// `CodePointView` is simply another name for `WideStringView`.
// NOTE(review): this presumably relies on every `wchar_t` already holding a
// whole code point on such platforms -- confirm against build_config.h.
86 using CodePointView = WideStringView;
87 #endif  // defined(WCHAR_T_IS_UTF16)
88 
89 }  // namespace pdfium
90 
91 #endif  // CORE_FXCRT_CODE_POINT_VIEW_H_
92