xref: /aosp_15_r20/external/skia/modules/skunicode/src/SkUnicode_icu4x.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1*c8dee2aaSAndroid Build Coastguard Worker /*
2*c8dee2aaSAndroid Build Coastguard Worker * Copyright 2023 Google Inc.
3*c8dee2aaSAndroid Build Coastguard Worker *
4*c8dee2aaSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license that can be
5*c8dee2aaSAndroid Build Coastguard Worker * found in the LICENSE file.
6*c8dee2aaSAndroid Build Coastguard Worker */
7*c8dee2aaSAndroid Build Coastguard Worker #include "modules/skunicode/include/SkUnicode_icu4x.h"
8*c8dee2aaSAndroid Build Coastguard Worker 
9*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkSpan.h"
10*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkString.h"
11*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkTypes.h"
12*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkTArray.h"
13*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkTo.h"
14*c8dee2aaSAndroid Build Coastguard Worker #include "modules/skunicode/include/SkUnicode.h"
15*c8dee2aaSAndroid Build Coastguard Worker #include "modules/skunicode/src/SkUnicode_hardcoded.h"
16*c8dee2aaSAndroid Build Coastguard Worker #include "src/base/SkBitmaskEnum.h"
17*c8dee2aaSAndroid Build Coastguard Worker #include "src/base/SkUTF.h"
18*c8dee2aaSAndroid Build Coastguard Worker 
19*c8dee2aaSAndroid Build Coastguard Worker #include <ICU4XBidi.hpp>
20*c8dee2aaSAndroid Build Coastguard Worker #include <ICU4XCaseMapper.hpp>
21*c8dee2aaSAndroid Build Coastguard Worker #include <ICU4XCodePointMapData8.hpp>
22*c8dee2aaSAndroid Build Coastguard Worker #include <ICU4XCodePointSetData.hpp>
23*c8dee2aaSAndroid Build Coastguard Worker #include <ICU4XDataProvider.hpp>
24*c8dee2aaSAndroid Build Coastguard Worker #include <ICU4XGraphemeClusterSegmenter.hpp>
25*c8dee2aaSAndroid Build Coastguard Worker #include <ICU4XLineSegmenter.hpp>
26*c8dee2aaSAndroid Build Coastguard Worker #include <ICU4XWordSegmenter.hpp>
27*c8dee2aaSAndroid Build Coastguard Worker 
28*c8dee2aaSAndroid Build Coastguard Worker #include <algorithm>
29*c8dee2aaSAndroid Build Coastguard Worker #include <cstdint>
30*c8dee2aaSAndroid Build Coastguard Worker #include <memory>
31*c8dee2aaSAndroid Build Coastguard Worker #include <string>
32*c8dee2aaSAndroid Build Coastguard Worker #include <utility>
33*c8dee2aaSAndroid Build Coastguard Worker #include <vector>
34*c8dee2aaSAndroid Build Coastguard Worker 
35*c8dee2aaSAndroid Build Coastguard Worker class SkUnicode_icu4x :  public SkUnicode {
36*c8dee2aaSAndroid Build Coastguard Worker public:
SkUnicode_icu4x()37*c8dee2aaSAndroid Build Coastguard Worker     SkUnicode_icu4x() {
38*c8dee2aaSAndroid Build Coastguard Worker         fLocale = ICU4XLocale::create_from_string("tr").ok().value();
39*c8dee2aaSAndroid Build Coastguard Worker         fDataProvider = ICU4XDataProvider::create_compiled();
40*c8dee2aaSAndroid Build Coastguard Worker         fCaseMapper = ICU4XCaseMapper::create(fDataProvider).ok().value();
41*c8dee2aaSAndroid Build Coastguard Worker         const auto general = ICU4XCodePointMapData8::load_general_category(fDataProvider).ok().value();
42*c8dee2aaSAndroid Build Coastguard Worker         fControls = general.get_set_for_value(/*Control*/15);
43*c8dee2aaSAndroid Build Coastguard Worker         fWhitespaces = general.get_set_for_value(/*SpaceSeparator*/12);
44*c8dee2aaSAndroid Build Coastguard Worker         fSpaces = general.get_set_for_value(/*SpaceSeparator*/12);
45*c8dee2aaSAndroid Build Coastguard Worker         // TODO: u_isSpace
46*c8dee2aaSAndroid Build Coastguard Worker         fBlanks = ICU4XCodePointSetData::load_blank(fDataProvider).ok().value();
47*c8dee2aaSAndroid Build Coastguard Worker         fEmoji = ICU4XCodePointSetData::load_emoji(fDataProvider).ok().value();
48*c8dee2aaSAndroid Build Coastguard Worker         fEmojiComponent = ICU4XCodePointSetData::load_emoji_component(fDataProvider).ok().value();
49*c8dee2aaSAndroid Build Coastguard Worker         fEmojiModifier = ICU4XCodePointSetData::load_emoji_modifier(fDataProvider).ok().value();
50*c8dee2aaSAndroid Build Coastguard Worker         fEmojiModifierBase = ICU4XCodePointSetData::load_emoji_modifier_base(fDataProvider).ok().value();
51*c8dee2aaSAndroid Build Coastguard Worker         fEmoji = ICU4XCodePointSetData::load_emoji(fDataProvider).ok().value();
52*c8dee2aaSAndroid Build Coastguard Worker         fRegionalIndicator = ICU4XCodePointSetData::load_regional_indicator(fDataProvider).ok().value();
53*c8dee2aaSAndroid Build Coastguard Worker         fIdeographic = ICU4XCodePointSetData::load_ideographic(fDataProvider).ok().value();
54*c8dee2aaSAndroid Build Coastguard Worker         fLineBreaks = ICU4XCodePointMapData8::load_line_break(fDataProvider).ok().value();
55*c8dee2aaSAndroid Build Coastguard Worker     }
56*c8dee2aaSAndroid Build Coastguard Worker 
57*c8dee2aaSAndroid Build Coastguard Worker     ~SkUnicode_icu4x() override = default;
58*c8dee2aaSAndroid Build Coastguard Worker 
59*c8dee2aaSAndroid Build Coastguard Worker     void reset();
60*c8dee2aaSAndroid Build Coastguard Worker 
61*c8dee2aaSAndroid Build Coastguard Worker     // SkUnicode properties
isControl(SkUnichar utf8)62*c8dee2aaSAndroid Build Coastguard Worker     bool isControl(SkUnichar utf8) override { return fControls.contains(utf8); }
isWhitespace(SkUnichar utf8)63*c8dee2aaSAndroid Build Coastguard Worker     bool isWhitespace(SkUnichar utf8) override { return fWhitespaces.contains(utf8); }
isSpace(SkUnichar utf8)64*c8dee2aaSAndroid Build Coastguard Worker     bool isSpace(SkUnichar utf8) override { return fBlanks.contains(utf8); }
isHardBreak(SkUnichar utf8)65*c8dee2aaSAndroid Build Coastguard Worker     bool isHardBreak(SkUnichar utf8) override {
66*c8dee2aaSAndroid Build Coastguard Worker         auto value = fLineBreaks.get(utf8);
67*c8dee2aaSAndroid Build Coastguard Worker         return (value == /*MandatoryBreak*/6) ||
68*c8dee2aaSAndroid Build Coastguard Worker                (value == /*CarriageReturn*/10) ||
69*c8dee2aaSAndroid Build Coastguard Worker                (value == /*LineFeed*/17) ||
70*c8dee2aaSAndroid Build Coastguard Worker                (value == /*NextLine*/29);
71*c8dee2aaSAndroid Build Coastguard Worker     }
isEmoji(SkUnichar utf8)72*c8dee2aaSAndroid Build Coastguard Worker     bool isEmoji(SkUnichar utf8) override { return fEmoji.contains(utf8); }
isEmojiComponent(SkUnichar utf8)73*c8dee2aaSAndroid Build Coastguard Worker     bool isEmojiComponent(SkUnichar utf8) override { return fEmojiComponent.contains(utf8); }
isEmojiModifierBase(SkUnichar utf8)74*c8dee2aaSAndroid Build Coastguard Worker     bool isEmojiModifierBase(SkUnichar utf8) override { return fEmojiModifierBase.contains(utf8); }
isEmojiModifier(SkUnichar utf8)75*c8dee2aaSAndroid Build Coastguard Worker     bool isEmojiModifier(SkUnichar utf8) override { return fEmojiModifier.contains(utf8); }
isRegionalIndicator(SkUnichar utf8)76*c8dee2aaSAndroid Build Coastguard Worker     bool isRegionalIndicator(SkUnichar utf8) override { return fRegionalIndicator.contains(utf8); }
isIdeographic(SkUnichar utf8)77*c8dee2aaSAndroid Build Coastguard Worker     bool isIdeographic(SkUnichar utf8) override { return fIdeographic.contains(utf8); }
78*c8dee2aaSAndroid Build Coastguard Worker 
79*c8dee2aaSAndroid Build Coastguard Worker     // TODO: is there a check for tabulation
isTabulation(SkUnichar utf8)80*c8dee2aaSAndroid Build Coastguard Worker     bool isTabulation(SkUnichar utf8) override {
81*c8dee2aaSAndroid Build Coastguard Worker         return utf8 == '\t';
82*c8dee2aaSAndroid Build Coastguard Worker     }
83*c8dee2aaSAndroid Build Coastguard Worker 
84*c8dee2aaSAndroid Build Coastguard Worker     // For SkShaper
85*c8dee2aaSAndroid Build Coastguard Worker     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
86*c8dee2aaSAndroid Build Coastguard Worker                                                      SkBidiIterator::Direction dir) override;
87*c8dee2aaSAndroid Build Coastguard Worker     std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],
88*c8dee2aaSAndroid Build Coastguard Worker                                                      int count,
89*c8dee2aaSAndroid Build Coastguard Worker                                                      SkBidiIterator::Direction dir) override;
90*c8dee2aaSAndroid Build Coastguard Worker     std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
91*c8dee2aaSAndroid Build Coastguard Worker                                                        BreakType breakType) override;
92*c8dee2aaSAndroid Build Coastguard Worker     std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override;
93*c8dee2aaSAndroid Build Coastguard Worker     // For SkParagraph
getBidiRegions(const char utf8[],int utf8Units,TextDirection dir,std::vector<BidiRegion> * results)94*c8dee2aaSAndroid Build Coastguard Worker     bool getBidiRegions(const char utf8[],
95*c8dee2aaSAndroid Build Coastguard Worker                         int utf8Units,
96*c8dee2aaSAndroid Build Coastguard Worker                         TextDirection dir,
97*c8dee2aaSAndroid Build Coastguard Worker                         std::vector<BidiRegion>* results) override {
98*c8dee2aaSAndroid Build Coastguard Worker 
99*c8dee2aaSAndroid Build Coastguard Worker         const auto bidi = ICU4XBidi::create(fDataProvider).ok().value();
100*c8dee2aaSAndroid Build Coastguard Worker         std::string_view string_view(utf8, utf8Units);
101*c8dee2aaSAndroid Build Coastguard Worker         auto info = bidi.for_text(string_view, dir == TextDirection::kLTR ? 0 : 1);
102*c8dee2aaSAndroid Build Coastguard Worker         auto currentLevel = info.level_at(0);
103*c8dee2aaSAndroid Build Coastguard Worker         size_t start = 0;
104*c8dee2aaSAndroid Build Coastguard Worker 
105*c8dee2aaSAndroid Build Coastguard Worker         for (size_t i = 1; i < info.size(); i++) {
106*c8dee2aaSAndroid Build Coastguard Worker             const auto level =  info.level_at(i);
107*c8dee2aaSAndroid Build Coastguard Worker             if (level != currentLevel) {
108*c8dee2aaSAndroid Build Coastguard Worker                 (*results).emplace_back(start, i, currentLevel);
109*c8dee2aaSAndroid Build Coastguard Worker                 currentLevel = level;
110*c8dee2aaSAndroid Build Coastguard Worker                 start = i;
111*c8dee2aaSAndroid Build Coastguard Worker             }
112*c8dee2aaSAndroid Build Coastguard Worker         }
113*c8dee2aaSAndroid Build Coastguard Worker         (*results).emplace_back(start, info.size(), currentLevel);
114*c8dee2aaSAndroid Build Coastguard Worker         return true;
115*c8dee2aaSAndroid Build Coastguard Worker     }
116*c8dee2aaSAndroid Build Coastguard Worker 
getBidiRegions(const uint16_t utf16[],int utf16Units,TextDirection dir,std::vector<BidiRegion> * results)117*c8dee2aaSAndroid Build Coastguard Worker     bool getBidiRegions(const uint16_t utf16[],
118*c8dee2aaSAndroid Build Coastguard Worker                         int utf16Units,
119*c8dee2aaSAndroid Build Coastguard Worker                         TextDirection dir,
120*c8dee2aaSAndroid Build Coastguard Worker                         std::vector<BidiRegion>* results) {
121*c8dee2aaSAndroid Build Coastguard Worker         auto utf8 = SkUnicode::convertUtf16ToUtf8((char16_t*)utf16, utf16Units);
122*c8dee2aaSAndroid Build Coastguard Worker         return this->getBidiRegions(utf8.data(), utf8.size(), dir, results);
123*c8dee2aaSAndroid Build Coastguard Worker     }
124*c8dee2aaSAndroid Build Coastguard Worker 
computeCodeUnitFlags(char utf8[],int utf8Units,bool replaceTabs,skia_private::TArray<SkUnicode::CodeUnitFlags,true> * results)125*c8dee2aaSAndroid Build Coastguard Worker     bool computeCodeUnitFlags(char utf8[],
126*c8dee2aaSAndroid Build Coastguard Worker                               int utf8Units,
127*c8dee2aaSAndroid Build Coastguard Worker                               bool replaceTabs,
128*c8dee2aaSAndroid Build Coastguard Worker                               skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) override {
129*c8dee2aaSAndroid Build Coastguard Worker         results->clear();
130*c8dee2aaSAndroid Build Coastguard Worker         results->push_back_n(utf8Units + 1, CodeUnitFlags::kNoCodeUnitFlag);
131*c8dee2aaSAndroid Build Coastguard Worker         this->markLineBreaks(utf8, utf8Units, /*hardLineBreaks=*/false, results);
132*c8dee2aaSAndroid Build Coastguard Worker         this->markHardLineBreaksHack(utf8, utf8Units, results);
133*c8dee2aaSAndroid Build Coastguard Worker         this->markGraphemes(utf8, utf8Units, results);
134*c8dee2aaSAndroid Build Coastguard Worker         this->markCharacters(utf8, utf8Units, replaceTabs, results);
135*c8dee2aaSAndroid Build Coastguard Worker         return true;
136*c8dee2aaSAndroid Build Coastguard Worker     }
137*c8dee2aaSAndroid Build Coastguard Worker 
computeCodeUnitFlags(char16_t utf16[],int utf16Units,bool replaceTabs,skia_private::TArray<SkUnicode::CodeUnitFlags,true> * results)138*c8dee2aaSAndroid Build Coastguard Worker     bool computeCodeUnitFlags(char16_t utf16[], int utf16Units, bool replaceTabs,
139*c8dee2aaSAndroid Build Coastguard Worker                           skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) override {
140*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(false);
141*c8dee2aaSAndroid Build Coastguard Worker         return true;
142*c8dee2aaSAndroid Build Coastguard Worker     }
143*c8dee2aaSAndroid Build Coastguard Worker 
getWords(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)144*c8dee2aaSAndroid Build Coastguard Worker     bool getWords(const char utf8[],
145*c8dee2aaSAndroid Build Coastguard Worker                   int utf8Units,
146*c8dee2aaSAndroid Build Coastguard Worker                   const char* locale,
147*c8dee2aaSAndroid Build Coastguard Worker                   std::vector<Position>* results) override {
148*c8dee2aaSAndroid Build Coastguard Worker         auto utf16 = SkUnicode::convertUtf8ToUtf16(utf8, utf8Units);
149*c8dee2aaSAndroid Build Coastguard Worker         const diplomat::span<const uint16_t> span((uint16_t*)utf16.data(), utf16.size());
150*c8dee2aaSAndroid Build Coastguard Worker         const auto segmenter = ICU4XWordSegmenter::create_dictionary(fDataProvider).ok().value();
151*c8dee2aaSAndroid Build Coastguard Worker         auto iterator = segmenter.segment_utf16(span);
152*c8dee2aaSAndroid Build Coastguard Worker         while (true) {
153*c8dee2aaSAndroid Build Coastguard Worker             int32_t breakpoint = iterator.next();
154*c8dee2aaSAndroid Build Coastguard Worker             if (breakpoint == -1) {
155*c8dee2aaSAndroid Build Coastguard Worker                 break;
156*c8dee2aaSAndroid Build Coastguard Worker             }
157*c8dee2aaSAndroid Build Coastguard Worker             results->emplace_back(breakpoint);
158*c8dee2aaSAndroid Build Coastguard Worker         }
159*c8dee2aaSAndroid Build Coastguard Worker         return true;
160*c8dee2aaSAndroid Build Coastguard Worker     }
161*c8dee2aaSAndroid Build Coastguard Worker 
toUpper(const SkString & str)162*c8dee2aaSAndroid Build Coastguard Worker     SkString toUpper(const SkString& str) override {
163*c8dee2aaSAndroid Build Coastguard Worker         return toUpper(str, "und");
164*c8dee2aaSAndroid Build Coastguard Worker     }
165*c8dee2aaSAndroid Build Coastguard Worker 
toUpper(const SkString & str,const char * localeStr)166*c8dee2aaSAndroid Build Coastguard Worker     SkString toUpper(const SkString& str, const char* localeStr) override {
167*c8dee2aaSAndroid Build Coastguard Worker         auto locale = ICU4XLocale::create_from_string(localeStr).ok().value();
168*c8dee2aaSAndroid Build Coastguard Worker         std::string std_string(str.data(), str.size());
169*c8dee2aaSAndroid Build Coastguard Worker         // TODO: upper case
170*c8dee2aaSAndroid Build Coastguard Worker         auto result = fCaseMapper.uppercase(std_string, locale).ok().value();
171*c8dee2aaSAndroid Build Coastguard Worker         return SkString(result.data(), result.size());
172*c8dee2aaSAndroid Build Coastguard Worker     }
173*c8dee2aaSAndroid Build Coastguard Worker 
reorderVisual(const BidiLevel runLevels[],int levelsCount,int32_t logicalFromVisual[])174*c8dee2aaSAndroid Build Coastguard Worker     void reorderVisual(const BidiLevel runLevels[],
175*c8dee2aaSAndroid Build Coastguard Worker                        int levelsCount,
176*c8dee2aaSAndroid Build Coastguard Worker                        int32_t logicalFromVisual[]) override {
177*c8dee2aaSAndroid Build Coastguard Worker 
178*c8dee2aaSAndroid Build Coastguard Worker         const auto bidi = ICU4XBidi::create(fDataProvider).ok().value();
179*c8dee2aaSAndroid Build Coastguard Worker         const diplomat::span<const uint8_t> levels(&runLevels[0], levelsCount);
180*c8dee2aaSAndroid Build Coastguard Worker         auto map = bidi.reorder_visual(levels);
181*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(levelsCount == map.len());
182*c8dee2aaSAndroid Build Coastguard Worker         std::vector<int32_t> results;
183*c8dee2aaSAndroid Build Coastguard Worker         for (size_t i = 0; i < map.len(); i++) {
184*c8dee2aaSAndroid Build Coastguard Worker             auto level = map.get(i);
185*c8dee2aaSAndroid Build Coastguard Worker             logicalFromVisual[i] = SkToS32(level);
186*c8dee2aaSAndroid Build Coastguard Worker         }
187*c8dee2aaSAndroid Build Coastguard Worker     }
188*c8dee2aaSAndroid Build Coastguard Worker 
189*c8dee2aaSAndroid Build Coastguard Worker private:
190*c8dee2aaSAndroid Build Coastguard Worker     friend class SkBreakIterator_icu4x;
191*c8dee2aaSAndroid Build Coastguard Worker     friend class SkBidiIterator_icu4x;
192*c8dee2aaSAndroid Build Coastguard Worker 
markHardLineBreaksHack(char utf8[],int utf8Units,skia_private::TArray<SkUnicode::CodeUnitFlags,true> * results)193*c8dee2aaSAndroid Build Coastguard Worker     bool markHardLineBreaksHack(char utf8[],
194*c8dee2aaSAndroid Build Coastguard Worker                                 int utf8Units,
195*c8dee2aaSAndroid Build Coastguard Worker                                 skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) {
196*c8dee2aaSAndroid Build Coastguard Worker         const char* end = utf8 + utf8Units;
197*c8dee2aaSAndroid Build Coastguard Worker         const char* ch = utf8;
198*c8dee2aaSAndroid Build Coastguard Worker         while (ch < end) {
199*c8dee2aaSAndroid Build Coastguard Worker             auto unichar = SkUTF::NextUTF8(&ch, end);
200*c8dee2aaSAndroid Build Coastguard Worker             if (this->isHardBreak(unichar)) {
201*c8dee2aaSAndroid Build Coastguard Worker                 (*results)[ch - utf8] |= CodeUnitFlags::kHardLineBreakBefore;
202*c8dee2aaSAndroid Build Coastguard Worker             }
203*c8dee2aaSAndroid Build Coastguard Worker         }
204*c8dee2aaSAndroid Build Coastguard Worker         return true;
205*c8dee2aaSAndroid Build Coastguard Worker     }
206*c8dee2aaSAndroid Build Coastguard Worker 
getChar32(const char * pointer,const char * end)207*c8dee2aaSAndroid Build Coastguard Worker     SkUnichar getChar32(const char* pointer, const char* end) {
208*c8dee2aaSAndroid Build Coastguard Worker         if (pointer < end) {
209*c8dee2aaSAndroid Build Coastguard Worker             return SkUTF::NextUTF8(&pointer, end);
210*c8dee2aaSAndroid Build Coastguard Worker         }
211*c8dee2aaSAndroid Build Coastguard Worker         return -1;
212*c8dee2aaSAndroid Build Coastguard Worker     }
213*c8dee2aaSAndroid Build Coastguard Worker 
markLineBreaks(char utf8[],int utf8Units,bool hardLineBreaks,skia_private::TArray<SkUnicode::CodeUnitFlags,true> * results)214*c8dee2aaSAndroid Build Coastguard Worker     bool markLineBreaks(char utf8[],
215*c8dee2aaSAndroid Build Coastguard Worker                         int utf8Units,
216*c8dee2aaSAndroid Build Coastguard Worker                         bool hardLineBreaks,
217*c8dee2aaSAndroid Build Coastguard Worker                         skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) {
218*c8dee2aaSAndroid Build Coastguard Worker         if (utf8Units == 0) {
219*c8dee2aaSAndroid Build Coastguard Worker             return true;
220*c8dee2aaSAndroid Build Coastguard Worker         }
221*c8dee2aaSAndroid Build Coastguard Worker         // TODO: Remove hard line break hack and detect it here
222*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(!hardLineBreaks);
223*c8dee2aaSAndroid Build Coastguard Worker         const auto lineBreakingOptions = hardLineBreaks
224*c8dee2aaSAndroid Build Coastguard Worker                                               ? ICU4XLineBreakOptionsV1{ICU4XLineBreakStrictness::Strict, ICU4XLineBreakWordOption::Normal}
225*c8dee2aaSAndroid Build Coastguard Worker                                               : ICU4XLineBreakOptionsV1{ICU4XLineBreakStrictness::Loose, ICU4XLineBreakWordOption::Normal};
226*c8dee2aaSAndroid Build Coastguard Worker         const auto segmenter = ICU4XLineSegmenter::create_auto_with_options_v1(fDataProvider, lineBreakingOptions).ok().value();
227*c8dee2aaSAndroid Build Coastguard Worker         std::string_view string_view(utf8, utf8Units);
228*c8dee2aaSAndroid Build Coastguard Worker         auto iterator = segmenter.segment_utf8(string_view);
229*c8dee2aaSAndroid Build Coastguard Worker 
230*c8dee2aaSAndroid Build Coastguard Worker         while (true) {
231*c8dee2aaSAndroid Build Coastguard Worker             int32_t lineBreak = iterator.next();
232*c8dee2aaSAndroid Build Coastguard Worker             if (lineBreak == -1) {
233*c8dee2aaSAndroid Build Coastguard Worker                 break;
234*c8dee2aaSAndroid Build Coastguard Worker             }
235*c8dee2aaSAndroid Build Coastguard Worker             if (hardLineBreaks) {
236*c8dee2aaSAndroid Build Coastguard Worker                 (*results)[lineBreak] |= CodeUnitFlags::kHardLineBreakBefore;
237*c8dee2aaSAndroid Build Coastguard Worker             } else {
238*c8dee2aaSAndroid Build Coastguard Worker                 (*results)[lineBreak] |= CodeUnitFlags::kSoftLineBreakBefore;
239*c8dee2aaSAndroid Build Coastguard Worker             }
240*c8dee2aaSAndroid Build Coastguard Worker         }
241*c8dee2aaSAndroid Build Coastguard Worker         if (!hardLineBreaks) {
242*c8dee2aaSAndroid Build Coastguard Worker             (*results)[0] |= CodeUnitFlags::kSoftLineBreakBefore;
243*c8dee2aaSAndroid Build Coastguard Worker             (*results)[utf8Units] |= CodeUnitFlags::kSoftLineBreakBefore;
244*c8dee2aaSAndroid Build Coastguard Worker         }
245*c8dee2aaSAndroid Build Coastguard Worker         return true;
246*c8dee2aaSAndroid Build Coastguard Worker     }
247*c8dee2aaSAndroid Build Coastguard Worker 
markGraphemes(const char utf8[],int utf8Units,skia_private::TArray<SkUnicode::CodeUnitFlags,true> * results)248*c8dee2aaSAndroid Build Coastguard Worker     bool markGraphemes(const char utf8[],
249*c8dee2aaSAndroid Build Coastguard Worker                        int utf8Units,
250*c8dee2aaSAndroid Build Coastguard Worker                        skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) {
251*c8dee2aaSAndroid Build Coastguard Worker         const auto segmenter = ICU4XGraphemeClusterSegmenter::create(fDataProvider).ok().value();
252*c8dee2aaSAndroid Build Coastguard Worker         std::string_view string_view(utf8, utf8Units);
253*c8dee2aaSAndroid Build Coastguard Worker         auto iterator = segmenter.segment_utf8(string_view);
254*c8dee2aaSAndroid Build Coastguard Worker         while (true) {
255*c8dee2aaSAndroid Build Coastguard Worker             int32_t graphemeStart = iterator.next();
256*c8dee2aaSAndroid Build Coastguard Worker             if (graphemeStart == -1) {
257*c8dee2aaSAndroid Build Coastguard Worker                 break;
258*c8dee2aaSAndroid Build Coastguard Worker             }
259*c8dee2aaSAndroid Build Coastguard Worker             (*results)[graphemeStart] |= CodeUnitFlags::kGraphemeStart;
260*c8dee2aaSAndroid Build Coastguard Worker         }
261*c8dee2aaSAndroid Build Coastguard Worker         return true;
262*c8dee2aaSAndroid Build Coastguard Worker     }
263*c8dee2aaSAndroid Build Coastguard Worker 
markCharacters(char utf8[],int utf8Units,bool replaceTabs,skia_private::TArray<SkUnicode::CodeUnitFlags,true> * results)264*c8dee2aaSAndroid Build Coastguard Worker     bool markCharacters(char utf8[],
265*c8dee2aaSAndroid Build Coastguard Worker                         int utf8Units,
266*c8dee2aaSAndroid Build Coastguard Worker                         bool replaceTabs,
267*c8dee2aaSAndroid Build Coastguard Worker                         skia_private::TArray<SkUnicode::CodeUnitFlags, true>* results) {
268*c8dee2aaSAndroid Build Coastguard Worker         const char* current = utf8;
269*c8dee2aaSAndroid Build Coastguard Worker         const char* end = utf8 + utf8Units;
270*c8dee2aaSAndroid Build Coastguard Worker         while (current < end) {
271*c8dee2aaSAndroid Build Coastguard Worker             auto before = current - utf8;
272*c8dee2aaSAndroid Build Coastguard Worker             SkUnichar unichar = SkUTF::NextUTF8(&current, end);
273*c8dee2aaSAndroid Build Coastguard Worker             if (unichar < 0) unichar = 0xFFFD;
274*c8dee2aaSAndroid Build Coastguard Worker             auto after = current - utf8;
275*c8dee2aaSAndroid Build Coastguard Worker             if (replaceTabs && SkUnicode_icu4x::isTabulation(unichar)) {
276*c8dee2aaSAndroid Build Coastguard Worker                 results->at(before) |= SkUnicode::kTabulation;
277*c8dee2aaSAndroid Build Coastguard Worker                 if (replaceTabs) {
278*c8dee2aaSAndroid Build Coastguard Worker                     unichar = ' ';
279*c8dee2aaSAndroid Build Coastguard Worker                     utf8[before] = ' ';
280*c8dee2aaSAndroid Build Coastguard Worker                 }
281*c8dee2aaSAndroid Build Coastguard Worker             }
282*c8dee2aaSAndroid Build Coastguard Worker             for (auto i = before; i < after; ++i) {
283*c8dee2aaSAndroid Build Coastguard Worker                 bool isHardBreak = this->isHardBreak(unichar);
284*c8dee2aaSAndroid Build Coastguard Worker                 bool isSpace = this->isSpace(unichar) || isHardBreak;
285*c8dee2aaSAndroid Build Coastguard Worker                 bool isWhitespace = this->isWhitespace(unichar) || isHardBreak;
286*c8dee2aaSAndroid Build Coastguard Worker                 if (isSpace) {
287*c8dee2aaSAndroid Build Coastguard Worker                     results->at(i) |= SkUnicode::kPartOfIntraWordBreak;
288*c8dee2aaSAndroid Build Coastguard Worker                 }
289*c8dee2aaSAndroid Build Coastguard Worker                 if (isWhitespace) {
290*c8dee2aaSAndroid Build Coastguard Worker                     results->at(i) |= SkUnicode::kPartOfWhiteSpaceBreak;
291*c8dee2aaSAndroid Build Coastguard Worker                 }
292*c8dee2aaSAndroid Build Coastguard Worker                 if (this->isControl(unichar)) {
293*c8dee2aaSAndroid Build Coastguard Worker                     results->at(i) |= SkUnicode::kControl;
294*c8dee2aaSAndroid Build Coastguard Worker                 }
295*c8dee2aaSAndroid Build Coastguard Worker             }
296*c8dee2aaSAndroid Build Coastguard Worker         }
297*c8dee2aaSAndroid Build Coastguard Worker         return true;
298*c8dee2aaSAndroid Build Coastguard Worker     }
299*c8dee2aaSAndroid Build Coastguard Worker 
getUtf8Words(const char utf8[],int utf8Units,const char * locale,std::vector<Position> * results)300*c8dee2aaSAndroid Build Coastguard Worker     bool getUtf8Words(const char utf8[],
301*c8dee2aaSAndroid Build Coastguard Worker                       int utf8Units,
302*c8dee2aaSAndroid Build Coastguard Worker                       const char* locale,
303*c8dee2aaSAndroid Build Coastguard Worker                       std::vector<Position>* results) override {
304*c8dee2aaSAndroid Build Coastguard Worker         SkDEBUGF("Method 'getUtf8Words' is not implemented\n");
305*c8dee2aaSAndroid Build Coastguard Worker         return false;
306*c8dee2aaSAndroid Build Coastguard Worker     }
307*c8dee2aaSAndroid Build Coastguard Worker 
getSentences(const char utf8[],int utf8Units,const char * locale,std::vector<SkUnicode::Position> * results)308*c8dee2aaSAndroid Build Coastguard Worker     bool getSentences(const char utf8[],
309*c8dee2aaSAndroid Build Coastguard Worker                       int utf8Units,
310*c8dee2aaSAndroid Build Coastguard Worker                       const char* locale,
311*c8dee2aaSAndroid Build Coastguard Worker                       std::vector<SkUnicode::Position>* results) override {
312*c8dee2aaSAndroid Build Coastguard Worker         SkDEBUGF("Method 'getSentences' is not implemented\n");
313*c8dee2aaSAndroid Build Coastguard Worker         return false;
314*c8dee2aaSAndroid Build Coastguard Worker     }
315*c8dee2aaSAndroid Build Coastguard Worker 
316*c8dee2aaSAndroid Build Coastguard Worker     std::shared_ptr<std::vector<SkUnicode::BidiRegion>> fRegions;
317*c8dee2aaSAndroid Build Coastguard Worker     ICU4XLocale fLocale;
318*c8dee2aaSAndroid Build Coastguard Worker     ICU4XDataProvider fDataProvider;
319*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCaseMapper fCaseMapper;
320*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fWhitespaces;
321*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fSpaces;
322*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fBlanks;
323*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fEmoji;
324*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fEmojiComponent;
325*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fEmojiModifier;
326*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fEmojiModifierBase;
327*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fRegionalIndicator;
328*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fIdeographic;
329*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointSetData fControls;
330*c8dee2aaSAndroid Build Coastguard Worker     ICU4XCodePointMapData8 fLineBreaks;
331*c8dee2aaSAndroid Build Coastguard Worker };
332*c8dee2aaSAndroid Build Coastguard Worker 
333*c8dee2aaSAndroid Build Coastguard Worker class SkBreakIterator_icu4x: public SkBreakIterator {
334*c8dee2aaSAndroid Build Coastguard Worker     Position fLastResult;
335*c8dee2aaSAndroid Build Coastguard Worker     Position fStart;
336*c8dee2aaSAndroid Build Coastguard Worker     Position fEnd;
337*c8dee2aaSAndroid Build Coastguard Worker public:
SkBreakIterator_icu4x()338*c8dee2aaSAndroid Build Coastguard Worker     SkBreakIterator_icu4x() { }
first()339*c8dee2aaSAndroid Build Coastguard Worker     Position first() override { SkASSERT(false); return -1; }
current()340*c8dee2aaSAndroid Build Coastguard Worker     Position current() override { SkASSERT(false); return -1; }
next()341*c8dee2aaSAndroid Build Coastguard Worker     Position next() override { SkASSERT(false); return -1; }
status()342*c8dee2aaSAndroid Build Coastguard Worker     Status status() override { SkASSERT(false); return -1; }
isDone()343*c8dee2aaSAndroid Build Coastguard Worker     bool isDone() override { SkASSERT(false); return false; }
setText(const char utftext8[],int utf8Units)344*c8dee2aaSAndroid Build Coastguard Worker     bool setText(const char utftext8[], int utf8Units) override { SkASSERT(false); return false; }
setText(const char16_t utftext16[],int utf16Units)345*c8dee2aaSAndroid Build Coastguard Worker     bool setText(const char16_t utftext16[], int utf16Units) override { SkASSERT(false); return false; }
346*c8dee2aaSAndroid Build Coastguard Worker };
347*c8dee2aaSAndroid Build Coastguard Worker 
348*c8dee2aaSAndroid Build Coastguard Worker class SkBidiIterator_icu4x : public SkBidiIterator {
349*c8dee2aaSAndroid Build Coastguard Worker     std::shared_ptr<std::vector<SkUnicode::BidiRegion>> fRegions;
350*c8dee2aaSAndroid Build Coastguard Worker public:
SkBidiIterator_icu4x(std::shared_ptr<std::vector<SkUnicode::BidiRegion>> regions)351*c8dee2aaSAndroid Build Coastguard Worker     explicit SkBidiIterator_icu4x(std::shared_ptr<std::vector<SkUnicode::BidiRegion>> regions)
352*c8dee2aaSAndroid Build Coastguard Worker             : fRegions(regions) { }
getLength()353*c8dee2aaSAndroid Build Coastguard Worker     Position getLength() override { return fRegions->size(); }
getLevelAt(Position pos)354*c8dee2aaSAndroid Build Coastguard Worker     Level getLevelAt(Position pos) override {
355*c8dee2aaSAndroid Build Coastguard Worker         auto found = std::lower_bound(
356*c8dee2aaSAndroid Build Coastguard Worker                 fRegions->begin(),
357*c8dee2aaSAndroid Build Coastguard Worker                 fRegions->end(),
358*c8dee2aaSAndroid Build Coastguard Worker                 SkUnicode::BidiRegion(pos, pos, 0),
359*c8dee2aaSAndroid Build Coastguard Worker                 [](const SkUnicode::BidiRegion& a, const SkUnicode::BidiRegion& b) {
360*c8dee2aaSAndroid Build Coastguard Worker                     return a.start <= b.start && a.end <= b.end;
361*c8dee2aaSAndroid Build Coastguard Worker                 });
362*c8dee2aaSAndroid Build Coastguard Worker         return found->level;
363*c8dee2aaSAndroid Build Coastguard Worker     }
364*c8dee2aaSAndroid Build Coastguard Worker };
365*c8dee2aaSAndroid Build Coastguard Worker 
makeBidiIterator(const uint16_t text[],int count,SkBidiIterator::Direction dir)366*c8dee2aaSAndroid Build Coastguard Worker std::unique_ptr<SkBidiIterator> SkUnicode_icu4x::makeBidiIterator(const uint16_t text[], int count,
367*c8dee2aaSAndroid Build Coastguard Worker                                                  SkBidiIterator::Direction dir) {
368*c8dee2aaSAndroid Build Coastguard Worker     if (fRegions) {
369*c8dee2aaSAndroid Build Coastguard Worker         fRegions->clear();
370*c8dee2aaSAndroid Build Coastguard Worker     } else {
371*c8dee2aaSAndroid Build Coastguard Worker         fRegions = std::make_shared<std::vector<SkUnicode::BidiRegion>>();
372*c8dee2aaSAndroid Build Coastguard Worker     }
373*c8dee2aaSAndroid Build Coastguard Worker 
374*c8dee2aaSAndroid Build Coastguard Worker     if (this->getBidiRegions(text, count, dir == SkBidiIterator::Direction::kLTR ? TextDirection::kLTR : TextDirection::kRTL, fRegions.get())) {
375*c8dee2aaSAndroid Build Coastguard Worker         return std::make_unique<SkBidiIterator_icu4x>(fRegions);
376*c8dee2aaSAndroid Build Coastguard Worker     } else {
377*c8dee2aaSAndroid Build Coastguard Worker         return nullptr;
378*c8dee2aaSAndroid Build Coastguard Worker     }
379*c8dee2aaSAndroid Build Coastguard Worker }
380*c8dee2aaSAndroid Build Coastguard Worker 
makeBidiIterator(const char text[],int count,SkBidiIterator::Direction dir)381*c8dee2aaSAndroid Build Coastguard Worker std::unique_ptr<SkBidiIterator> SkUnicode_icu4x::makeBidiIterator(const char text[],
382*c8dee2aaSAndroid Build Coastguard Worker                                                  int count,
383*c8dee2aaSAndroid Build Coastguard Worker                                                  SkBidiIterator::Direction dir) {
384*c8dee2aaSAndroid Build Coastguard Worker     if (fRegions) {
385*c8dee2aaSAndroid Build Coastguard Worker         fRegions->clear();
386*c8dee2aaSAndroid Build Coastguard Worker     } else {
387*c8dee2aaSAndroid Build Coastguard Worker         fRegions = std::make_shared<std::vector<SkUnicode::BidiRegion>>();
388*c8dee2aaSAndroid Build Coastguard Worker     }
389*c8dee2aaSAndroid Build Coastguard Worker     if (this->getBidiRegions(text, count, dir == SkBidiIterator::Direction::kLTR ? TextDirection::kLTR : TextDirection::kRTL, fRegions.get())) {
390*c8dee2aaSAndroid Build Coastguard Worker         return std::make_unique<SkBidiIterator_icu4x>(fRegions);
391*c8dee2aaSAndroid Build Coastguard Worker     } else {
392*c8dee2aaSAndroid Build Coastguard Worker         return nullptr;
393*c8dee2aaSAndroid Build Coastguard Worker     }
394*c8dee2aaSAndroid Build Coastguard Worker }
395*c8dee2aaSAndroid Build Coastguard Worker 
makeBreakIterator(const char locale[],BreakType breakType)396*c8dee2aaSAndroid Build Coastguard Worker std::unique_ptr<SkBreakIterator> SkUnicode_icu4x::makeBreakIterator(const char locale[],
397*c8dee2aaSAndroid Build Coastguard Worker                                                    BreakType breakType) {
398*c8dee2aaSAndroid Build Coastguard Worker     SkASSERT(false); return nullptr;
399*c8dee2aaSAndroid Build Coastguard Worker }
400*c8dee2aaSAndroid Build Coastguard Worker 
makeBreakIterator(BreakType breakType)401*c8dee2aaSAndroid Build Coastguard Worker std::unique_ptr<SkBreakIterator> SkUnicode_icu4x::makeBreakIterator(BreakType breakType) {
402*c8dee2aaSAndroid Build Coastguard Worker     SkASSERT(false); return nullptr;
403*c8dee2aaSAndroid Build Coastguard Worker }
404*c8dee2aaSAndroid Build Coastguard Worker 
405*c8dee2aaSAndroid Build Coastguard Worker namespace SkUnicodes::ICU4X {
Make()406*c8dee2aaSAndroid Build Coastguard Worker sk_sp<SkUnicode> Make() {
407*c8dee2aaSAndroid Build Coastguard Worker     return sk_make_sp<SkUnicode_icu4x>();
408*c8dee2aaSAndroid Build Coastguard Worker }
409*c8dee2aaSAndroid Build Coastguard Worker }
410