/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16*993b0882SAndroid Build Coastguard Worker
17*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unilib_test-include.h"
18*993b0882SAndroid Build Coastguard Worker
19*993b0882SAndroid Build Coastguard Worker #include "utils/base/logging.h"
20*993b0882SAndroid Build Coastguard Worker #include "gmock/gmock.h"
21*993b0882SAndroid Build Coastguard Worker
22*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
23*993b0882SAndroid Build Coastguard Worker namespace test_internal {
24*993b0882SAndroid Build Coastguard Worker
25*993b0882SAndroid Build Coastguard Worker using ::testing::ElementsAre;
26*993b0882SAndroid Build Coastguard Worker
// Checks the character-class predicates, case mapping and bracket pairing on
// ASCII input, together with a few non-ASCII variants of the same characters.
//
// Fullwidth / halfwidth forms and the compatibility ideograph are written as
// \u escapes rather than as glyphs: a tool that folds them to their ASCII or
// canonical look-alikes would otherwise silently turn these assertions into
// duplicates of the ASCII ones.
TEST_F(UniLibTest, CharacterClassesAscii) {
  EXPECT_TRUE(unilib_->IsOpeningBracket('('));
  EXPECT_TRUE(unilib_->IsClosingBracket(')'));
  EXPECT_FALSE(unilib_->IsWhitespace(')'));
  EXPECT_TRUE(unilib_->IsWhitespace(' '));
  EXPECT_FALSE(unilib_->IsDigit(')'));
  EXPECT_TRUE(unilib_->IsDigit('0'));
  EXPECT_TRUE(unilib_->IsDigit('9'));
  EXPECT_FALSE(unilib_->IsUpper(')'));
  EXPECT_TRUE(unilib_->IsUpper('A'));
  EXPECT_TRUE(unilib_->IsUpper('Z'));
  EXPECT_FALSE(unilib_->IsLower(')'));
  EXPECT_TRUE(unilib_->IsLower('a'));
  EXPECT_TRUE(unilib_->IsLower('z'));
  EXPECT_TRUE(unilib_->IsPunctuation('!'));
  EXPECT_TRUE(unilib_->IsPunctuation('?'));
  EXPECT_TRUE(unilib_->IsPunctuation('#'));
  EXPECT_TRUE(unilib_->IsPunctuation('('));
  EXPECT_FALSE(unilib_->IsPunctuation('0'));
  EXPECT_FALSE(unilib_->IsPunctuation('$'));
  EXPECT_TRUE(unilib_->IsPercentage('%'));
  EXPECT_TRUE(unilib_->IsPercentage(u'\uFF05'));  // FULLWIDTH PERCENT SIGN
  EXPECT_TRUE(unilib_->IsSlash('/'));
  EXPECT_TRUE(unilib_->IsSlash(u'\uFF0F'));  // FULLWIDTH SOLIDUS
  EXPECT_TRUE(unilib_->IsMinus('-'));
  EXPECT_TRUE(unilib_->IsMinus(u'\uFF0D'));  // FULLWIDTH HYPHEN-MINUS
  EXPECT_TRUE(unilib_->IsNumberSign('#'));
  EXPECT_TRUE(unilib_->IsNumberSign(u'\uFF03'));  // FULLWIDTH NUMBER SIGN
  EXPECT_TRUE(unilib_->IsDot('.'));
  EXPECT_TRUE(unilib_->IsDot(u'\uFF0E'));  // FULLWIDTH FULL STOP
  EXPECT_TRUE(unilib_->IsApostrophe('\''));
  EXPECT_TRUE(unilib_->IsApostrophe(u'ߴ'));
  EXPECT_TRUE(unilib_->IsQuotation(u'"'));
  EXPECT_TRUE(unilib_->IsQuotation(u'”'));
  EXPECT_TRUE(unilib_->IsAmpersand(u'&'));
  EXPECT_TRUE(unilib_->IsAmpersand(u'\uFE60'));  // SMALL AMPERSAND
  EXPECT_TRUE(unilib_->IsAmpersand(u'\uFF06'));  // FULLWIDTH AMPERSAND

  EXPECT_TRUE(unilib_->IsLatinLetter('A'));
  EXPECT_TRUE(unilib_->IsArabicLetter(u'ب'));  // ARABIC LETTER BEH
  EXPECT_TRUE(
      unilib_->IsCyrillicLetter(u'ᲀ'));  // CYRILLIC SMALL LETTER ROUNDED VE
  EXPECT_TRUE(
      unilib_->IsChineseLetter(u'\uF900'));  // CJK COMPATIBILITY IDEOGRAPH
  EXPECT_TRUE(unilib_->IsJapaneseLetter(u'ぁ'));  // HIRAGANA LETTER SMALL A
  EXPECT_TRUE(unilib_->IsKoreanLetter(u'ㄱ'));    // HANGUL LETTER KIYEOK
  EXPECT_TRUE(unilib_->IsThaiLetter(u'ก'));       // THAI CHARACTER KO KAI
  EXPECT_TRUE(unilib_->IsCJTletter(u'ก'));        // THAI CHARACTER KO KAI
  EXPECT_FALSE(unilib_->IsCJTletter('A'));

  EXPECT_TRUE(unilib_->IsLetter('A'));
  EXPECT_TRUE(
      unilib_->IsLetter(u'\uFF21'));  // FULLWIDTH LATIN CAPITAL LETTER A
  EXPECT_TRUE(unilib_->IsLetter(u'ト'));       // KATAKANA LETTER TO
  EXPECT_TRUE(unilib_->IsLetter(u'\uFF84'));  // HALFWIDTH KATAKANA LETTER TO
  EXPECT_TRUE(unilib_->IsLetter(u'\uF900'));  // CJK COMPATIBILITY IDEOGRAPH

  EXPECT_EQ(unilib_->ToLower('A'), 'a');
  EXPECT_EQ(unilib_->ToLower('Z'), 'z');
  EXPECT_EQ(unilib_->ToLower(')'), ')');
  EXPECT_EQ(unilib_->ToLowerText(UTF8ToUnicodeText("Never gonna give you up."))
                .ToUTF8String(),
            "never gonna give you up.");
  EXPECT_EQ(unilib_->ToUpper('a'), 'A');
  EXPECT_EQ(unilib_->ToUpper('z'), 'Z');
  EXPECT_EQ(unilib_->ToUpper(')'), ')');
  EXPECT_EQ(unilib_->ToUpperText(UTF8ToUnicodeText("Never gonna let you down."))
                .ToUTF8String(),
            "NEVER GONNA LET YOU DOWN.");
  EXPECT_EQ(unilib_->GetPairedBracket(')'), '(');
  EXPECT_EQ(unilib_->GetPairedBracket('}'), '{');
}
97*993b0882SAndroid Build Coastguard Worker
// Runs the character-class predicates, case mappings and bracket pairing on
// non-ASCII code points, each given by its numeric value.
TEST_F(UniLibTest, CharacterClassesUnicode) {
  // Code points that several assertions below share.
  constexpr int kTibetanOpening = 0x0F3C;      // TIBET ANG KHANG GYON
  constexpr int kTibetanClosing = 0x0F3D;      // TIBET ANG KHANG GYAS
  constexpr int kSmallRDoubleGrave = 0x0211;   // SMALL R WITH DOUBLE GRAVE
  constexpr int kCapitalRDoubleGrave = 0x0212; // CAPITAL R WITH DOUBLE GRAVE
  constexpr int kCapitalAlpha = 0x0391;        // GREEK CAPITAL ALPHA
  constexpr int kSmallAlpha = 0x03B1;          // GREEK SMALL ALPHA
  constexpr int kSmallAlphaTonos = 0x03AC;     // GREEK SMALL ALPHA WITH TONOS
  constexpr int kCapitalUpsilonDial = 0x03AB;  // GREEK CAPITAL UPSILON W DIAL
  constexpr int kSmallUpsilonDial = 0x03CB;    // GREEK SMALL UPSILON W DIAL
  constexpr int kSmallPi = 0x03C0;             // GREEK SMALL PI
  constexpr int kCapitalSigma = 0x03A3;        // GREEK CAPITAL LETTER SIGMA
  constexpr int kSmallSigma = 0x03C3;          // GREEK SMALL LETTER SIGMA
  constexpr int kLatinCapitalA = 0x0041;       // LATIN CAPITAL LETTER A
  constexpr int kThaiKoKai = 0x0E01;           // THAI CHARACTER KO KAI
  constexpr int kCjkCompatIdeograph = 0xF900;  // CJK COMPATIBILITY IDEOGRAPH

  // Brackets and whitespace.
  EXPECT_TRUE(unilib_->IsOpeningBracket(kTibetanOpening));
  EXPECT_TRUE(unilib_->IsClosingBracket(kTibetanClosing));
  EXPECT_FALSE(unilib_->IsWhitespace(0x23F0));  // ALARM CLOCK
  EXPECT_TRUE(unilib_->IsWhitespace(0x2003));   // EM SPACE

  // Digits: the Vai digit range and its two immediate neighbours.
  EXPECT_FALSE(unilib_->IsDigit(0xA619));  // VAI SYMBOL JONG
  EXPECT_TRUE(unilib_->IsDigit(0xA620));   // VAI DIGIT ZERO
  EXPECT_TRUE(unilib_->IsDigit(0xA629));   // VAI DIGIT NINE
  EXPECT_FALSE(unilib_->IsDigit(0xA62A));  // VAI SYLLABLE NDOLE MA

  // Upper case.
  EXPECT_FALSE(unilib_->IsUpper(kSmallRDoubleGrave));
  EXPECT_TRUE(unilib_->IsUpper(kCapitalRDoubleGrave));
  EXPECT_TRUE(unilib_->IsUpper(kCapitalAlpha));
  EXPECT_TRUE(unilib_->IsUpper(kCapitalUpsilonDial));
  EXPECT_FALSE(unilib_->IsUpper(kSmallAlphaTonos));

  // Lower case.
  EXPECT_TRUE(unilib_->IsLower(kSmallAlphaTonos));
  EXPECT_TRUE(unilib_->IsLower(kSmallAlpha));
  EXPECT_TRUE(unilib_->IsLower(kSmallUpsilonDial));
  EXPECT_TRUE(unilib_->IsLower(kSmallRDoubleGrave));
  EXPECT_TRUE(unilib_->IsLower(kSmallPi));
  EXPECT_TRUE(unilib_->IsLower(0x007A));   // SMALL Z
  EXPECT_FALSE(unilib_->IsLower(0x005A));  // CAPITAL Z
  EXPECT_FALSE(unilib_->IsLower(kCapitalRDoubleGrave));
  EXPECT_FALSE(unilib_->IsLower(kCapitalAlpha));

  // Punctuation; currency signs are expected not to count.
  EXPECT_TRUE(unilib_->IsPunctuation(0x055E));   // ARMENIAN QUESTION MARK
  EXPECT_TRUE(unilib_->IsPunctuation(0x066C));   // ARABIC THOUSANDS SEPARATOR
  EXPECT_TRUE(unilib_->IsPunctuation(0x07F7));   // NKO SYMBOL GBAKURUNEN
  EXPECT_TRUE(unilib_->IsPunctuation(0x10AF2));  // DOUBLE DOT WITHIN DOT
  EXPECT_FALSE(unilib_->IsPunctuation(0x00A3));  // POUND SIGN
  EXPECT_FALSE(unilib_->IsPunctuation(0xA838));  // NORTH INDIC RUPEE MARK

  // Single-symbol classes: the ASCII sign followed by its fullwidth form.
  EXPECT_TRUE(unilib_->IsPercentage(0x0025));  // PERCENT SIGN
  EXPECT_TRUE(unilib_->IsPercentage(0xFF05));  // FULLWIDTH PERCENT SIGN
  EXPECT_TRUE(unilib_->IsSlash(0x002F));       // SOLIDUS
  EXPECT_TRUE(unilib_->IsSlash(0xFF0F));       // FULLWIDTH SOLIDUS
  EXPECT_TRUE(unilib_->IsMinus(0x002D));       // HYPHEN-MINUS
  EXPECT_TRUE(unilib_->IsMinus(0xFF0D));       // FULLWIDTH HYPHEN-MINUS
  EXPECT_TRUE(unilib_->IsNumberSign(0x0023));  // NUMBER SIGN
  EXPECT_TRUE(unilib_->IsNumberSign(0xFF03));  // FULLWIDTH NUMBER SIGN
  EXPECT_TRUE(unilib_->IsDot(0x002E));         // FULL STOP
  EXPECT_TRUE(unilib_->IsDot(0xFF0E));         // FULLWIDTH FULL STOP

  // Script-specific letter predicates.
  EXPECT_TRUE(unilib_->IsLatinLetter(kLatinCapitalA));
  EXPECT_TRUE(unilib_->IsArabicLetter(0x0628));  // ARABIC LETTER BEH
  EXPECT_TRUE(
      unilib_->IsCyrillicLetter(0x1C80));  // CYRILLIC SMALL LETTER ROUNDED VE
  EXPECT_TRUE(unilib_->IsChineseLetter(kCjkCompatIdeograph));
  EXPECT_TRUE(unilib_->IsJapaneseLetter(0x3041));  // HIRAGANA LETTER SMALL A
  EXPECT_TRUE(unilib_->IsKoreanLetter(0x3131));    // HANGUL LETTER KIYEOK
  EXPECT_TRUE(unilib_->IsThaiLetter(kThaiKoKai));
  EXPECT_TRUE(unilib_->IsCJTletter(kThaiKoKai));
  EXPECT_FALSE(unilib_->IsCJTletter(kLatinCapitalA));

  // Generic letter predicate.
  EXPECT_TRUE(unilib_->IsLetter(kLatinCapitalA));
  EXPECT_TRUE(unilib_->IsLetter(0xFF21));  // FULLWIDTH LATIN CAPITAL LETTER A
  EXPECT_TRUE(unilib_->IsLetter(0x30C8));  // KATAKANA LETTER TO
  EXPECT_TRUE(unilib_->IsLetter(0xFF84));  // HALFWIDTH KATAKANA LETTER TO
  EXPECT_TRUE(unilib_->IsLetter(kCjkCompatIdeograph));

  // Lower-casing of single code points and of a whole Greek sentence.
  EXPECT_EQ(unilib_->ToLower(kCapitalAlpha), kSmallAlpha);
  EXPECT_EQ(unilib_->ToLower(kCapitalUpsilonDial), kSmallUpsilonDial);
  EXPECT_EQ(unilib_->ToLower(kSmallPi), kSmallPi);
  EXPECT_EQ(unilib_->ToLower(kCapitalSigma), kSmallSigma);
  const auto lowered_sentence =
      unilib_->ToLowerText(UTF8ToUnicodeText("Κανένας άνθρωπος δεν ξέρει"))
          .ToUTF8String();
  EXPECT_EQ(lowered_sentence, "κανένας άνθρωπος δεν ξέρει");
  EXPECT_TRUE(unilib_->IsLowerText(UTF8ToUnicodeText("ξέρει")));

  // Upper-casing; 0x03C2 is expected to map to capital sigma as well.
  EXPECT_EQ(unilib_->ToUpper(kSmallAlpha), kCapitalAlpha);
  EXPECT_EQ(unilib_->ToUpper(kSmallUpsilonDial), kCapitalUpsilonDial);
  EXPECT_EQ(unilib_->ToUpper(kCapitalAlpha), kCapitalAlpha);
  EXPECT_EQ(unilib_->ToUpper(kSmallSigma), kCapitalSigma);
  EXPECT_EQ(unilib_->ToUpper(0x03C2), kCapitalSigma);
  const auto uppered_sentence =
      unilib_->ToUpperText(UTF8ToUnicodeText("Κανένας άνθρωπος δεν ξέρει"))
          .ToUTF8String();
  EXPECT_EQ(uppered_sentence, "ΚΑΝΈΝΑΣ ΆΝΘΡΩΠΟΣ ΔΕΝ ΞΈΡΕΙ");
  EXPECT_TRUE(unilib_->IsUpperText(UTF8ToUnicodeText("ΚΑΝΈΝΑΣ")));

  // Bracket pairing is checked in both directions.
  EXPECT_EQ(unilib_->GetPairedBracket(kTibetanOpening), kTibetanClosing);
  EXPECT_EQ(unilib_->GetPairedBracket(kTibetanClosing), kTibetanOpening);
}
177*993b0882SAndroid Build Coastguard Worker
// Smoke test of the regex API surface: builds a pattern and a matcher, calls
// each matcher accessor once and logs what it returns. Nothing is asserted.
TEST_F(UniLibTest, RegexInterface) {
  const UnicodeText digits_regex =
      UTF8ToUnicodeText("[0-9]+", /*do_copy=*/true);
  const UnicodeText haystack =
      UTF8ToUnicodeText("hello 0123", /*do_copy=*/false);

  const std::unique_ptr<UniLib::RegexPattern> compiled =
      unilib_->CreateRegexPattern(digits_regex);
  const std::unique_ptr<UniLib::RegexMatcher> matcher =
      compiled->Matcher(haystack);

  // Out-parameter handed to every matcher call below; never inspected here.
  int status;
  TC3_LOG(INFO) << matcher->Matches(&status);
  TC3_LOG(INFO) << matcher->Find(&status);
  TC3_LOG(INFO) << matcher->Start(0, &status);
  TC3_LOG(INFO) << matcher->End(0, &status);
  TC3_LOG(INFO) << matcher->Group(0, &status).size_codepoints();
}
192*993b0882SAndroid Build Coastguard Worker
// Checks full-match, approximate-match and find semantics of a compiled
// pattern, including the codepoint offsets reported for a match.
TEST_F(UniLibTest, Regex) {
  // The smiley face is a 4-byte UTF8 codepoint 0x1F60B, and it's important to
  // test the regex functionality with it to verify we are handling the indices
  // correctly. It is spelled as its UTF-8 bytes (F0 9F 98 8B) so that it
  // cannot be dropped by tooling that mishandles characters outside the BMP;
  // the expected offsets below (8 and 13) only hold with the smileys present.
  const UnicodeText regex_pattern =
      UTF8ToUnicodeText("[0-9]+\xF0\x9F\x98\x8B", /*do_copy=*/false);
  std::unique_ptr<UniLib::RegexPattern> pattern =
      unilib_->CreateRegexPattern(regex_pattern);
  int status;
  std::unique_ptr<UniLib::RegexMatcher> matcher;

  // Input that is exactly "digits + smiley" matches as a whole.
  matcher = pattern->Matcher(
      UTF8ToUnicodeText("0123\xF0\x9F\x98\x8B", /*do_copy=*/false));
  EXPECT_TRUE(matcher->Matches(&status));
  EXPECT_TRUE(matcher->ApproximatelyMatches(&status));
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_TRUE(matcher->Matches(&status));  // Check that the state is reset.
  EXPECT_TRUE(matcher->ApproximatelyMatches(&status));
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);

  // Surrounding text prevents a whole-input match.
  matcher = pattern->Matcher(UTF8ToUnicodeText(
      "hello\xF0\x9F\x98\x8B\xF0\x9F\x98\x8B 0123\xF0\x9F\x98\x8B world",
      /*do_copy=*/false));
  EXPECT_FALSE(matcher->Matches(&status));
  EXPECT_FALSE(matcher->ApproximatelyMatches(&status));
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);

  // Find() locates the embedded match. Offsets are in codepoints: "hello" (5)
  // + two smileys (2) + one space (1) puts the first digit at index 8, and
  // four digits plus the trailing smiley end the match at index 13.
  matcher = pattern->Matcher(UTF8ToUnicodeText(
      "hello\xF0\x9F\x98\x8B\xF0\x9F\x98\x8B 0123\xF0\x9F\x98\x8B world",
      /*do_copy=*/false));
  EXPECT_TRUE(matcher->Find(&status));
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Start(0, &status), 8);
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->End(0, &status), 13);
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(),
            "0123\xF0\x9F\x98\x8B");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
}
229*993b0882SAndroid Build Coastguard Worker
// Same match / no-match checks as for an eagerly created pattern, but on a
// pattern obtained from CreateLazyRegexPattern.
TEST_F(UniLibTest, RegexLazy) {
  const std::unique_ptr<UniLib::RegexPattern> lazy_pattern =
      unilib_->CreateLazyRegexPattern(
          UTF8ToUnicodeText("[a-z][0-9]", /*do_copy=*/false));
  int status;

  {
    // A letter followed by a digit matches; asking twice must give the same
    // answer.
    const std::unique_ptr<UniLib::RegexMatcher> letter_digit =
        lazy_pattern->Matcher(UTF8ToUnicodeText("a3", /*do_copy=*/false));
    EXPECT_TRUE(letter_digit->Matches(&status));
    EXPECT_TRUE(letter_digit->ApproximatelyMatches(&status));
    EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
    EXPECT_TRUE(letter_digit->Matches(&status));  // State must be reset.
    EXPECT_TRUE(letter_digit->ApproximatelyMatches(&status));
    EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  }

  {
    // The reversed order does not match, and that is not an error.
    const std::unique_ptr<UniLib::RegexMatcher> digit_letter =
        lazy_pattern->Matcher(UTF8ToUnicodeText("3a", /*do_copy=*/false));
    EXPECT_FALSE(digit_letter->Matches(&status));
    EXPECT_FALSE(digit_letter->ApproximatelyMatches(&status));
    EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  }
}
250*993b0882SAndroid Build Coastguard Worker
// Checks the start / end offsets and the text reported for the whole match
// and for each of two capturing groups.
TEST_F(UniLibTest, RegexGroups) {
  // The smiley face is a 4-byte UTF8 codepoint 0x1F60B, and it's important to
  // test the regex functionality with it to verify we are handling the indices
  // correctly. It is spelled as its UTF-8 bytes (F0 9F 98 8B) so that it
  // cannot be dropped by tooling that mishandles characters outside the BMP;
  // the expected offsets below only hold with the smileys present.
  const UnicodeText regex_pattern = UTF8ToUnicodeText(
      "([0-9])([0-9]+)\xF0\x9F\x98\x8B", /*do_copy=*/false);
  std::unique_ptr<UniLib::RegexPattern> pattern =
      unilib_->CreateRegexPattern(regex_pattern);
  int status;
  std::unique_ptr<UniLib::RegexMatcher> matcher;

  // Offsets are in codepoints: "hello" (5) + two smileys (2) + one space (1)
  // puts the first digit at index 8. Group 1 is that single digit [8, 9),
  // group 2 the remaining digits [9, 12), and the whole match also covers the
  // trailing smiley, ending at 13.
  matcher = pattern->Matcher(UTF8ToUnicodeText(
      "hello\xF0\x9F\x98\x8B\xF0\x9F\x98\x8B 0123\xF0\x9F\x98\x8B world",
      /*do_copy=*/false));
  EXPECT_TRUE(matcher->Find(&status));
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Start(0, &status), 8);
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Start(1, &status), 8);
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Start(2, &status), 9);
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->End(0, &status), 13);
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->End(1, &status), 9);
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->End(2, &status), 12);
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(),
            "0123\xF0\x9F\x98\x8B");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Group(1, &status).ToUTF8String(), "0");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Group(2, &status).ToUTF8String(), "123");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
}
285*993b0882SAndroid Build Coastguard Worker
// An optional capturing group that takes no part in the match is expected to
// come back as an empty string with no error reported.
TEST_F(UniLibTest, RegexGroupsNotAllGroupsInvolved) {
  const UnicodeText digit_then_optional_letter =
      UTF8ToUnicodeText("([0-9])([a-z])?", /*do_copy=*/false);
  const std::unique_ptr<UniLib::RegexPattern> compiled =
      unilib_->CreateRegexPattern(digit_then_optional_letter);
  const std::unique_ptr<UniLib::RegexMatcher> matcher =
      compiled->Matcher(UTF8ToUnicodeText("7", /*do_copy=*/false));
  int status;

  EXPECT_TRUE(matcher->Find(&status));
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);

  // Whole match and the digit group both hold "7".
  EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(), "7");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Group(1, &status).ToUTF8String(), "7");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);

  // The letter group had nothing to capture.
  EXPECT_EQ(matcher->Group(2, &status).ToUTF8String(), "");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
}
304*993b0882SAndroid Build Coastguard Worker
// A pattern that can match nothing, applied to empty input, is expected to
// be found with both the whole match and its group empty.
TEST_F(UniLibTest, RegexGroupsEmptyResult) {
  const UnicodeText match_anything =
      UTF8ToUnicodeText("(.*)", /*do_copy=*/false);
  const std::unique_ptr<UniLib::RegexPattern> compiled =
      unilib_->CreateRegexPattern(match_anything);
  const std::unique_ptr<UniLib::RegexMatcher> matcher =
      compiled->Matcher(UTF8ToUnicodeText("", /*do_copy=*/false));
  int status;

  EXPECT_TRUE(matcher->Find(&status));
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);

  // Group 0 is the whole match, group 1 the only capturing group.
  EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(), "");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
  EXPECT_EQ(matcher->Group(1, &status).ToUTF8String(), "");
  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
}
321*993b0882SAndroid Build Coastguard Worker
// Collects every index the break iterator yields for "some text" and checks
// that they are 4, 5 and 9.
TEST_F(UniLibTest, BreakIterator) {
  const UnicodeText sample = UTF8ToUnicodeText("some text", /*do_copy=*/false);
  std::unique_ptr<UniLib::BreakIterator> breaker =
      unilib_->CreateBreakIterator(sample);

  // Drain the iterator until it reports kDone.
  std::vector<int> boundaries;
  for (int next = breaker->Next(); next != UniLib::BreakIterator::kDone;
       next = breaker->Next()) {
    boundaries.push_back(next);
  }

  EXPECT_THAT(boundaries, ElementsAre(4, 5, 9));
}
333*993b0882SAndroid Build Coastguard Worker
// Checks that break indices are reported in codepoints, not bytes, when the
// text consists of characters that each take four bytes in UTF-8.
TEST_F(UniLibTest, BreakIterator4ByteUTF8) {
  // Three 4-byte characters (U+1F600, U+1F602, U+1F60B), spelled as UTF-8
  // bytes so they cannot be dropped by tooling that mishandles characters
  // outside the BMP. An empty input could not yield the three breaks expected
  // below.
  const UnicodeText text = UTF8ToUnicodeText(
      "\xF0\x9F\x98\x80\xF0\x9F\x98\x82\xF0\x9F\x98\x8B", /*do_copy=*/false);
  std::unique_ptr<UniLib::BreakIterator> iterator =
      unilib_->CreateBreakIterator(text);
  std::vector<int> break_indices;
  int break_index = 0;
  // Drain the iterator until it reports kDone.
  while ((break_index = iterator->Next()) != UniLib::BreakIterator::kDone) {
    break_indices.push_back(break_index);
  }
  // One break after each character: 12 bytes of input, but indices 1, 2, 3.
  EXPECT_THAT(break_indices, ElementsAre(1, 2, 3));
}
345*993b0882SAndroid Build Coastguard Worker
// A plain decimal string parses successfully to its integer value.
TEST_F(UniLibTest, Integer32Parse) {
  const UnicodeText digits = UTF8ToUnicodeText("123", /*do_copy=*/false);
  int parsed;
  const bool ok = unilib_->ParseInt32(digits, &parsed);
  EXPECT_TRUE(ok);
  EXPECT_EQ(parsed, 123);
}
352*993b0882SAndroid Build Coastguard Worker
// A string containing a decimal point is rejected by the integer parser.
TEST_F(UniLibTest, Integer32ParseFloatNumber) {
  const UnicodeText with_fraction =
      UTF8ToUnicodeText("12.3", /*do_copy=*/false);
  int parsed;
  const bool ok = unilib_->ParseInt32(with_fraction, &parsed);
  EXPECT_FALSE(ok);
}
358*993b0882SAndroid Build Coastguard Worker
// A ten-digit value that still fits in 32 bits parses successfully.
TEST_F(UniLibTest, Integer32ParseLongNumber) {
  const UnicodeText one_billion =
      UTF8ToUnicodeText("1000000000", /*do_copy=*/false);
  int32 parsed;
  const bool ok = unilib_->ParseInt32(one_billion, &parsed);
  EXPECT_TRUE(ok);
  EXPECT_EQ(parsed, 1000000000);
}
365*993b0882SAndroid Build Coastguard Worker
// Expects ParseInt32 to report failure for "9123456789", which is larger than
// the maximum 32-bit signed value (2147483647).
TEST_F(UniLibTest, Integer32ParseOverflowNumber) {
  const UnicodeText text = UTF8ToUnicodeText("9123456789", /*do_copy=*/false);
  int32 parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseInt32(text, &parsed));
}
371*993b0882SAndroid Build Coastguard Worker
// Expects ParseInt32 to report failure for the empty string.
TEST_F(UniLibTest, Integer32ParseEmptyString) {
  const UnicodeText empty_text = UTF8ToUnicodeText("", /*do_copy=*/false);
  int parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseInt32(empty_text, &parsed));
}
377*993b0882SAndroid Build Coastguard Worker
// Checks that ParseInt32 accepts full-width digits and yields the same value
// as the ASCII spelling "123".
TEST_F(UniLibTest, Integer32ParseFullWidth) {
  // Zero-initialized so that no check can ever read an indeterminate value.
  int result = 0;
  // The input is the full-width digits U+FF11 U+FF12 U+FF13. They are written
  // as UTF-8 byte escapes so the literal cannot be silently folded to the
  // ASCII digits by an editor or text-processing tool.
  ASSERT_TRUE(unilib_->ParseInt32(
      UTF8ToUnicodeText("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93",
                        /*do_copy=*/false),
      &result));
  EXPECT_EQ(result, 123);
}
385*993b0882SAndroid Build Coastguard Worker
// Expects ParseInt32 to report failure for inputs that are not integers:
// digits interrupted by a letter, and words that begin with "nan" or "inf".
TEST_F(UniLibTest, Integer32ParseNotNumber) {
  int result;  // Never read: only the success flag is checked.
  // Full-width digits U+FF11 and U+FF13 around an ASCII 'a'. The literal is
  // split in two because 'a' is a hex digit and would otherwise be absorbed
  // into the preceding \x escape.
  EXPECT_FALSE(unilib_->ParseInt32(
      UTF8ToUnicodeText("\xEF\xBC\x91" "a\xEF\xBC\x93", /*do_copy=*/false),
      &result));
  // Strings starting with "nan" are not numbers.
  EXPECT_FALSE(unilib_->ParseInt32(
      UTF8ToUnicodeText("Nancy", /*do_copy=*/false), &result));
  // Strings starting with "inf" are not numbers.
  EXPECT_FALSE(unilib_->ParseInt32(
      UTF8ToUnicodeText("Information", /*do_copy=*/false), &result));
}
399*993b0882SAndroid Build Coastguard Worker
// Checks that ParseInt64 reports success for the ASCII decimal string "123"
// and writes 123 through the output pointer.
TEST_F(UniLibTest, Integer64Parse) {
  // Zero-initialized so that no check can ever read an indeterminate value.
  int64 result = 0;
  // ASSERT rather than EXPECT: the value comparison below is only meaningful
  // when the parse itself reported success.
  ASSERT_TRUE(unilib_->ParseInt64(UTF8ToUnicodeText("123", /*do_copy=*/false),
                                  &result));
  EXPECT_EQ(result, 123);
}
406*993b0882SAndroid Build Coastguard Worker
// Expects ParseInt64 to report failure for "12.3", a string that contains a
// decimal point.
TEST_F(UniLibTest, Integer64ParseFloatNumber) {
  const UnicodeText text = UTF8ToUnicodeText("12.3", /*do_copy=*/false);
  int64 parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseInt64(text, &parsed));
}
412*993b0882SAndroid Build Coastguard Worker
// Checks that ParseInt64 handles the ten-digit value 1000000000.
TEST_F(UniLibTest, Integer64ParseLongNumber) {
  // Zero-initialized so that no check can ever read an indeterminate value.
  int64 result = 0;
  // The limitation comes from the javaicu implementation: parseDouble does not
  // have ICU support and parseInt limits the size of the number.
  // ASSERT rather than EXPECT: the value comparison below is only meaningful
  // when the parse itself reported success.
  ASSERT_TRUE(unilib_->ParseInt64(
      UTF8ToUnicodeText("1000000000", /*do_copy=*/false), &result));
  EXPECT_EQ(result, 1000000000);
}
421*993b0882SAndroid Build Coastguard Worker
// Expects ParseInt64 to report failure for a 20-digit value that is larger
// than the maximum 64-bit signed value (9223372036854775807).
TEST_F(UniLibTest, Integer64ParseOverflowNumber) {
  const UnicodeText text =
      UTF8ToUnicodeText("92233720368547758099", /*do_copy=*/false);
  int64 parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseInt64(text, &parsed));
}
427*993b0882SAndroid Build Coastguard Worker
// Expects ParseInt64 to report failure for a negative 20-digit value that is
// smaller than the minimum 64-bit signed value (-9223372036854775808).
TEST_F(UniLibTest, Integer64ParseOverflowNegativeNumber) {
  const UnicodeText text =
      UTF8ToUnicodeText("-92233720368547758099", /*do_copy=*/false);
  int64 parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseInt64(text, &parsed));
}
433*993b0882SAndroid Build Coastguard Worker
// Expects ParseInt64 to report failure for the empty string.
TEST_F(UniLibTest, Integer64ParseEmptyString) {
  const UnicodeText empty_text = UTF8ToUnicodeText("", /*do_copy=*/false);
  int64 parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseInt64(empty_text, &parsed));
}
439*993b0882SAndroid Build Coastguard Worker
// Checks that ParseInt64 accepts full-width digits and yields the same value
// as the ASCII spelling "123".
TEST_F(UniLibTest, Integer64ParseFullWidth) {
  // Zero-initialized so that no check can ever read an indeterminate value.
  int64 result = 0;
  // The input is the full-width digits U+FF11 U+FF12 U+FF13. They are written
  // as UTF-8 byte escapes so the literal cannot be silently folded to the
  // ASCII digits by an editor or text-processing tool.
  ASSERT_TRUE(unilib_->ParseInt64(
      UTF8ToUnicodeText("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93",
                        /*do_copy=*/false),
      &result));
  EXPECT_EQ(result, 123);
}
447*993b0882SAndroid Build Coastguard Worker
// Expects ParseInt64 to report failure for inputs that are not integers:
// digits interrupted by a letter, and words that begin with "nan" or "inf".
TEST_F(UniLibTest, Integer64ParseNotNumber) {
  int64 result;  // Never read: only the success flag is checked.
  // Full-width digits U+FF11 and U+FF14 around an ASCII 'a'. The literal is
  // split in two because 'a' is a hex digit and would otherwise be absorbed
  // into the preceding \x escape.
  EXPECT_FALSE(unilib_->ParseInt64(
      UTF8ToUnicodeText("\xEF\xBC\x91" "a\xEF\xBC\x94", /*do_copy=*/false),
      &result));
  // Strings starting with "nan" are not numbers.
  EXPECT_FALSE(unilib_->ParseInt64(
      UTF8ToUnicodeText("Nancy", /*do_copy=*/false), &result));
  // Strings starting with "inf" are not numbers.
  EXPECT_FALSE(unilib_->ParseInt64(
      UTF8ToUnicodeText("Information", /*do_copy=*/false), &result));
}
461*993b0882SAndroid Build Coastguard Worker
// Checks that ParseDouble reports success for "1.23" and produces 1.23.
TEST_F(UniLibTest, DoubleParse) {
  // Zero-initialized so that no check can ever read an indeterminate value.
  double result = 0.0;
  // ASSERT rather than EXPECT: the value comparison below is only meaningful
  // when the parse itself reported success.
  ASSERT_TRUE(unilib_->ParseDouble(
      UTF8ToUnicodeText("1.23", /*do_copy=*/false), &result));
  // 1.23 has no exact binary representation, so compare with gtest's
  // ULP-based floating-point check instead of bit-exact equality.
  EXPECT_DOUBLE_EQ(result, 1.23);
}
468*993b0882SAndroid Build Coastguard Worker
// Checks that ParseDouble handles a value with nine integer digits and nine
// fractional digits.
TEST_F(UniLibTest, DoubleParseLongNumber) {
  // Zero-initialized so that no check can ever read an indeterminate value.
  double result = 0.0;
  // The limitation comes from the javaicu implementation: parseDouble does not
  // have ICU support and parseInt limits the size of the number.
  // ASSERT rather than EXPECT: the value comparison below is only meaningful
  // when the parse itself reported success.
  ASSERT_TRUE(unilib_->ParseDouble(
      UTF8ToUnicodeText("999999999.999999999", /*do_copy=*/false), &result));
  // The expected literal carries more digits than a double can hold, so
  // compare with gtest's ULP-based floating-point check instead of bit-exact
  // equality.
  EXPECT_DOUBLE_EQ(result, 999999999.999999999);
}
477*993b0882SAndroid Build Coastguard Worker
// Checks that ParseDouble accepts an integer-looking string ("123") and
// produces 123.
TEST_F(UniLibTest, DoubleParseWithoutFractionalPart) {
  // Zero-initialized so that no check can ever read an indeterminate value.
  double result = 0.0;
  // ASSERT rather than EXPECT: the value comparison below is only meaningful
  // when the parse itself reported success.
  ASSERT_TRUE(unilib_->ParseDouble(
      UTF8ToUnicodeText("123", /*do_copy=*/false), &result));
  // 123 is exactly representable as a double, so exact equality is safe here.
  EXPECT_EQ(result, 123);
}
484*993b0882SAndroid Build Coastguard Worker
// Expects ParseDouble to report failure for the empty string.
TEST_F(UniLibTest, DoubleParseEmptyString) {
  const UnicodeText empty_text = UTF8ToUnicodeText("", /*do_copy=*/false);
  double parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseDouble(empty_text, &parsed));
}
490*993b0882SAndroid Build Coastguard Worker
// Expects ParseDouble to report failure for ".123", where the dot comes before
// any digit.
TEST_F(UniLibTest, DoubleParsePrecedingDot) {
  const UnicodeText text = UTF8ToUnicodeText(".123", /*do_copy=*/false);
  double parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseDouble(text, &parsed));
}
496*993b0882SAndroid Build Coastguard Worker
// Expects ParseDouble to report failure for "123.". Note that, despite the
// test name, the dot in this input is at the end of the string.
TEST_F(UniLibTest, DoubleParseLeadingDot) {
  const UnicodeText text = UTF8ToUnicodeText("123.", /*do_copy=*/false);
  double parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseDouble(text, &parsed));
}
502*993b0882SAndroid Build Coastguard Worker
// Expects ParseDouble to report failure for "1.2.3", which contains more than
// one dot.
TEST_F(UniLibTest, DoubleParseMultipleDots) {
  const UnicodeText text = UTF8ToUnicodeText("1.2.3", /*do_copy=*/false);
  double parsed;  // Never read: only the success flag is checked.
  EXPECT_FALSE(unilib_->ParseDouble(text, &parsed));
}
508*993b0882SAndroid Build Coastguard Worker
// Checks that ParseDouble accepts full-width digits and yields the same value
// as the ASCII spelling "1.23".
TEST_F(UniLibTest, DoubleParseFullWidth) {
  // Zero-initialized so that no check can ever read an indeterminate value.
  double result = 0.0;
  // The digits are the full-width forms U+FF11, U+FF12 and U+FF13, written as
  // UTF-8 byte escapes so they cannot be silently folded to ASCII digits by an
  // editor or text-processing tool. The separator is left as the ASCII '.'.
  ASSERT_TRUE(unilib_->ParseDouble(
      UTF8ToUnicodeText("\xEF\xBC\x91.\xEF\xBC\x92\xEF\xBC\x93",
                        /*do_copy=*/false),
      &result));
  // 1.23 has no exact binary representation, so compare with gtest's
  // ULP-based floating-point check instead of bit-exact equality.
  EXPECT_DOUBLE_EQ(result, 1.23);
}
516*993b0882SAndroid Build Coastguard Worker
// Expects ParseDouble to report failure for inputs that are not numbers:
// digits interrupted by a letter, and words that begin with "nan" or "inf".
TEST_F(UniLibTest, DoubleParseNotNumber) {
  double result;  // Never read: only the success flag is checked.
  // Full-width digits U+FF11 and U+FF15 around an ASCII 'a'. The literal is
  // split in two because 'a' is a hex digit and would otherwise be absorbed
  // into the preceding \x escape.
  EXPECT_FALSE(unilib_->ParseDouble(
      UTF8ToUnicodeText("\xEF\xBC\x91" "a\xEF\xBC\x95", /*do_copy=*/false),
      &result));
  // Strings starting with "nan" are not numbers.
  EXPECT_FALSE(unilib_->ParseDouble(
      UTF8ToUnicodeText("Nancy", /*do_copy=*/false), &result));
  // Strings starting with "inf" are not numbers.
  EXPECT_FALSE(unilib_->ParseDouble(
      UTF8ToUnicodeText("Information", /*do_copy=*/false), &result));
}
529*993b0882SAndroid Build Coastguard Worker
// Exercises Length on an ASCII string, a string of non-ASCII letters, and a
// byte sequence that is not valid UTF-8.
TEST_F(UniLibTest, Length) {
  const UnicodeText ascii_text = UTF8ToUnicodeText("hello", /*do_copy=*/false);
  EXPECT_EQ(unilib_->Length(ascii_text).ValueOrDie(), 5);

  const UnicodeText accented_text =
      UTF8ToUnicodeText("ěščřž", /*do_copy=*/false);
  EXPECT_EQ(unilib_->Length(accented_text).ValueOrDie(), 5);

  // Invalid UTF-8 input.
  // The check is deliberately "!= 1" instead of an exact count: Apple's
  // character counting seems to return 0 for invalid UTF-8, while ICU treats
  // the invalid codepoint as 3 separate bytes.
  const UnicodeText invalid_text =
      UTF8ToUnicodeText("\xed\xa0\x80", /*do_copy=*/false);
  EXPECT_NE(unilib_->Length(invalid_text).ValueOrDie(), 1);
}
546*993b0882SAndroid Build Coastguard Worker
547*993b0882SAndroid Build Coastguard Worker } // namespace test_internal
548*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3
549