1*993b0882SAndroid Build Coastguard Worker /*
2*993b0882SAndroid Build Coastguard Worker * Copyright (C) 2018 The Android Open Source Project
3*993b0882SAndroid Build Coastguard Worker *
4*993b0882SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*993b0882SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*993b0882SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*993b0882SAndroid Build Coastguard Worker *
8*993b0882SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*993b0882SAndroid Build Coastguard Worker *
10*993b0882SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*993b0882SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*993b0882SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*993b0882SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*993b0882SAndroid Build Coastguard Worker * limitations under the License.
15*993b0882SAndroid Build Coastguard Worker */
16*993b0882SAndroid Build Coastguard Worker
17*993b0882SAndroid Build Coastguard Worker #include "utils/utf8/unicodetext.h"
18*993b0882SAndroid Build Coastguard Worker
19*993b0882SAndroid Build Coastguard Worker #include "utils/strings/stringpiece.h"
20*993b0882SAndroid Build Coastguard Worker #include "gtest/gtest.h"
21*993b0882SAndroid Build Coastguard Worker
22*993b0882SAndroid Build Coastguard Worker namespace libtextclassifier3 {
23*993b0882SAndroid Build Coastguard Worker namespace {
24*993b0882SAndroid Build Coastguard Worker
25*993b0882SAndroid Build Coastguard Worker class UnicodeTextTest : public testing::Test {
26*993b0882SAndroid Build Coastguard Worker protected:
UnicodeTextTest()27*993b0882SAndroid Build Coastguard Worker UnicodeTextTest() : empty_text_() {
28*993b0882SAndroid Build Coastguard Worker text_.push_back(0x1C0);
29*993b0882SAndroid Build Coastguard Worker text_.push_back(0x4E8C);
30*993b0882SAndroid Build Coastguard Worker text_.push_back(0xD7DB);
31*993b0882SAndroid Build Coastguard Worker text_.push_back(0x34);
32*993b0882SAndroid Build Coastguard Worker text_.push_back(0x1D11E);
33*993b0882SAndroid Build Coastguard Worker }
34*993b0882SAndroid Build Coastguard Worker
35*993b0882SAndroid Build Coastguard Worker UnicodeText empty_text_;
36*993b0882SAndroid Build Coastguard Worker UnicodeText text_;
37*993b0882SAndroid Build Coastguard Worker };
38*993b0882SAndroid Build Coastguard Worker
// Constructing a UnicodeText from another one must preserve the contents,
// both with the default arguments and with do_copy=false.
TEST(UnicodeTextTest, ConstructionFromUnicodeText) {
  UnicodeText source = UTF8ToUnicodeText("1234hello", /*do_copy=*/false);

  UnicodeText default_copy(source);
  EXPECT_EQ(default_copy.ToUTF8String(), "1234hello");

  UnicodeText no_copy(source, /*do_copy=*/false);
  EXPECT_EQ(no_copy.ToUTF8String(), "1234hello");
}
44*993b0882SAndroid Build Coastguard Worker
45*993b0882SAndroid Build Coastguard Worker // Tests for our modifications of UnicodeText.
TEST(UnicodeTextTest,Custom)46*993b0882SAndroid Build Coastguard Worker TEST(UnicodeTextTest, Custom) {
47*993b0882SAndroid Build Coastguard Worker UnicodeText text = UTF8ToUnicodeText("1234hello", /*do_copy=*/false);
48*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(text.ToUTF8String(), "1234hello");
49*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(text.size_codepoints(), 10);
50*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(text.size_bytes(), 13);
51*993b0882SAndroid Build Coastguard Worker
52*993b0882SAndroid Build Coastguard Worker auto it_begin = text.begin();
53*993b0882SAndroid Build Coastguard Worker std::advance(it_begin, 4);
54*993b0882SAndroid Build Coastguard Worker auto it_end = text.begin();
55*993b0882SAndroid Build Coastguard Worker std::advance(it_end, 6);
56*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(text.UTF8Substring(it_begin, it_end), "h");
57*993b0882SAndroid Build Coastguard Worker }
58*993b0882SAndroid Build Coastguard Worker
// Same checks as the Custom test, but the text is created from a StringPiece
// over a std::string.
//
// The input contains one 4-byte code point (U+1F60B, written as byte escapes)
// between "1234" and "hello"; without it the text would be 9 code points and
// 9 bytes and the size and substring expectations below could not hold.
TEST(UnicodeTextTest, StringPieceView) {
  // The literal is split so the last hex escape cannot absorb following text.
  std::string raw_text =
      "1234"
      "\xf0\x9f\x98\x8b"
      "hello";
  UnicodeText text =
      UTF8ToUnicodeText(StringPiece(raw_text), /*do_copy=*/false);
  EXPECT_EQ(text.ToUTF8String(), raw_text);
  EXPECT_EQ(text.size_codepoints(), 10);
  EXPECT_EQ(text.size_bytes(), 13);

  // Select code points 4 and 5: the 4-byte character and the 'h'.
  auto it_begin = text.begin();
  std::advance(it_begin, 4);
  auto it_end = text.begin();
  std::advance(it_end, 6);
  EXPECT_EQ(text.UTF8Substring(it_begin, it_end),
            "\xf0\x9f\x98\x8b"
            "h");
}
73*993b0882SAndroid Build Coastguard Worker
// Checks both UnicodeText::Substring overloads (iterator pair and code-point
// indices), each with do_copy=true and do_copy=false.
//
// The input contains one 4-byte code point (U+1F60B, written as byte escapes)
// at index 4, so the range [4, 6) is that character followed by "h". With a
// plain "1234hello" literal the same range would be "he".
TEST(UnicodeTextTest, Substring) {
  // The literals are split so a hex escape cannot absorb following text.
  UnicodeText text = UTF8ToUnicodeText(
      "1234"
      "\xf0\x9f\x98\x8b"
      "hello",
      /*do_copy=*/false);
  const UnicodeText expected = UTF8ToUnicodeText(
      "\xf0\x9f\x98\x8b"
      "h");

  EXPECT_EQ(
      UnicodeText::Substring(std::next(text.begin(), 4),
                             std::next(text.begin(), 6), /*do_copy=*/true),
      expected);
  EXPECT_EQ(
      UnicodeText::Substring(std::next(text.begin(), 4),
                             std::next(text.begin(), 6), /*do_copy=*/false),
      expected);
  EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/true), expected);
  EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/false), expected);
}
90*993b0882SAndroid Build Coastguard Worker
// PointToUTF8 must reference the caller's buffer, while copy initialization
// must end up with a different data pointer.
TEST(UnicodeTextTest, Ownership) {
  const std::string src = "\u304A\u00B0\u106B";

  UnicodeText view;
  view.PointToUTF8(src.data(), src.size());
  // Same address: nothing was copied.
  EXPECT_EQ(view.data(), src.data());

  // The view decodes to the three code points and then reaches end().
  UnicodeText::const_iterator cursor = view.begin();
  EXPECT_EQ(*cursor++, 0x304A);
  EXPECT_EQ(*cursor++, 0x00B0);
  EXPECT_EQ(*cursor++, 0x106B);
  EXPECT_EQ(cursor, view.end());

  // Copy initialization copies the data, so the pointers must differ.
  UnicodeText copied = view;
  EXPECT_NE(copied.data(), view.data());
}
106*993b0882SAndroid Build Coastguard Worker
// is_valid() must accept well-formed UTF-8 and reject truncated or malformed
// multi-byte sequences.
TEST(UnicodeTextTest, Validation) {
  // Wraps the input without copying and reports whether it validates.
  const auto is_valid = [](const char* utf8) {
    return UTF8ToUnicodeText(utf8, /*do_copy=*/false).is_valid();
  };

  // Well-formed inputs.
  EXPECT_TRUE(is_valid("1234hello"));
  EXPECT_TRUE(is_valid("\u304A\u00B0\u106B"));
  EXPECT_TRUE(is_valid("this is a test"));
  EXPECT_TRUE(is_valid("\xf0\x9f\x98\x8b"));

  // Too short (string is too short).
  EXPECT_FALSE(is_valid("\xf0\x9f"));
  // Too long (too many trailing bytes).
  EXPECT_FALSE(is_valid("\xf0\x9f\x98\x8b\x8b"));
  // Too short (too few trailing bytes).
  EXPECT_FALSE(is_valid("\xf0\x9f\x98\x61\x61"));
  // Invalid with context.
  EXPECT_FALSE(is_valid("hello \xf0\x9f\x98\x61\x61 world1"));
}
128*993b0882SAndroid Build Coastguard Worker
129*993b0882SAndroid Build Coastguard Worker class IteratorTest : public UnicodeTextTest {};
130*993b0882SAndroid Build Coastguard Worker
TEST_F(IteratorTest,Iterates)131*993b0882SAndroid Build Coastguard Worker TEST_F(IteratorTest, Iterates) {
132*993b0882SAndroid Build Coastguard Worker UnicodeText::const_iterator iter = text_.begin();
133*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(0x1C0, *iter);
134*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(&iter, &++iter); // operator++ returns *this.
135*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(0x4E8C, *iter++);
136*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(0xD7DB, *iter);
137*993b0882SAndroid Build Coastguard Worker // Make sure you can dereference more than once.
138*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(0xD7DB, *iter);
139*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(0x34, *++iter);
140*993b0882SAndroid Build Coastguard Worker EXPECT_EQ(0x1D11E, *++iter);
141*993b0882SAndroid Build Coastguard Worker ASSERT_TRUE(iter != text_.end());
142*993b0882SAndroid Build Coastguard Worker iter++;
143*993b0882SAndroid Build Coastguard Worker EXPECT_TRUE(iter == text_.end());
144*993b0882SAndroid Build Coastguard Worker }
145*993b0882SAndroid Build Coastguard Worker
// Two iterators over the same text advance independently and compare equal
// once they point at the same position.
TEST_F(IteratorTest, MultiPass) {
  // Also tests Default Constructible and Assignable.
  UnicodeText::const_iterator leader, follower;
  leader = text_.begin();
  follower = leader;

  // Moving the leader must not move the follower.
  EXPECT_EQ(0x4E8C, *++leader);
  EXPECT_TRUE(leader != follower);
  EXPECT_EQ(0x1C0, *follower);

  // Once the follower catches up, both agree.
  ++follower;
  EXPECT_TRUE(leader == follower);
  EXPECT_EQ(0x4E8C, *follower);
}
158*993b0882SAndroid Build Coastguard Worker
// Backward traversal: starts at end() and steps back to begin() using both
// pre- and post-decrement.
TEST_F(IteratorTest, ReverseIterates) {
  UnicodeText::const_iterator cursor = text_.end();
  EXPECT_TRUE(cursor == text_.end());

  // One step back lands on the last code point.
  cursor--;
  ASSERT_TRUE(cursor != text_.end());
  EXPECT_EQ(0x1D11E, *cursor--);
  EXPECT_EQ(0x34, *cursor);
  EXPECT_EQ(0xD7DB, *--cursor);
  // Dereferencing again must yield the same code point.
  EXPECT_EQ(0xD7DB, *cursor);

  --cursor;
  EXPECT_EQ(0x4E8C, *cursor--);
  EXPECT_EQ(0x1C0, *cursor);
  EXPECT_TRUE(cursor == text_.begin());
}
174*993b0882SAndroid Build Coastguard Worker
// The relational operators order iterators by their position in the text.
TEST_F(IteratorTest, Comparable) {
  // `first` sits on code point 0, `second` on code point 1.
  UnicodeText::const_iterator first = text_.begin();
  UnicodeText::const_iterator second = first;
  ++second;

  EXPECT_TRUE(first < second);
  EXPECT_TRUE(text_.begin() <= first);
  EXPECT_FALSE(first >= second);
  EXPECT_FALSE(first > text_.end());
}
186*993b0882SAndroid Build Coastguard Worker
// std::advance moves the iterator by whole code points.
TEST_F(IteratorTest, Advance) {
  UnicodeText::const_iterator cursor = text_.begin();
  EXPECT_EQ(0x1C0, *cursor);

  // Four steps from the first code point reach the fifth (last) one.
  std::advance(cursor, 4);
  EXPECT_EQ(0x1D11E, *cursor);

  ++cursor;
  EXPECT_TRUE(cursor == text_.end());
}
195*993b0882SAndroid Build Coastguard Worker
// std::distance counts code points between two iterators.
TEST_F(IteratorTest, Distance) {
  UnicodeText::const_iterator cursor = text_.begin();

  // At the start: nothing behind, all five code points ahead.
  EXPECT_EQ(0, std::distance(text_.begin(), cursor));
  EXPECT_EQ(5, std::distance(cursor, text_.end()));

  // After two steps.
  ++cursor;
  ++cursor;
  EXPECT_EQ(2, std::distance(text_.begin(), cursor));
  EXPECT_EQ(3, std::distance(cursor, text_.end()));

  // After four steps.
  ++cursor;
  ++cursor;
  EXPECT_EQ(4, std::distance(text_.begin(), cursor));

  // After five steps the cursor is at end().
  ++cursor;
  EXPECT_EQ(0, std::distance(cursor, text_.end()));
}
210*993b0882SAndroid Build Coastguard Worker
211*993b0882SAndroid Build Coastguard Worker class OperatorTest : public UnicodeTextTest {};
212*993b0882SAndroid Build Coastguard Worker
TEST_F(OperatorTest,Clear)213*993b0882SAndroid Build Coastguard Worker TEST_F(OperatorTest, Clear) {
214*993b0882SAndroid Build Coastguard Worker UnicodeText empty_text(UTF8ToUnicodeText("", /*do_copy=*/false));
215*993b0882SAndroid Build Coastguard Worker EXPECT_FALSE(text_ == empty_text);
216*993b0882SAndroid Build Coastguard Worker text_.clear();
217*993b0882SAndroid Build Coastguard Worker EXPECT_TRUE(text_ == empty_text);
218*993b0882SAndroid Build Coastguard Worker }
219*993b0882SAndroid Build Coastguard Worker
// empty() is true for a default-constructed text and for a cleared one, and
// false for a text that holds code points.
TEST_F(OperatorTest, Empty) {
  EXPECT_TRUE(empty_text_.empty());

  EXPECT_FALSE(text_.empty());
  text_.clear();
  EXPECT_TRUE(text_.empty());
}
226*993b0882SAndroid Build Coastguard Worker
227*993b0882SAndroid Build Coastguard Worker } // namespace
228*993b0882SAndroid Build Coastguard Worker } // namespace libtextclassifier3
229