// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*635a8641SAndroid Build Coastguard Worker
5*635a8641SAndroid Build Coastguard Worker #include <stddef.h>
6*635a8641SAndroid Build Coastguard Worker
7*635a8641SAndroid Build Coastguard Worker #include <string>
8*635a8641SAndroid Build Coastguard Worker
9*635a8641SAndroid Build Coastguard Worker #include "base/i18n/rtl.h"
10*635a8641SAndroid Build Coastguard Worker #include "base/i18n/string_search.h"
11*635a8641SAndroid Build Coastguard Worker #include "base/strings/string16.h"
12*635a8641SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
13*635a8641SAndroid Build Coastguard Worker #include "testing/gtest/include/gtest/gtest.h"
14*635a8641SAndroid Build Coastguard Worker #include "third_party/icu/source/i18n/unicode/usearch.h"
15*635a8641SAndroid Build Coastguard Worker
16*635a8641SAndroid Build Coastguard Worker namespace base {
17*635a8641SAndroid Build Coastguard Worker namespace i18n {
18*635a8641SAndroid Build Coastguard Worker
// Note on setting the default locale for testing: the current default locale
// on the Mac trybot is en_US_POSIX, under which string search at primary-level
// collation strength is case-sensitive, when normally it should be
// case-insensitive. In other locales (including en_US, which English speakers
// in the U.S. use), this search is case-insensitive as expected.
24*635a8641SAndroid Build Coastguard Worker
// Exercises StringSearchIgnoringCaseAndAccents() on ASCII-only input: a match
// at the start of the text, a whitespace mismatch, a match that only appears
// after several partial matches, empty text, empty pattern, and a match that
// differs from the pattern only in letter case.
TEST(StringSearchTest, ASCII) {
  // The expectations below rely on case-insensitive matching. When the default
  // locale is en_US_POSIX, switch to en_US for the duration of the test and
  // restore the original locale at the end.
  const std::string default_locale(uloc_getDefault());
  const bool locale_is_posix = (default_locale == "en_US_POSIX");
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  size_t index = 0;
  size_t length = 0;

  // Pattern found at the very beginning of the text.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(5U, length);

  // The pattern and the text differ only in the number of spaces after 'h'
  // (four versus three); the search is expected to treat that as a mismatch.
  // The two literals must not be identical, otherwise the text would trivially
  // contain the pattern.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
      &index, &length));

  // The only complete occurrence of the pattern starts at offset 4; every
  // earlier offset yields just a partial match.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
  EXPECT_EQ(4U, index);
  EXPECT_EQ(6U, length);

  // A non-empty pattern is never found in an empty text.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("searching within empty string"), string16(),
      &index, &length));

  // An empty pattern is reported as a zero-length match at offset 0.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(0U, length);

  // Letter case is ignored: the whole 18-character text matches.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
      &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(18U, length);

  // Undo the temporary locale override, if any.
  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.data());
}
66*635a8641SAndroid Build Coastguard Worker
// Checks that StringSearchIgnoringCaseAndAccents() treats base letters,
// precomposed accented letters, and base-letter-plus-combining-mark sequences
// as equivalent for the letter 'e' (in either case), and that it still
// distinguishes 'a' from 'e' when both carry the same accent.
TEST(StringSearchTest, UnicodeLocaleIndependent) {
  // Base characters
  const string16 e_base = WideToUTF16(L"e");
  const string16 E_base = WideToUTF16(L"E");

  // Composed characters
  const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
  const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
  const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
  const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
  const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");

  // Decomposed characters
  const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
  const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
  const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
  const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
  const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");

  // The expectations below rely on case-insensitive matching. When the default
  // locale is en_US_POSIX, switch to en_US for the duration of the test and
  // restore the original locale at the end.
  const std::string default_locale(uloc_getDefault());
  const bool locale_is_posix = (default_locale == "en_US_POSIX");
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  // One positive search: |find_this| must be found in |in_this|.
  struct SearchCase {
    const string16* find_this;
    const string16* in_this;
  };

  // Every pair below must match at offset 0, and the reported match must span
  // the entire text (which may be longer or shorter than the pattern when one
  // side is composed and the other decomposed).
  const SearchCase kMatchingCases[] = {
      // Base letter versus composed letter, both directions.
      {&e_base, &e_with_acute_accent},
      {&e_with_acute_accent, &e_base},
      // Base letter versus decomposed letter, both directions.
      {&e_base, &e_with_acute_combining_mark},
      {&e_with_acute_combining_mark, &e_base},
      // Composed versus decomposed form of the same accent.
      {&e_with_acute_combining_mark, &e_with_acute_accent},
      {&e_with_acute_accent, &e_with_acute_combining_mark},
      // Different accents, both decomposed.
      {&e_with_acute_combining_mark, &e_with_grave_combining_mark},
      {&e_with_grave_combining_mark, &e_with_acute_combining_mark},
      // Different accents, one decomposed and one composed.
      {&e_with_acute_combining_mark, &e_with_grave_accent},
      {&e_with_grave_accent, &e_with_acute_combining_mark},
      // Upper-case patterns against lower-case texts.
      {&E_with_acute_accent, &e_with_acute_accent},
      {&E_with_grave_accent, &e_with_acute_accent},
      {&E_with_acute_combining_mark, &e_with_grave_accent},
      {&E_with_grave_combining_mark, &e_with_acute_accent},
      {&E_base, &e_with_grave_accent},
  };

  size_t index = 0;
  size_t length = 0;

  size_t case_number = 0;
  for (const SearchCase& search_case : kMatchingCases) {
    // Identify the failing table entry, since all entries share these lines.
    SCOPED_TRACE(::testing::Message() << "matching case #" << case_number);
    EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
        *search_case.find_this, *search_case.in_this, &index, &length));
    EXPECT_EQ(0U, index);
    EXPECT_EQ(search_case.in_this->size(), length);
    ++case_number;
  }

  // Sharing an accent does not make different base letters match.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_with_acute_accent, e_with_acute_accent, &index, &length));

  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_with_acute_combining_mark, e_with_acute_combining_mark,
      &index, &length));

  // Undo the temporary locale override, if any.
  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.data());
}
184*635a8641SAndroid Build Coastguard Worker
// Checks that the result of StringSearchIgnoringCaseAndAccents() follows the
// ICU default locale: "a" is found in "a with ring above" under the initial
// default locale, but not once the default locale is switched to "da".
TEST(StringSearchTest, UnicodeLocaleDependent) {
  // Base characters
  const string16 a_base = WideToUTF16(L"a");

  // Composed characters
  const string16 a_with_ring = WideToUTF16(L"\u00e5");

  // The match position is not of interest here, so no out-parameters are
  // passed.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
                                                 nullptr));

  // Keep a private copy of the locale name, as the other tests in this file
  // do, so that restoring it below does not depend on storage owned by ICU.
  const std::string default_locale(uloc_getDefault());
  SetICUDefaultLocale("da");

  // NOTE(review): presumably Danish collation treats U+00E5 as a letter
  // distinct from 'a', which is why the same search is expected to fail.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
                                                  nullptr));

  // Restore the locale that was in effect when the test started.
  SetICUDefaultLocale(default_locale.data());
}
203*635a8641SAndroid Build Coastguard Worker
// Verifies that a single FixedPatternStringSearchIgnoringCaseAndAccents
// object can be reused to search several different texts for one pattern.
TEST(StringSearchTest, FixedPatternMultipleSearch) {
  // Case-insensitive matching is required by the last search below. When the
  // default locale is en_US_POSIX, run under en_US instead and put the
  // original locale back before returning.
  const std::string original_locale(uloc_getDefault());
  const bool override_locale = (original_locale == "en_US_POSIX");
  if (override_locale)
    SetICUDefaultLocale("en_US");

  size_t match_index = 0;
  size_t match_length = 0;

  // One searcher, built once for the pattern "hello".
  const string16 pattern = ASCIIToUTF16("hello");
  FixedPatternStringSearchIgnoringCaseAndAccents query(pattern);

  // Pattern embedded in the middle of the text.
  const string16 text_with_match = ASCIIToUTF16("12hello34");
  EXPECT_TRUE(query.Search(text_with_match, &match_index, &match_length));
  EXPECT_EQ(2U, match_index);
  EXPECT_EQ(5U, match_length);

  // Text that does not contain the pattern.
  const string16 text_without_match = ASCIIToUTF16("bye");
  EXPECT_FALSE(query.Search(text_without_match, &match_index, &match_length));

  // Text equal to the pattern except for letter case.
  const string16 text_mixed_case = ASCIIToUTF16("hELLo");
  EXPECT_TRUE(query.Search(text_mixed_case, &match_index, &match_length));
  EXPECT_EQ(0U, match_index);
  EXPECT_EQ(5U, match_length);

  if (override_locale)
    SetICUDefaultLocale(original_locale.c_str());
}
226*635a8641SAndroid Build Coastguard Worker
227*635a8641SAndroid Build Coastguard Worker } // namespace i18n
228*635a8641SAndroid Build Coastguard Worker } // namespace base
229