xref: /aosp_15_r20/external/libchrome/base/i18n/string_search_unittest.cc (revision 635a864187cb8b6c713ff48b7e790a6b21769273)
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*635a8641SAndroid Build Coastguard Worker 
5*635a8641SAndroid Build Coastguard Worker #include <stddef.h>
6*635a8641SAndroid Build Coastguard Worker 
7*635a8641SAndroid Build Coastguard Worker #include <string>
8*635a8641SAndroid Build Coastguard Worker 
9*635a8641SAndroid Build Coastguard Worker #include "base/i18n/rtl.h"
10*635a8641SAndroid Build Coastguard Worker #include "base/i18n/string_search.h"
11*635a8641SAndroid Build Coastguard Worker #include "base/strings/string16.h"
12*635a8641SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
13*635a8641SAndroid Build Coastguard Worker #include "testing/gtest/include/gtest/gtest.h"
14*635a8641SAndroid Build Coastguard Worker #include "third_party/icu/source/i18n/unicode/usearch.h"
15*635a8641SAndroid Build Coastguard Worker 
16*635a8641SAndroid Build Coastguard Worker namespace base {
17*635a8641SAndroid Build Coastguard Worker namespace i18n {
18*635a8641SAndroid Build Coastguard Worker 
// Note on setting the default locale for testing: the current default locale
// on the Mac trybot is en_US_POSIX, under which string search at primary-level
// collation strength is case-sensitive, when normally it should be
// case-insensitive. In other locales (including en_US, which English speakers
// in the U.S. use), this search is case-insensitive as expected.
24*635a8641SAndroid Build Coastguard Worker 
TEST(StringSearchTest,ASCII)25*635a8641SAndroid Build Coastguard Worker TEST(StringSearchTest, ASCII) {
26*635a8641SAndroid Build Coastguard Worker   std::string default_locale(uloc_getDefault());
27*635a8641SAndroid Build Coastguard Worker   bool locale_is_posix = (default_locale == "en_US_POSIX");
28*635a8641SAndroid Build Coastguard Worker   if (locale_is_posix)
29*635a8641SAndroid Build Coastguard Worker     SetICUDefaultLocale("en_US");
30*635a8641SAndroid Build Coastguard Worker 
31*635a8641SAndroid Build Coastguard Worker   size_t index = 0;
32*635a8641SAndroid Build Coastguard Worker   size_t length = 0;
33*635a8641SAndroid Build Coastguard Worker 
34*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
35*635a8641SAndroid Build Coastguard Worker       ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
36*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
37*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(5U, length);
38*635a8641SAndroid Build Coastguard Worker 
39*635a8641SAndroid Build Coastguard Worker   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
40*635a8641SAndroid Build Coastguard Worker       ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
41*635a8641SAndroid Build Coastguard Worker       &index, &length));
42*635a8641SAndroid Build Coastguard Worker 
43*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
44*635a8641SAndroid Build Coastguard Worker       ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
45*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(4U, index);
46*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(6U, length);
47*635a8641SAndroid Build Coastguard Worker 
48*635a8641SAndroid Build Coastguard Worker   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
49*635a8641SAndroid Build Coastguard Worker       ASCIIToUTF16("searching within empty string"), string16(),
50*635a8641SAndroid Build Coastguard Worker       &index, &length));
51*635a8641SAndroid Build Coastguard Worker 
52*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
53*635a8641SAndroid Build Coastguard Worker       string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
54*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
55*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, length);
56*635a8641SAndroid Build Coastguard Worker 
57*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
58*635a8641SAndroid Build Coastguard Worker       ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
59*635a8641SAndroid Build Coastguard Worker       &index, &length));
60*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
61*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(18U, length);
62*635a8641SAndroid Build Coastguard Worker 
63*635a8641SAndroid Build Coastguard Worker   if (locale_is_posix)
64*635a8641SAndroid Build Coastguard Worker     SetICUDefaultLocale(default_locale.data());
65*635a8641SAndroid Build Coastguard Worker }
66*635a8641SAndroid Build Coastguard Worker 
TEST(StringSearchTest,UnicodeLocaleIndependent)67*635a8641SAndroid Build Coastguard Worker TEST(StringSearchTest, UnicodeLocaleIndependent) {
68*635a8641SAndroid Build Coastguard Worker   // Base characters
69*635a8641SAndroid Build Coastguard Worker   const string16 e_base = WideToUTF16(L"e");
70*635a8641SAndroid Build Coastguard Worker   const string16 E_base = WideToUTF16(L"E");
71*635a8641SAndroid Build Coastguard Worker   const string16 a_base = WideToUTF16(L"a");
72*635a8641SAndroid Build Coastguard Worker 
73*635a8641SAndroid Build Coastguard Worker   // Composed characters
74*635a8641SAndroid Build Coastguard Worker   const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
75*635a8641SAndroid Build Coastguard Worker   const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
76*635a8641SAndroid Build Coastguard Worker   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
77*635a8641SAndroid Build Coastguard Worker   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
78*635a8641SAndroid Build Coastguard Worker   const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
79*635a8641SAndroid Build Coastguard Worker 
80*635a8641SAndroid Build Coastguard Worker   // Decomposed characters
81*635a8641SAndroid Build Coastguard Worker   const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
82*635a8641SAndroid Build Coastguard Worker   const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
83*635a8641SAndroid Build Coastguard Worker   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
84*635a8641SAndroid Build Coastguard Worker   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
85*635a8641SAndroid Build Coastguard Worker   const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
86*635a8641SAndroid Build Coastguard Worker 
87*635a8641SAndroid Build Coastguard Worker   std::string default_locale(uloc_getDefault());
88*635a8641SAndroid Build Coastguard Worker   bool locale_is_posix = (default_locale == "en_US_POSIX");
89*635a8641SAndroid Build Coastguard Worker   if (locale_is_posix)
90*635a8641SAndroid Build Coastguard Worker     SetICUDefaultLocale("en_US");
91*635a8641SAndroid Build Coastguard Worker 
92*635a8641SAndroid Build Coastguard Worker   size_t index = 0;
93*635a8641SAndroid Build Coastguard Worker   size_t length = 0;
94*635a8641SAndroid Build Coastguard Worker 
95*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
96*635a8641SAndroid Build Coastguard Worker       e_base, e_with_acute_accent, &index, &length));
97*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
98*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_accent.size(), length);
99*635a8641SAndroid Build Coastguard Worker 
100*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
101*635a8641SAndroid Build Coastguard Worker       e_with_acute_accent, e_base, &index, &length));
102*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
103*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_base.size(), length);
104*635a8641SAndroid Build Coastguard Worker 
105*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
106*635a8641SAndroid Build Coastguard Worker       e_base, e_with_acute_combining_mark, &index, &length));
107*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
108*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
109*635a8641SAndroid Build Coastguard Worker 
110*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
111*635a8641SAndroid Build Coastguard Worker       e_with_acute_combining_mark, e_base, &index, &length));
112*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
113*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_base.size(), length);
114*635a8641SAndroid Build Coastguard Worker 
115*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
116*635a8641SAndroid Build Coastguard Worker       e_with_acute_combining_mark, e_with_acute_accent,
117*635a8641SAndroid Build Coastguard Worker       &index, &length));
118*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
119*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_accent.size(), length);
120*635a8641SAndroid Build Coastguard Worker 
121*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
122*635a8641SAndroid Build Coastguard Worker       e_with_acute_accent, e_with_acute_combining_mark,
123*635a8641SAndroid Build Coastguard Worker       &index, &length));
124*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
125*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
126*635a8641SAndroid Build Coastguard Worker 
127*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
128*635a8641SAndroid Build Coastguard Worker       e_with_acute_combining_mark, e_with_grave_combining_mark,
129*635a8641SAndroid Build Coastguard Worker       &index, &length));
130*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
131*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_grave_combining_mark.size(), length);
132*635a8641SAndroid Build Coastguard Worker 
133*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
134*635a8641SAndroid Build Coastguard Worker       e_with_grave_combining_mark, e_with_acute_combining_mark,
135*635a8641SAndroid Build Coastguard Worker       &index, &length));
136*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
137*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
138*635a8641SAndroid Build Coastguard Worker 
139*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
140*635a8641SAndroid Build Coastguard Worker       e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
141*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
142*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_grave_accent.size(), length);
143*635a8641SAndroid Build Coastguard Worker 
144*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
145*635a8641SAndroid Build Coastguard Worker       e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
146*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
147*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
148*635a8641SAndroid Build Coastguard Worker 
149*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
150*635a8641SAndroid Build Coastguard Worker       E_with_acute_accent, e_with_acute_accent, &index, &length));
151*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
152*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_accent.size(), length);
153*635a8641SAndroid Build Coastguard Worker 
154*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
155*635a8641SAndroid Build Coastguard Worker       E_with_grave_accent, e_with_acute_accent, &index, &length));
156*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
157*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_accent.size(), length);
158*635a8641SAndroid Build Coastguard Worker 
159*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
160*635a8641SAndroid Build Coastguard Worker       E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
161*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
162*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_grave_accent.size(), length);
163*635a8641SAndroid Build Coastguard Worker 
164*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
165*635a8641SAndroid Build Coastguard Worker       E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
166*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
167*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_acute_accent.size(), length);
168*635a8641SAndroid Build Coastguard Worker 
169*635a8641SAndroid Build Coastguard Worker   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
170*635a8641SAndroid Build Coastguard Worker       E_base, e_with_grave_accent, &index, &length));
171*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(0U, index);
172*635a8641SAndroid Build Coastguard Worker   EXPECT_EQ(e_with_grave_accent.size(), length);
173*635a8641SAndroid Build Coastguard Worker 
174*635a8641SAndroid Build Coastguard Worker   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
175*635a8641SAndroid Build Coastguard Worker       a_with_acute_accent, e_with_acute_accent, &index, &length));
176*635a8641SAndroid Build Coastguard Worker 
177*635a8641SAndroid Build Coastguard Worker   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
178*635a8641SAndroid Build Coastguard Worker       a_with_acute_combining_mark, e_with_acute_combining_mark,
179*635a8641SAndroid Build Coastguard Worker       &index, &length));
180*635a8641SAndroid Build Coastguard Worker 
181*635a8641SAndroid Build Coastguard Worker   if (locale_is_posix)
182*635a8641SAndroid Build Coastguard Worker     SetICUDefaultLocale(default_locale.data());
183*635a8641SAndroid Build Coastguard Worker }
184*635a8641SAndroid Build Coastguard Worker 
// Checks that the result of StringSearchIgnoringCaseAndAccents() depends on
// the ICU default locale: "a" is expected to match U+00E5 (a with ring above)
// under the locale in effect when the test starts, but not after the default
// locale is switched to Danish ("da").
TEST(StringSearchTest, UnicodeLocaleDependent) {
  // Base characters
  const string16 a_base = WideToUTF16(L"a");

  // Composed characters
  const string16 a_with_ring = WideToUTF16(L"\u00e5");

  // Only the boolean result matters here, so no out-parameters are supplied.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
                                                 nullptr));

  // Keep a by-value copy of the default locale name before changing it, the
  // same way the other tests in this file save it.
  const std::string default_locale(uloc_getDefault());
  SetICUDefaultLocale("da");

  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(a_base, a_with_ring, nullptr,
                                                  nullptr));

  // Put back the locale that was in effect when the test started.
  SetICUDefaultLocale(default_locale.data());
}
203*635a8641SAndroid Build Coastguard Worker 
// Checks that a single FixedPatternStringSearchIgnoringCaseAndAccents object,
// constructed once with a pattern, can be reused to search several different
// texts and reports the correct result for each.
TEST(StringSearchTest, FixedPatternMultipleSearch) {
  // If the default locale is en_US_POSIX, switch to en_US for the duration of
  // the test; the original default is restored at the end.
  std::string default_locale(uloc_getDefault());
  bool locale_is_posix = (default_locale == "en_US_POSIX");
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  // Out-parameters receiving the position and length of the match. They are
  // only checked after searches that are expected to succeed.
  size_t index = 0;
  size_t length = 0;

  // Search "hello" over multiple texts.
  FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));

  // Match in the middle of the text.
  EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length));
  EXPECT_EQ(2U, index);
  EXPECT_EQ(5U, length);

  // A text without the pattern; the same query object is then used again.
  EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length));

  // Match that differs from the pattern only in case.
  EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(5U, length);

  // Put back the locale that was in effect when the test started.
  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.data());
}
226*635a8641SAndroid Build Coastguard Worker 
227*635a8641SAndroid Build Coastguard Worker }  // namespace i18n
228*635a8641SAndroid Build Coastguard Worker }  // namespace base
229