xref: /aosp_15_r20/external/libchrome/base/i18n/string_search_unittest.cc (revision 635a864187cb8b6c713ff48b7e790a6b21769273)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <stddef.h>
6 
7 #include <string>
8 
9 #include "base/i18n/rtl.h"
10 #include "base/i18n/string_search.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "third_party/icu/source/i18n/unicode/usearch.h"
15 
16 namespace base {
17 namespace i18n {
18 
19 // Note on setting default locale for testing: The current default locale on
20 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
21 // string search is case-sensitive, when normally it should be
22 // case-insensitive. In other locales (including en_US which English speakers
23 // in the U.S. use), this search would be case-insensitive as expected.
24 
// Exercises StringSearchIgnoringCaseAndAccents() on ASCII-only input: a
// match at the start of the text, a whitespace mismatch, a match that is
// preceded by partial matches, an empty text, an empty pattern, and text
// that differs from the pattern only by letter case.
TEST(StringSearchTest, ASCII) {
  // When the default locale is en_US_POSIX, run under en_US instead and put
  // the original locale back before returning.
  const std::string original_locale(uloc_getDefault());
  const bool swap_posix_locale = (original_locale == "en_US_POSIX");
  if (swap_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  // Out-parameters shared by every search below.
  size_t match_index = 0;
  size_t match_length = 0;

  {
    // Pattern sits at the very beginning of the text.
    const string16 needle = ASCIIToUTF16("hello");
    const string16 haystack = ASCIIToUTF16("hello world");
    EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
        needle, haystack, &match_index, &match_length));
    EXPECT_EQ(0U, match_index);
    EXPECT_EQ(5U, match_length);
  }

  {
    // Pattern and text differ in the number of spaces after the "h".
    const string16 needle = ASCIIToUTF16("h    e l l o");
    const string16 haystack = ASCIIToUTF16("h   e l l o");
    EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
        needle, haystack, &match_index, &match_length));
  }

  {
    // The only full occurrence starts at offset 4.
    const string16 needle = ASCIIToUTF16("aabaaa");
    const string16 haystack = ASCIIToUTF16("aaabaabaaa");
    EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
        needle, haystack, &match_index, &match_length));
    EXPECT_EQ(4U, match_index);
    EXPECT_EQ(6U, match_length);
  }

  {
    // A non-empty pattern is not found in an empty text.
    const string16 needle = ASCIIToUTF16("searching within empty string");
    const string16 empty_haystack;
    EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
        needle, empty_haystack, &match_index, &match_length));
  }

  {
    // An empty pattern is reported as a zero-length match at offset 0.
    const string16 empty_needle;
    const string16 haystack = ASCIIToUTF16("searching for empty string");
    EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
        empty_needle, haystack, &match_index, &match_length));
    EXPECT_EQ(0U, match_index);
    EXPECT_EQ(0U, match_length);
  }

  {
    // Letter case is ignored; the whole 18-character text is matched.
    const string16 needle = ASCIIToUTF16("case insensitivity");
    const string16 haystack = ASCIIToUTF16("CaSe InSeNsItIvItY");
    EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
        needle, haystack, &match_index, &match_length));
    EXPECT_EQ(0U, match_index);
    EXPECT_EQ(18U, match_length);
  }

  if (swap_posix_locale) {
    SetICUDefaultLocale(original_locale.c_str());
  }
}
66 
TEST(StringSearchTest,UnicodeLocaleIndependent)67 TEST(StringSearchTest, UnicodeLocaleIndependent) {
68   // Base characters
69   const string16 e_base = WideToUTF16(L"e");
70   const string16 E_base = WideToUTF16(L"E");
71   const string16 a_base = WideToUTF16(L"a");
72 
73   // Composed characters
74   const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
75   const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
76   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
77   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
78   const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
79 
80   // Decomposed characters
81   const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
82   const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
83   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
84   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
85   const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
86 
87   std::string default_locale(uloc_getDefault());
88   bool locale_is_posix = (default_locale == "en_US_POSIX");
89   if (locale_is_posix)
90     SetICUDefaultLocale("en_US");
91 
92   size_t index = 0;
93   size_t length = 0;
94 
95   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
96       e_base, e_with_acute_accent, &index, &length));
97   EXPECT_EQ(0U, index);
98   EXPECT_EQ(e_with_acute_accent.size(), length);
99 
100   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
101       e_with_acute_accent, e_base, &index, &length));
102   EXPECT_EQ(0U, index);
103   EXPECT_EQ(e_base.size(), length);
104 
105   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
106       e_base, e_with_acute_combining_mark, &index, &length));
107   EXPECT_EQ(0U, index);
108   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
109 
110   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
111       e_with_acute_combining_mark, e_base, &index, &length));
112   EXPECT_EQ(0U, index);
113   EXPECT_EQ(e_base.size(), length);
114 
115   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
116       e_with_acute_combining_mark, e_with_acute_accent,
117       &index, &length));
118   EXPECT_EQ(0U, index);
119   EXPECT_EQ(e_with_acute_accent.size(), length);
120 
121   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
122       e_with_acute_accent, e_with_acute_combining_mark,
123       &index, &length));
124   EXPECT_EQ(0U, index);
125   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
126 
127   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
128       e_with_acute_combining_mark, e_with_grave_combining_mark,
129       &index, &length));
130   EXPECT_EQ(0U, index);
131   EXPECT_EQ(e_with_grave_combining_mark.size(), length);
132 
133   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
134       e_with_grave_combining_mark, e_with_acute_combining_mark,
135       &index, &length));
136   EXPECT_EQ(0U, index);
137   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
138 
139   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
140       e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
141   EXPECT_EQ(0U, index);
142   EXPECT_EQ(e_with_grave_accent.size(), length);
143 
144   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
145       e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
146   EXPECT_EQ(0U, index);
147   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
148 
149   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
150       E_with_acute_accent, e_with_acute_accent, &index, &length));
151   EXPECT_EQ(0U, index);
152   EXPECT_EQ(e_with_acute_accent.size(), length);
153 
154   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
155       E_with_grave_accent, e_with_acute_accent, &index, &length));
156   EXPECT_EQ(0U, index);
157   EXPECT_EQ(e_with_acute_accent.size(), length);
158 
159   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
160       E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
161   EXPECT_EQ(0U, index);
162   EXPECT_EQ(e_with_grave_accent.size(), length);
163 
164   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
165       E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
166   EXPECT_EQ(0U, index);
167   EXPECT_EQ(e_with_acute_accent.size(), length);
168 
169   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
170       E_base, e_with_grave_accent, &index, &length));
171   EXPECT_EQ(0U, index);
172   EXPECT_EQ(e_with_grave_accent.size(), length);
173 
174   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
175       a_with_acute_accent, e_with_acute_accent, &index, &length));
176 
177   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
178       a_with_acute_combining_mark, e_with_acute_combining_mark,
179       &index, &length));
180 
181   if (locale_is_posix)
182     SetICUDefaultLocale(default_locale.data());
183 }
184 
// Checks that the outcome of searching for "a" in U+00E5 depends on the ICU
// default locale: a match is expected under the locale active on entry, and
// no match is expected once the default locale has been set to "da".
TEST(StringSearchTest, UnicodeLocaleDependent) {
  // Base character and its composed form with a ring above.
  const string16 plain_a = WideToUTF16(L"a");
  const string16 a_ring = WideToUTF16(L"\u00e5");

  // Runs the same search each time; only the success flag is of interest, so
  // no out-parameters are supplied.
  const auto plain_a_found_in_a_ring = [&plain_a, &a_ring]() {
    return StringSearchIgnoringCaseAndAccents(plain_a, a_ring, nullptr,
                                              nullptr);
  };

  EXPECT_TRUE(plain_a_found_in_a_ring());

  // Remember the locale in effect so it can be reinstated afterwards.
  const char* const locale_on_entry = uloc_getDefault();
  SetICUDefaultLocale("da");

  EXPECT_FALSE(plain_a_found_in_a_ring());

  SetICUDefaultLocale(locale_on_entry);
}
203 
// Checks that a single FixedPatternStringSearchIgnoringCaseAndAccents object
// can be searched against several texts in turn, including a miss between
// two hits.
TEST(StringSearchTest, FixedPatternMultipleSearch) {
  // When the default locale is en_US_POSIX, run under en_US instead and put
  // the original locale back before returning.
  const std::string original_locale(uloc_getDefault());
  const bool swap_posix_locale = (original_locale == "en_US_POSIX");
  if (swap_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  size_t match_index = 0;
  size_t match_length = 0;

  // One searcher for "hello", reused for every text below.
  FixedPatternStringSearchIgnoringCaseAndAccents hello_searcher(
      ASCIIToUTF16("hello"));

  // Pattern embedded in the middle of the text.
  const string16 text_with_embedded_match = ASCIIToUTF16("12hello34");
  EXPECT_TRUE(hello_searcher.Search(text_with_embedded_match, &match_index,
                                    &match_length));
  EXPECT_EQ(2U, match_index);
  EXPECT_EQ(5U, match_length);

  // Text that does not contain the pattern.
  const string16 text_without_match = ASCIIToUTF16("bye");
  EXPECT_FALSE(
      hello_searcher.Search(text_without_match, &match_index, &match_length));

  // Text equal to the pattern apart from letter case.
  const string16 text_with_mixed_case = ASCIIToUTF16("hELLo");
  EXPECT_TRUE(hello_searcher.Search(text_with_mixed_case, &match_index,
                                    &match_length));
  EXPECT_EQ(0U, match_index);
  EXPECT_EQ(5U, match_length);

  if (swap_posix_locale) {
    SetICUDefaultLocale(original_locale.c_str());
  }
}
226 
227 }  // namespace i18n
228 }  // namespace base
229