1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stddef.h>
6
7 #include <string>
8
9 #include "base/i18n/rtl.h"
10 #include "base/i18n/string_search.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 #include "third_party/icu/source/i18n/unicode/usearch.h"
15
16 namespace base {
17 namespace i18n {
18
19 // Note on setting default locale for testing: The current default locale on
20 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
21 // string search is case-sensitive, when normally it should be
22 // case-insensitive. In other locales (including en_US which English speakers
23 // in the U.S. use), this search would be case-insensitive as expected.
24
// Exercises StringSearchIgnoringCaseAndAccents() on plain ASCII input: a match
// at the start of the text, a whitespace mismatch, a match found after earlier
// partial matches, an empty text, an empty pattern, and a mixed-case text.
TEST(StringSearchTest, ASCII) {
  // With an en_US_POSIX default locale this search is case-sensitive, so run
  // under en_US instead and restore the original default before returning.
  std::string default_locale(uloc_getDefault());
  bool locale_is_posix = (default_locale == "en_US_POSIX");
  if (locale_is_posix)
    SetICUDefaultLocale("en_US");

  size_t index = 0;
  size_t length = 0;

  // Pattern found at the very beginning of the text.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(5U, length);

  // Whitespace is significant: the pattern has a longer run of spaces after
  // the 'h' than the text does, so it must not be found. (A pattern identical
  // to the text would trivially match and make this check meaningless.)
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
      &index, &length));

  // The only complete occurrence starts at offset 4, after shorter partial
  // matches earlier in the text.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
  EXPECT_EQ(4U, index);
  EXPECT_EQ(6U, length);

  // A non-empty pattern is never found in an empty text.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("searching within empty string"), string16(),
      &index, &length));

  // An empty pattern matches at offset 0 with zero length.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(0U, length);

  // Letter case is ignored.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
      &index, &length));
  EXPECT_EQ(0U, index);
  EXPECT_EQ(18U, length);

  if (locale_is_posix)
    SetICUDefaultLocale(default_locale.data());
}
66
// Checks that accents and case are ignored for 'e' variants regardless of
// whether they are precomposed or written with combining marks, and that a
// different base letter ('a') is never matched against 'e'.
TEST(StringSearchTest, UnicodeLocaleIndependent) {
  // Unaccented letters. |plain_a| is not referenced by any check below.
  const string16 plain_e = WideToUTF16(L"e");
  const string16 plain_E = WideToUTF16(L"E");
  const string16 plain_a = WideToUTF16(L"a");

  // Precomposed accented letters.
  const string16 e_acute = WideToUTF16(L"\u00e9");
  const string16 E_acute = WideToUTF16(L"\u00c9");
  const string16 e_grave = WideToUTF16(L"\u00e8");
  const string16 E_grave = WideToUTF16(L"\u00c8");
  const string16 a_acute = WideToUTF16(L"\u00e1");

  // Base letter followed by a combining mark.
  const string16 e_acute_combining = WideToUTF16(L"e\u0301");
  const string16 E_acute_combining = WideToUTF16(L"E\u0301");
  const string16 e_grave_combining = WideToUTF16(L"e\u0300");
  const string16 E_grave_combining = WideToUTF16(L"E\u0300");
  const string16 a_acute_combining = WideToUTF16(L"a\u0301");

  // Swap an en_US_POSIX default locale for en_US while the checks run; the
  // original default is restored at the end of the test.
  const std::string saved_locale(uloc_getDefault());
  const bool override_posix_locale = (saved_locale == "en_US_POSIX");
  if (override_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  // Every pair below must match at offset 0 and cover the whole text.
  struct MatchCase {
    const char* description;
    const string16* pattern;
    const string16* text;
  };
  const MatchCase kMatchCases[] = {
      {"e in e-acute", &plain_e, &e_acute},
      {"e-acute in e", &e_acute, &plain_e},
      {"e in e+combining acute", &plain_e, &e_acute_combining},
      {"e+combining acute in e", &e_acute_combining, &plain_e},
      {"e+combining acute in e-acute", &e_acute_combining, &e_acute},
      {"e-acute in e+combining acute", &e_acute, &e_acute_combining},
      {"e+combining acute in e+combining grave", &e_acute_combining,
       &e_grave_combining},
      {"e+combining grave in e+combining acute", &e_grave_combining,
       &e_acute_combining},
      {"e+combining acute in e-grave", &e_acute_combining, &e_grave},
      {"e-grave in e+combining acute", &e_grave, &e_acute_combining},
      {"E-acute in e-acute", &E_acute, &e_acute},
      {"E-grave in e-acute", &E_grave, &e_acute},
      {"E+combining acute in e-grave", &E_acute_combining, &e_grave},
      {"E+combining grave in e-acute", &E_grave_combining, &e_acute},
      {"E in e-grave", &plain_E, &e_grave},
  };

  size_t match_index = 0;
  size_t match_length = 0;

  for (const MatchCase& match_case : kMatchCases) {
    SCOPED_TRACE(match_case.description);
    EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
        *match_case.pattern, *match_case.text, &match_index, &match_length));
    EXPECT_EQ(0U, match_index);
    EXPECT_EQ(match_case.text->size(), match_length);
  }

  // A different base letter must not match, whether precomposed...
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_acute, e_acute, &match_index, &match_length));

  // ...or written with a combining mark.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_acute_combining, e_acute_combining, &match_index, &match_length));

  if (override_posix_locale) {
    SetICUDefaultLocale(saved_locale.c_str());
  }
}
184
// Checks that whether 'a' matches 'a with ring above' depends on the ICU
// default locale: it matches under the initial default but not under "da".
TEST(StringSearchTest, UnicodeLocaleDependent) {
  const string16 plain_a = WideToUTF16(L"a");
  const string16 a_ring = WideToUTF16(L"\u00e5");  // Precomposed a-ring.

  // Under the locale the test starts with, the ring is ignored like any
  // other accent. Callers pass nullptr because the match position is not
  // inspected here.
  EXPECT_TRUE(
      StringSearchIgnoringCaseAndAccents(plain_a, a_ring, nullptr, nullptr));

  // Remember the current default so it can be reinstated afterwards.
  const std::string saved_locale(uloc_getDefault());
  SetICUDefaultLocale("da");

  // With "da" as the default the same search must fail (presumably because
  // Danish collation treats a-ring as a distinct letter -- confirm in ICU).
  EXPECT_FALSE(
      StringSearchIgnoringCaseAndAccents(plain_a, a_ring, nullptr, nullptr));

  SetICUDefaultLocale(saved_locale.c_str());
}
203
// Checks that one FixedPatternStringSearchIgnoringCaseAndAccents object can be
// reused to search several different texts for the same pattern.
TEST(StringSearchTest, FixedPatternMultipleSearch) {
  // Swap an en_US_POSIX default locale for en_US while the checks run; the
  // original default is restored at the end of the test.
  const std::string saved_locale(uloc_getDefault());
  const bool override_posix_locale = (saved_locale == "en_US_POSIX");
  if (override_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  size_t match_index = 0;
  size_t match_length = 0;

  // The pattern "hello" is fixed once; only the searched text varies.
  FixedPatternStringSearchIgnoringCaseAndAccents hello_search(
      ASCIIToUTF16("hello"));

  // Found in the middle of the text.
  EXPECT_TRUE(hello_search.Search(ASCIIToUTF16("12hello34"), &match_index,
                                  &match_length));
  EXPECT_EQ(2U, match_index);
  EXPECT_EQ(5U, match_length);

  // Not present at all.
  EXPECT_FALSE(
      hello_search.Search(ASCIIToUTF16("bye"), &match_index, &match_length));

  // Found again after a failed search, with different letter case.
  EXPECT_TRUE(
      hello_search.Search(ASCIIToUTF16("hELLo"), &match_index, &match_length));
  EXPECT_EQ(0U, match_index);
  EXPECT_EQ(5U, match_length);

  if (override_posix_locale) {
    SetICUDefaultLocale(saved_locale.c_str());
  }
}
226
227 } // namespace i18n
228 } // namespace base
229