1 // Copyright 2020 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <string>
7
8 #include "base/strings/escape.h"
9
10 #include "base/strings/string_util.h"
11 #include "base/strings/stringprintf.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14
15 namespace base {
16 namespace {
17
// One escaping test vector: |input| is handed to the function under test and
// the returned string is compared against |output|.
struct EscapeCase {
  const char* input;   // Raw text to escape.
  const char* output;  // Expected escaped text.
};
22
// Test vector used by the HTML escaping and unescaping tests in this file.
struct EscapeForHTMLCase {
  const char* input;            // String passed to the function under test.
  const char* expected_output;  // String the function is expected to return.
};
27
28 struct UnescapeURLCase {
29 const char* input;
30 UnescapeRule::Type rules;
31 const char* output;
32 };
33
// Test vector carrying one escaped input together with the result expected
// from each unescaping flavor exercised by the
// UnescapeAndDecodeUTF8URLComponentWithAdjustments test.
struct UnescapeAndDecodeCase {
  // Escaped text fed to every call.
  const char* input;

  // Expected result of UnescapeURLComponent() with UnescapeRule::NORMAL.
  const char* url_unescaped;

  // Expected result of UnescapeURLComponent() with
  // UnescapeRule::REPLACE_PLUS_WITH_SPACE.
  const char* query_unescaped;

  // Expected result of UnescapeAndDecodeUTF8URLComponentWithAdjustments()
  // with UnescapeRule::NORMAL, written as a wide string and converted with
  // WideToUTF16() before comparison.
  const wchar_t* decoded;
};
46
// Test vector for offset adjustment: |input| is unescaped and decoded, then
// |input_offset| is mapped through the recorded adjustments and compared
// against |output_offset|.
struct AdjustOffsetCase {
  const char* input;     // Escaped text to unescape and decode.
  size_t input_offset;   // Offset into |input|; may be std::string::npos.
  size_t output_offset;  // Expected offset after adjustment; may be
                         // std::string::npos.
};
52
// Exercises EscapeQueryParamValue() with both values of its second argument,
// then sweeps every single-byte input.
TEST(EscapeTest, EscapeTextForFormSubmission) {
  // Second argument true: a space comes back as '+'.
  const EscapeCase kPlusForSpace[] = {
      {"foo", "foo"}, {"foo bar", "foo+bar"}, {"foo++", "foo%2B%2B"}};
  for (const EscapeCase& sample : kPlusForSpace) {
    EXPECT_EQ(sample.output, EscapeQueryParamValue(sample.input, true));
  }

  // Second argument false: a space comes back as "%20".
  const EscapeCase kPercentForSpace[] = {
      {"foo", "foo"}, {"foo bar", "foo%20bar"}, {"foo++", "foo%2B%2B"}};
  for (const EscapeCase& sample : kPercentForSpace) {
    EXPECT_EQ(sample.output, EscapeQueryParamValue(sample.input, false));
  }

  // Check every byte value. Only the characters listed here are expected to
  // pass through unchanged.
  const std::string kPassThrough(
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "0123456789"
      "!'()*-._~");
  for (int byte = 0; byte < 256; ++byte) {
    const std::string in(1, static_cast<char>(byte));
    const std::string out = EscapeQueryParamValue(in, true);

    if (byte == 0) {
      EXPECT_EQ(out, std::string("%00"));
      continue;
    }
    if (byte == 32) {
      // A space is written as '+', the way web forms do it.
      EXPECT_EQ(out, std::string("+"));
      continue;
    }
    if (kPassThrough.find(in) != std::string::npos) {
      // Characters on the pass-through list are returned as-is.
      EXPECT_EQ(out, in);
      continue;
    }
    // Everything else becomes an upper-case %XX escape.
    EXPECT_EQ(StringPrintf("%%%02X", byte), out);
  }
}
93
// Checks EscapePath() against a sample covering most of the byte range of
// interest.
TEST(EscapeTest, EscapePath) {
  // Unescaped sample.
  const char kRaw[] =
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff";
  // The same sample as EscapePath() is expected to return it.
  const char kEscaped[] =
      "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
      "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF";

  ASSERT_EQ(EscapePath(kRaw), kEscaped);
}
107
// Checks EscapeUrlEncodedData() (second argument true) against a sample
// covering most of the byte range of interest.
TEST(EscapeTest, EscapeUrlEncodedData) {
  // Unescaped sample.
  const char kRaw[] =
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff";
  // The same sample as EscapeUrlEncodedData() is expected to return it.
  const char kEscaped[] =
      "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
      "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF";

  ASSERT_EQ(EscapeUrlEncodedData(kRaw, true), kEscaped);
}
122
// The second argument of EscapeUrlEncodedData() selects how a space is
// written: '+' when true, "%20" when false.
TEST(EscapeTest, EscapeUrlEncodedDataSpace) {
  const char kWithSpace[] = "a b";
  ASSERT_EQ(EscapeUrlEncodedData(kWithSpace, true), "a+b");
  ASSERT_EQ(EscapeUrlEncodedData(kWithSpace, false), "a%20b");
}
127
// Checks that EscapeForHTML() leaves plain text alone and replaces
// HTML-significant characters with their entity form.
TEST(EscapeTest, EscapeForHTML) {
  const EscapeForHTMLCase tests[] = {
      // Nothing to escape.
      {"hello", "hello"},
      // Angle brackets become &lt; / &gt;.
      {"<hello>", "&lt;hello&gt;"},
      // An apostrophe becomes the numeric entity &#39;.
      {"don\'t mess with me", "don&#39;t mess with me"},
  };
  for (const auto& test : tests) {
    std::string result = EscapeForHTML(std::string(test.input));
    EXPECT_EQ(std::string(test.expected_output), result);
  }
}
139
// Checks that UnescapeForHTML() turns the supported entities back into their
// characters and leaves everything else, including malformed entities,
// untouched.
TEST(EscapeTest, UnescapeForHTML) {
  const EscapeForHTMLCase tests[] = {
      {"", ""},
      {"&lt;hello&gt;", "<hello>"},
      {"don&#39;t mess with me", "don\'t mess with me"},
      // All supported entities back to back.
      {"&lt;&gt;&amp;&quot;&#39;", "<>&\"'"},
      // Malformed or incomplete entities are expected to pass through as-is.
      {"& lt; &amp ; &; '", "& lt; &amp ; &; '"},
      // Each supported entity on its own.
      {"&amp;", "&"},
      {"&quot;", "\""},
      {"&#39;", "'"},
      {"&lt;", "<"},
      {"&gt;", ">"},
      // An entity followed by a bare ampersand.
      {"&amp; &", "& &"},
  };
  for (const auto& test : tests) {
    std::u16string result = UnescapeForHTML(ASCIIToUTF16(test.input));
    EXPECT_EQ(ASCIIToUTF16(test.expected_output), result);
  }
}
159
// Checks EscapeExternalHandlerValue() against a broad byte sample, a string
// that must come back unchanged, stray '%' handling, and whole URLs.
TEST(EscapeTest, EscapeExternalHandlerValue) {
  // A sample covering most of the byte range of interest, unescaped.
  const char kRaw[] =
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff";
  // The same sample in its expected escaped form.
  const char kEscaped[] =
      "%02%0A%1D%20!%22#$%25&'()*+,-./0123456789:;"
      "%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[%5C]%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF";
  ASSERT_EQ(kEscaped, EscapeExternalHandlerValue(kRaw));

  // None of these characters is expected to be touched.
  const char kUnchanged[] =
      "!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_"
      "abcdefghijklmnopqrstuvwxyz~";
  ASSERT_EQ(kUnchanged, EscapeExternalHandlerValue(kUnchanged));

  const struct {
    const char* expected;
    const char* input;
  } kCases[] = {
      // A '%' not followed by two hex digits is written as "%25".
      {"%258k", "%8k"},
      {"a%25", "a%"},
      {"%25a", "%a"},
      {"a%258", "a%8"},
      // A '%' followed by two hex digits is kept, in either letter case.
      {"%ab", "%ab"},
      {"%AB", "%AB"},
      // Whole URLs: '|' is escaped everywhere, already-escaped input comes
      // back unchanged, and a bracketed IPv6-style host is left alone.
      {"http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C",
       "http://example.com/path/sub?q=a|b|c&q=1|2|3#ref|"},
      {"http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C",
       "http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C"},
      {"http://[2001:db8:0:1]:80", "http://[2001:db8:0:1]:80"},
  };
  for (const auto& sample : kCases) {
    ASSERT_EQ(sample.expected, EscapeExternalHandlerValue(sample.input));
  }
}
196
// Both functions escape the raw 0x80 byte; only EscapeNonASCIIAndPercent()
// also rewrites the literal '%' as "%25".
TEST(EscapeTest, EscapeNonASCII) {
  const char kInput[] = "abc\n%80\x80";
  EXPECT_EQ("abc\n%2580%80", EscapeNonASCIIAndPercent(kInput));
  EXPECT_EQ("abc\n%80%80", EscapeNonASCII(kInput));
}
201
// Runs the adjustment-recording unescaper over a data-URL payload with
// percent-encoded accented characters. No expectations are placed on the
// result; the test only requires the call to complete.
TEST(EscapeTest, DataURLWithAccentedCharacters) {
  const std::string kEscapedPayload =
      "text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3"
      "%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20";

  OffsetAdjuster::Adjustments recorded_adjustments;
  UnescapeAndDecodeUTF8URLComponentWithAdjustments(
      kEscapedPayload, UnescapeRule::SPACES, &recorded_adjustments);
}
211
// Table-driven check of UnescapeURLComponent() across the UnescapeRule flags,
// followed by a separate case for embedded NUL characters.
TEST(EscapeTest, UnescapeURLComponent) {
  const UnescapeURLCase kUnescapeCases[] = {
      {"", UnescapeRule::NORMAL, ""},
      {"%2", UnescapeRule::NORMAL, "%2"},
      {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
      {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
      {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
      {"Some%20random text %25%2dOK", UnescapeRule::NONE,
       "Some%20random text %25%2dOK"},
      {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
       "Some%20random text %25-OK"},
      {"Some%20random text %25%E1%A6", UnescapeRule::NORMAL,
       "Some%20random text %25\xE1\xA6"},
      {"Some%20random text %25%E1%A6OK", UnescapeRule::NORMAL,
       "Some%20random text %25\xE1\xA6OK"},
      {"Some%20random text %25%E1%A6%99OK", UnescapeRule::NORMAL,
       "Some%20random text %25\xE1\xA6\x99OK"},

      // BiDi Control characters should not be unescaped.
      {"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL,
       "Some%20random text %25%D8%9COK"},
      {"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL,
       "Some%20random text %25%E2%80%8EOK"},
      {"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL,
       "Some%20random text %25%E2%80%8FOK"},
      {"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL,
       "Some%20random text %25%E2%80%AAOK"},
      {"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL,
       "Some%20random text %25%E2%80%ABOK"},
      {"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL,
       "Some%20random text %25%E2%80%AEOK"},
      {"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL,
       "Some%20random text %25%E2%81%A6OK"},
      {"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL,
       "Some%20random text %25%E2%81%A9OK"},

      // Certain banned characters should not be unescaped.
      // U+1F50F LOCK WITH INK PEN
      {"Some%20random text %25%F0%9F%94%8FOK", UnescapeRule::NORMAL,
       "Some%20random text %25%F0%9F%94%8FOK"},
      // U+1F510 CLOSED LOCK WITH KEY
      {"Some%20random text %25%F0%9F%94%90OK", UnescapeRule::NORMAL,
       "Some%20random text %25%F0%9F%94%90OK"},
      // U+1F512 LOCK
      {"Some%20random text %25%F0%9F%94%92OK", UnescapeRule::NORMAL,
       "Some%20random text %25%F0%9F%94%92OK"},
      // U+1F513 OPEN LOCK
      {"Some%20random text %25%F0%9F%94%93OK", UnescapeRule::NORMAL,
       "Some%20random text %25%F0%9F%94%93OK"},

      // Spaces
      {"(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)", UnescapeRule::NORMAL,
       "(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)"},
      {"(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)", UnescapeRule::NORMAL,
       "(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)"},
      {"(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)", UnescapeRule::NORMAL,
       "(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)"},
      {"(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)", UnescapeRule::NORMAL,
       "(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)"},
      {"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)", UnescapeRule::NORMAL,
       "(%E2%80%AF)(%E2%81%9F)(%E3%80%80)"},
      {"(%E2%A0%80)", UnescapeRule::NORMAL, "(%E2%A0%80)"},

      // Default Ignorable and Formatting characters should not be unescaped.
      {"(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)", UnescapeRule::NORMAL,
       "(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)"},
      {"(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)", UnescapeRule::NORMAL,
       "(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)"},
      {"(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)", UnescapeRule::NORMAL,
       "(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)"},
      {"(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)", UnescapeRule::NORMAL,
       "(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)"},
      {"(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)", UnescapeRule::NORMAL,
       "(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)"},
      {"(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)", UnescapeRule::NORMAL,
       "(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)"},
      {"(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)", UnescapeRule::NORMAL,
       "(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)"},
      {"(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)", UnescapeRule::NORMAL,
       "(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)"},
      {"(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)", UnescapeRule::NORMAL,
       "(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)"},
      {"(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)", UnescapeRule::NORMAL,
       "(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)"},
      {"(%F0%93%90%B0)(%F0%93%90%B8)", UnescapeRule::NORMAL,
       "(%F0%93%90%B0)(%F0%93%90%B8)"},
      // General Punctuation - Deprecated (U+206A--206F)
      {"(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)", UnescapeRule::NORMAL,
       "(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)"},
      // Variation selectors (U+FE00--FE0F)
      {"(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)", UnescapeRule::NORMAL,
       "(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)"},
      // Shorthand format controls (U+1BCA0--1BCA3)
      {"(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)", UnescapeRule::NORMAL,
       "(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)"},
      // Musical symbols beams and slurs (U+1D173--1D17A)
      {"(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)", UnescapeRule::NORMAL,
       "(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)"},
      // Tags block (U+E0000--E007F), includes unassigned points
      {"(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)", UnescapeRule::NORMAL,
       "(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)"},
      // Ideographic-specific variation selectors (U+E0100--E01EF)
      {"(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)", UnescapeRule::NORMAL,
       "(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)"},

      // Two spoofing characters in a row should not be unescaped.
      {"%D8%9C%D8%9C", UnescapeRule::NORMAL, "%D8%9C%D8%9C"},
      // Non-spoofing characters surrounded by spoofing characters should be
      // unescaped.
      {"%D8%9C%C2%A1%D8%9C%C2%A1", UnescapeRule::NORMAL,
       "%D8%9C\xC2\xA1%D8%9C\xC2\xA1"},
      // Invalid UTF-8 characters surrounded by spoofing characters should be
      // unescaped.
      {"%D8%9C%85%D8%9C%85", UnescapeRule::NORMAL, "%D8%9C\x85%D8%9C\x85"},
      // Test with enough trail bytes to overflow the CBU8_MAX_LENGTH-byte
      // buffer. The first two bytes are a spoofing character as well.
      {"%D8%9C%9C%9C%9C%9C%9C%9C%9C%9C%9C", UnescapeRule::NORMAL,
       "%D8%9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C"},

      {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
       "Some random text %25-OK"},
      {"Some%20random text %25%2dOK", UnescapeRule::PATH_SEPARATORS,
       "Some%20random text %25-OK"},
      {"Some%20random text %25%2dOK",
       UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
       "Some%20random text %-OK"},
      {"Some%20random text %25%2dOK",
       UnescapeRule::SPACES |
           UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
       "Some random text %-OK"},
      {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
      {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
      // Certain URL-sensitive characters should not be unescaped unless asked.
      {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
       UnescapeRule::SPACES, "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
      {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
       UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
       "Hello%20%13%10world ## ?? == && %% ++"},
      // We can neither escape nor unescape '@' since some websites expect it to
      // be preserved as either '@' or "%40".
      // See http://b/996720 and http://crbug.com/23933 .
      {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"},
      // Control characters.
      {"%01%02%03%04%05%06%07%08%09 %25",
       UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
       "%01%02%03%04%05%06%07%08%09 %"},
      {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},

      // '/' and '\\' should only be unescaped by PATH_SEPARATORS.
      {"%2F%5C", UnescapeRule::PATH_SEPARATORS, "/\\"},
  };

  // Iterate by reference so the table entries are not copied, matching the
  // other table-driven loops in this file.
  for (const auto& unescape_case : kUnescapeCases) {
    EXPECT_EQ(unescape_case.output,
              UnescapeURLComponent(unescape_case.input, unescape_case.rules));
  }

  // Test NULL character unescaping, which can't be tested above since those are
  // just char pointers.
  std::string input("Null");
  input.push_back(0);  // Also have a NULL in the input.
  input.append("%00%39Test");

  // The literal NUL is kept, "%00" stays escaped, and "%39" becomes '9'.
  std::string expected = "Null";
  expected.push_back(0);
  expected.append("%009Test");
  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
}
380
// Runs each input through three unescaping flavors and compares every result
// with the matching column of the table.
TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponentWithAdjustments) {
  // Columns: input, NORMAL result, REPLACE_PLUS_WITH_SPACE result, decoded
  // UTF-16 result (written as a wide literal).
  const UnescapeAndDecodeCase kDecodeCases[] = {
      {"%", "%", "%", L"%"},
      {"+", "+", " ", L"+"},
      {"%2+", "%2+", "%2 ", L"%2+"},
      {"+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"},
      {"Don't escape anything", "Don't escape anything",
       "Don't escape anything", L"Don't escape anything"},
      {"+Invalid %escape %2+", "+Invalid %escape %2+", " Invalid %escape %2 ",
       L"+Invalid %escape %2+"},
      {"Some random text %25%2dOK", "Some random text %25-OK",
       "Some random text %25-OK", L"Some random text %25-OK"},
      {"%01%02%03%04%05%06%07%08%09", "%01%02%03%04%05%06%07%08%09",
       "%01%02%03%04%05%06%07%08%09", L"%01%02%03%04%05%06%07%08%09"},
      {"%E4%BD%A0+%E5%A5%BD", "\xE4\xBD\xA0+\xE5\xA5\xBD",
       "\xE4\xBD\xA0 \xE5\xA5\xBD", L"\x4f60+\x597d"},
      // Invalid UTF-8: the byte-level results are unescaped, while the
      // decoded result keeps the escapes.
      {"%ED%ED", "\xED\xED", "\xED\xED", L"%ED%ED"},
  };

  for (const UnescapeAndDecodeCase& sample : kDecodeCases) {
    const std::string as_url =
        UnescapeURLComponent(sample.input, UnescapeRule::NORMAL);
    EXPECT_EQ(std::string(sample.url_unescaped), as_url);

    const std::string as_query = UnescapeURLComponent(
        sample.input, UnescapeRule::REPLACE_PLUS_WITH_SPACE);
    EXPECT_EQ(std::string(sample.query_unescaped), as_query);

    // No adjustments are requested here; the AdjustOffset test covers that
    // argument.
    //
    // TODO: Need to test unescape_spaces and unescape_percent.
    const std::u16string as_utf16 =
        UnescapeAndDecodeUTF8URLComponentWithAdjustments(
            sample.input, UnescapeRule::NORMAL, nullptr);
    EXPECT_EQ(WideToUTF16(sample.decoded), as_utf16);
  }
}
418
// Unescapes and decodes each input while recording adjustments, then checks
// that OffsetAdjuster::AdjustOffset() maps the input offset to the expected
// output offset.
TEST(EscapeTest, AdjustOffset) {
  // Columns: escaped input, offset into it, expected offset after adjustment.
  const AdjustOffsetCase kOffsetCases[] = {
      {"", 0, 0},
      {"test", 0, 0},
      {"test", 2, 2},
      {"test", 4, 4},
      {"test", std::string::npos, std::string::npos},
      {"%2dtest", 6, 4},
      {"%2dtest", 3, 1},
      {"%2dtest", 2, std::string::npos},
      {"%2dtest", 1, std::string::npos},
      {"%2dtest", 0, 0},
      {"test%2d", 2, 2},
      {"test%2e", 2, 2},
      {"%E4%BD%A0+%E5%A5%BD", 9, 1},
      {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos},
      {"%E4%BD%A0+%E5%A5%BD", 0, 0},
      {"%E4%BD%A0+%E5%A5%BD", 10, 2},
      {"%E4%BD%A0+%E5%A5%BD", 19, 3},

      {"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3},

      {"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10},

      {"%ED%B0%80+%E5%A5%BD", 6, 6},  // not convertible to UTF-8
  };

  for (const AdjustOffsetCase& sample : kOffsetCases) {
    OffsetAdjuster::Adjustments recorded;
    UnescapeAndDecodeUTF8URLComponentWithAdjustments(
        sample.input, UnescapeRule::NORMAL, &recorded);

    // AdjustOffset() rewrites the offset in place, so work on a copy.
    size_t adjusted = sample.input_offset;
    OffsetAdjuster::AdjustOffset(recorded, &adjusted);
    EXPECT_EQ(sample.output_offset, adjusted)
        << "input=" << sample.input << " offset=" << sample.input_offset;
  }
}
473
// Table-driven check of UnescapeBinaryURLComponent(), followed by a separate
// case for embedded NUL characters.
TEST(EscapeTest, UnescapeBinaryURLComponent) {
  const UnescapeURLCase kTestCases[] = {
      // Check that ASCII characters with special handling in
      // UnescapeURLComponent() are still unescaped.
      {"%09%20%25foo%2F", UnescapeRule::NORMAL, "\x09 %foo/"},

      // UTF-8 Characters banned by UnescapeURLComponent() should also be
      // unescaped.
      {"Some random text %D8%9COK", UnescapeRule::NORMAL,
       "Some random text \xD8\x9COK"},
      {"Some random text %F0%9F%94%8FOK", UnescapeRule::NORMAL,
       "Some random text \xF0\x9F\x94\x8FOK"},

      // As should invalid UTF-8 characters.
      {"%A0%A0%E9%E9%A0%A0%A0%A0", UnescapeRule::NORMAL,
       "\xA0\xA0\xE9\xE9\xA0\xA0\xA0\xA0"},

      // And valid UTF-8 characters that are not banned by
      // UnescapeURLComponent() should be unescaped, too!
      {"%C2%A1%C2%A1", UnescapeRule::NORMAL, "\xC2\xA1\xC2\xA1"},

      // '+' should be left alone by default
      {"++%2B++", UnescapeRule::NORMAL, "+++++"},
      // But should magically be turned into a space if requested. Each of the
      // four literal '+' becomes one space, and "%2B" becomes the single '+'
      // in the middle.
      {"++%2B++", UnescapeRule::REPLACE_PLUS_WITH_SPACE, "  +  "},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.output,
              UnescapeBinaryURLComponent(test_case.input, test_case.rules));
  }

  // Test NULL character unescaping, which can't be tested above since those are
  // just char pointers.
  std::string input("Null");
  input.push_back(0);  // Also have a NULL in the input.
  input.append("%00%39Test");

  // Both the literal NUL and the one decoded from "%00" are expected in the
  // output, followed by '9' decoded from "%39".
  std::string expected("Null");
  expected.push_back(0);
  expected.push_back(0);
  expected.append("9Test");
  EXPECT_EQ(expected, UnescapeBinaryURLComponent(input));
}
518
// Checks UnescapeBinaryURLComponentSafe() with both values of its
// fail_on_path_separators argument, including that the output string is left
// empty whenever the call reports failure.
TEST(EscapeTest, UnescapeBinaryURLComponentSafe) {
  struct SafeCase {
    const char* input;
    // Expected result when fail_on_path_separators is false, or null when the
    // call is expected to fail even then.
    const char* expected_output;
    // True when |input| contains an escaped path separator.
    bool has_path_separators;
  };
  const SafeCase kSafeCases[] = {
      // Spaces, percents, and invalid UTF-8 characters are all successfully
      // unescaped.
      {"%20%25foo%81", " %foo\x81", false},

      // Characters disallowed unconditionally.
      {"foo%00", nullptr, false},
      {"foo%01", nullptr, false},
      {"foo%0A", nullptr, false},
      {"foo%0D", nullptr, false},

      // Path separators.
      {"foo%2F", "foo/", true},
      {"foo%5C", "foo\\", true},

      // Characters that are considered invalid to escape are ignored if passed
      // in unescaped.
      {"foo\x01\r/\\", "foo\x01\r/\\", false},
  };

  for (const SafeCase& sample : kSafeCases) {
    SCOPED_TRACE(sample.input);

    // Start from a non-empty string so that clearing on failure is visible.
    std::string result = "foo";

    if (sample.expected_output == nullptr) {
      // Expected to fail regardless of how path separators are treated.
      for (const bool fail_on_path_separators : {false, true}) {
        EXPECT_FALSE(UnescapeBinaryURLComponentSafe(
            sample.input, fail_on_path_separators, &result));
        EXPECT_TRUE(result.empty());
      }
      continue;
    }

    // Lenient mode succeeds for every remaining case.
    EXPECT_TRUE(UnescapeBinaryURLComponentSafe(
        sample.input, /*fail_on_path_separators=*/false, &result));
    EXPECT_EQ(sample.expected_output, result);

    if (!sample.has_path_separators) {
      // Strict mode gives the same answer when no separator is escaped.
      result = "foo";
      EXPECT_TRUE(UnescapeBinaryURLComponentSafe(
          sample.input, /*fail_on_path_separators=*/true, &result));
      EXPECT_EQ(sample.expected_output, result);
      continue;
    }

    // Strict mode rejects escaped separators and empties the output.
    EXPECT_FALSE(UnescapeBinaryURLComponentSafe(
        sample.input, /*fail_on_path_separators=*/true, &result));
    EXPECT_TRUE(result.empty());
  }
}
575
// Checks ContainsEncodedBytes() for literal versus percent-encoded bytes,
// hex-digit case, and multi-byte UTF-8 sequences.
TEST(EscapeTest, ContainsEncodedBytes) {
  const char kEncodedSlash[] = "abc%2Fdef";
  const char kEncodedCafe[] = "caf%C3%A9";

  // A literal '/' is not an encoded byte.
  EXPECT_FALSE(ContainsEncodedBytes("abc/def", {'/', '\\'}));
  // "%2F" encodes '/', not '%'.
  EXPECT_FALSE(ContainsEncodedBytes(kEncodedSlash, {'%'}));
  // "%25" encodes '%'.
  EXPECT_TRUE(ContainsEncodedBytes("abc%252Fdef", {'%'}));
  // Either separator matches, and the hex digits may be lower case.
  EXPECT_TRUE(ContainsEncodedBytes(kEncodedSlash, {'/', '\\'}));
  EXPECT_TRUE(ContainsEncodedBytes("abc%5Cdef", {'/', '\\'}));
  EXPECT_TRUE(ContainsEncodedBytes("abc%2fdef", {'/', '\\'}));

  // Matching is on byte values, not UTF-8 character values: the sequence
  // contains byte 0xC3 but not byte 0xE9.
  EXPECT_TRUE(
      ContainsEncodedBytes(kEncodedCafe, {static_cast<uint8_t>('\xc3')}));
  EXPECT_FALSE(
      ContainsEncodedBytes(kEncodedCafe, {static_cast<uint8_t>('\xe9')}));
}
590
591 } // namespace
592 } // namespace base
593