xref: /aosp_15_r20/external/cronet/base/strings/escape_unittest.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2020 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <string>
7 
8 #include "base/strings/escape.h"
9 
10 #include "base/strings/string_util.h"
11 #include "base/strings/stringprintf.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 
15 namespace base {
16 namespace {
17 
// An (input, expected output) pair for the table-driven escaping tests below.
struct EscapeCase {
  // Text handed to the function under test.
  const char* input;
  // Text the function under test is expected to return for |input|.
  const char* output;
};
22 
// An (input, expected output) pair shared by the EscapeForHTML and
// UnescapeForHTML tests.
struct EscapeForHTMLCase {
  // Text handed to EscapeForHTML() or UnescapeForHTML().
  const char* input;
  // Text that call is expected to return for |input|.
  const char* expected_output;
};
27 
28 struct UnescapeURLCase {
29   const char* input;
30   UnescapeRule::Type rules;
31   const char* output;
32 };
33 
// A test vector for the UnescapeAndDecodeUTF8URLComponentWithAdjustments test.
// One |input| is run through three different unescaping calls.
struct UnescapeAndDecodeCase {
  const char* input;

  // The expected output of UnescapeURLComponent() with UnescapeRule::NORMAL.
  const char* url_unescaped;

  // The expected output of UnescapeURLComponent() with
  // UnescapeRule::REPLACE_PLUS_WITH_SPACE.
  const char* query_unescaped;

  // The expected output of UnescapeAndDecodeUTF8URLComponentWithAdjustments()
  // with UnescapeRule::NORMAL. Stored as a wide string and converted with
  // WideToUTF16() before comparison.
  const wchar_t* decoded;
};
46 
// A test vector for the AdjustOffset test.
struct AdjustOffsetCase {
  // Percent-escaped text that is unescaped and decoded to collect adjustments.
  const char* input;
  // Offset into |input| that is fed to OffsetAdjuster::AdjustOffset().
  size_t input_offset;
  // Expected offset afterwards. The table uses std::string::npos for offsets
  // that land inside an escape sequence which gets unescaped.
  size_t output_offset;
};
52 
// Exercises EscapeQueryParamValue() with both values of its second argument,
// then checks the escaping decision for every possible single-byte input.
TEST(EscapeTest, EscapeTextForFormSubmission) {
  // Second argument true: a space is written as '+'.
  const EscapeCase escape_cases[] = {
      {"foo", "foo"}, {"foo bar", "foo+bar"}, {"foo++", "foo%2B%2B"}};
  for (const auto& escape_case : escape_cases) {
    EXPECT_EQ(escape_case.output,
              EscapeQueryParamValue(escape_case.input, true));
  }

  // Second argument false: a space is written as "%20".
  const EscapeCase escape_cases_no_plus[] = {
      {"foo", "foo"}, {"foo bar", "foo%20bar"}, {"foo++", "foo%2B%2B"}};
  for (const auto& escape_case : escape_cases_no_plus) {
    EXPECT_EQ(escape_case.output,
              EscapeQueryParamValue(escape_case.input, false));
  }

  // Test all the values we're supposed to be escaping.
  const std::string no_escape(
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "0123456789"
      "!'()*-._~");
  for (int i = 0; i < 256; ++i) {
    std::string in;
    // |i| walks 0..255; make the int -> char conversion explicit.
    in.push_back(static_cast<char>(i));
    std::string out = EscapeQueryParamValue(in, true);
    if (0 == i) {
      EXPECT_EQ(out, std::string("%00"));
    } else if (32 == i) {
      // Spaces are plus escaped like web forms.
      EXPECT_EQ(out, std::string("+"));
    } else if (no_escape.find(in) == std::string::npos) {
      // Check %hex escaping
      std::string expected = StringPrintf("%%%02X", i);
      EXPECT_EQ(expected, out);
    } else {
      // No change for things in the no_escape list.
      EXPECT_EQ(out, in);
    }
  }
}
93 
// Checks which bytes EscapePath() percent-encodes, using a sample that spans
// control characters, ASCII punctuation, alphanumerics and bytes >= 0x80.
TEST(EscapeTest, EscapePath) {
  // Most of the character space we care about, un-escaped.
  constexpr char kUnescaped[] =
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff";
  // The same bytes, escaped.
  constexpr char kEscaped[] =
      "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
      "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF";

  ASSERT_EQ(EscapePath(kUnescaped), kEscaped);
}
107 
// Checks which bytes EscapeUrlEncodedData() percent-encodes when its second
// argument is true (a space becomes '+'), using a sample that spans control
// characters, ASCII punctuation, alphanumerics and bytes >= 0x80.
TEST(EscapeTest, EscapeUrlEncodedData) {
  // Most of the character space we care about, un-escaped.
  constexpr char kUnescaped[] =
      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
      "{|}~\x7f\x80\xff";
  // The same bytes, escaped.
  constexpr char kEscaped[] =
      "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
      "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF";

  ASSERT_EQ(EscapeUrlEncodedData(kUnescaped, true), kEscaped);
}
122 
// The second argument of EscapeUrlEncodedData() selects how a space is
// written: '+' when true, "%20" when false.
TEST(EscapeTest, EscapeUrlEncodedDataSpace) {
  ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b");
  ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b");
}
127 
// EscapeForHTML() must leave plain text alone and replace '<', '>' and '\''
// with the corresponding HTML entities.
TEST(EscapeTest, EscapeForHTML) {
  const EscapeForHTMLCase tests[] = {
      {"hello", "hello"},
      {"<hello>", "&lt;hello&gt;"},
      {"don\'t mess with me", "don&#39;t mess with me"},
  };
  for (const auto& test : tests) {
    std::string result = EscapeForHTML(std::string(test.input));
    EXPECT_EQ(std::string(test.expected_output), result);
  }
}
139 
// UnescapeForHTML() must turn the five entities listed below back into their
// characters and leave anything that is not exactly one of them untouched.
TEST(EscapeTest, UnescapeForHTML) {
  const EscapeForHTMLCase tests[] = {
      {"", ""},
      {"&lt;hello&gt;", "<hello>"},
      {"don&#39;t mess with me", "don\'t mess with me"},
      {"&lt;&gt;&amp;&quot;&#39;", "<>&\"'"},
      // Malformed entities (embedded spaces, empty name) pass through as-is.
      {"& lt; &amp ; &; '", "& lt; &amp ; &; '"},
      {"&amp;", "&"},
      {"&quot;", "\""},
      {"&#39;", "'"},
      {"&lt;", "<"},
      {"&gt;", ">"},
      {"&amp; &", "& &"},
  };
  for (const auto& test : tests) {
    // The function under test operates on UTF-16, so convert both sides.
    std::u16string result = UnescapeForHTML(ASCIIToUTF16(test.input));
    EXPECT_EQ(ASCIIToUTF16(test.expected_output), result);
  }
}
159 
// Pins down the behaviour of EscapeExternalHandlerValue(): which bytes are
// percent-encoded, which are passed through, and how existing '%' sequences
// in the input are treated.
TEST(EscapeTest, EscapeExternalHandlerValue) {
  ASSERT_EQ(
      // Escaped
      "%02%0A%1D%20!%22#$%25&'()*+,-./0123456789:;"
      "%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[%5C]%5E_%60abcdefghijklmnopqrstuvwxyz"
      "%7B%7C%7D~%7F%80%FF",
      // Most of the character space we care about, un-escaped
      EscapeExternalHandlerValue("\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
                                 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                 "[\\]^_`abcdefghijklmnopqrstuvwxyz"
                                 "{|}~\x7f\x80\xff"));

  // Input made up solely of pass-through characters comes back unchanged.
  ASSERT_EQ(
      "!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_"
      "abcdefghijklmnopqrstuvwxyz~",
      EscapeExternalHandlerValue(
          "!#$&'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_"
          "abcdefghijklmnopqrstuvwxyz~"));

  // A '%' that does not start a two-hex-digit sequence is escaped as "%25";
  // a '%' followed by two hex digits (either case) is left alone.
  ASSERT_EQ("%258k", EscapeExternalHandlerValue("%8k"));
  ASSERT_EQ("a%25", EscapeExternalHandlerValue("a%"));
  ASSERT_EQ("%25a", EscapeExternalHandlerValue("%a"));
  ASSERT_EQ("a%258", EscapeExternalHandlerValue("a%8"));
  ASSERT_EQ("%ab", EscapeExternalHandlerValue("%ab"));
  ASSERT_EQ("%AB", EscapeExternalHandlerValue("%AB"));

  // '|' is escaped in a full URL, while the URL delimiters are kept.
  ASSERT_EQ("http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C",
            EscapeExternalHandlerValue(
                "http://example.com/path/sub?q=a|b|c&q=1|2|3#ref|"));
  // Escaping output that is already escaped does not escape it a second time.
  ASSERT_EQ("http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C",
            EscapeExternalHandlerValue(
                "http://example.com/path/sub?q=a%7Cb%7Cc&q=1%7C2%7C3#ref%7C"));
  // The brackets and colons of an IPv6 literal survive untouched.
  ASSERT_EQ("http://[2001:db8:0:1]:80",
            EscapeExternalHandlerValue("http://[2001:db8:0:1]:80"));
}
196 
// Both functions percent-encode the raw 0x80 byte and keep the newline; only
// EscapeNonASCIIAndPercent() additionally rewrites the literal '%' as "%25".
TEST(EscapeTest, EscapeNonASCII) {
  EXPECT_EQ("abc\n%2580%80", EscapeNonASCIIAndPercent("abc\n%80\x80"));
  EXPECT_EQ("abc\n%80%80", EscapeNonASCII("abc\n%80\x80"));
}
201 
// Runs UnescapeAndDecodeUTF8URLComponentWithAdjustments() over a data-URL
// payload containing escaped multi-byte UTF-8 sequences while collecting
// offset adjustments. The test has no expectations: it passes as long as the
// call returns.
// NOTE(review): presumably a regression test for a crash/DCHECK - confirm.
TEST(EscapeTest, DataURLWithAccentedCharacters) {
  const std::string url =
      "text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3"
      "%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20";

  OffsetAdjuster::Adjustments adjustments;
  UnescapeAndDecodeUTF8URLComponentWithAdjustments(url, UnescapeRule::SPACES,
                                                   &adjustments);
}
211 
TEST(EscapeTest,UnescapeURLComponent)212 TEST(EscapeTest, UnescapeURLComponent) {
213   const UnescapeURLCase kUnescapeCases[] = {
214       {"", UnescapeRule::NORMAL, ""},
215       {"%2", UnescapeRule::NORMAL, "%2"},
216       {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
217       {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
218       {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
219       {"Some%20random text %25%2dOK", UnescapeRule::NONE,
220        "Some%20random text %25%2dOK"},
221       {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
222        "Some%20random text %25-OK"},
223       {"Some%20random text %25%E1%A6", UnescapeRule::NORMAL,
224        "Some%20random text %25\xE1\xA6"},
225       {"Some%20random text %25%E1%A6OK", UnescapeRule::NORMAL,
226        "Some%20random text %25\xE1\xA6OK"},
227       {"Some%20random text %25%E1%A6%99OK", UnescapeRule::NORMAL,
228        "Some%20random text %25\xE1\xA6\x99OK"},
229 
230       // BiDi Control characters should not be unescaped.
231       {"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL,
232        "Some%20random text %25%D8%9COK"},
233       {"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL,
234        "Some%20random text %25%E2%80%8EOK"},
235       {"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL,
236        "Some%20random text %25%E2%80%8FOK"},
237       {"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL,
238        "Some%20random text %25%E2%80%AAOK"},
239       {"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL,
240        "Some%20random text %25%E2%80%ABOK"},
241       {"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL,
242        "Some%20random text %25%E2%80%AEOK"},
243       {"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL,
244        "Some%20random text %25%E2%81%A6OK"},
245       {"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL,
246        "Some%20random text %25%E2%81%A9OK"},
247 
248       // Certain banned characters should not be unescaped.
249       // U+1F50F LOCK WITH INK PEN
250       {"Some%20random text %25%F0%9F%94%8FOK", UnescapeRule::NORMAL,
251        "Some%20random text %25%F0%9F%94%8FOK"},
252       // U+1F510 CLOSED LOCK WITH KEY
253       {"Some%20random text %25%F0%9F%94%90OK", UnescapeRule::NORMAL,
254        "Some%20random text %25%F0%9F%94%90OK"},
255       // U+1F512 LOCK
256       {"Some%20random text %25%F0%9F%94%92OK", UnescapeRule::NORMAL,
257        "Some%20random text %25%F0%9F%94%92OK"},
258       // U+1F513 OPEN LOCK
259       {"Some%20random text %25%F0%9F%94%93OK", UnescapeRule::NORMAL,
260        "Some%20random text %25%F0%9F%94%93OK"},
261 
262       // Spaces
263       {"(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)", UnescapeRule::NORMAL,
264        "(%C2%85)(%C2%A0)(%E1%9A%80)(%E2%80%80)"},
265       {"(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)", UnescapeRule::NORMAL,
266        "(%E2%80%81)(%E2%80%82)(%E2%80%83)(%E2%80%84)"},
267       {"(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)", UnescapeRule::NORMAL,
268        "(%E2%80%85)(%E2%80%86)(%E2%80%87)(%E2%80%88)"},
269       {"(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)", UnescapeRule::NORMAL,
270        "(%E2%80%89)(%E2%80%8A)(%E2%80%A8)(%E2%80%A9)"},
271       {"(%E2%80%AF)(%E2%81%9F)(%E3%80%80)", UnescapeRule::NORMAL,
272        "(%E2%80%AF)(%E2%81%9F)(%E3%80%80)"},
273       {"(%E2%A0%80)", UnescapeRule::NORMAL, "(%E2%A0%80)"},
274 
275       // Default Ignorable and Formatting characters should not be unescaped.
276       {"(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)", UnescapeRule::NORMAL,
277        "(%E2%81%A5)(%EF%BF%B0)(%EF%BF%B8)"},
278       {"(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)", UnescapeRule::NORMAL,
279        "(%F3%A0%82%80)(%F3%A0%83%BF)(%F3%A0%87%B0)"},
280       {"(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)", UnescapeRule::NORMAL,
281        "(%F3%A0%BF%BF)(%C2%AD)(%CD%8F)"},
282       {"(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)", UnescapeRule::NORMAL,
283        "(%D8%80%20)(%D8%85)(%DB%9D)(%DC%8F)(%E0%A3%A2)"},
284       {"(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)", UnescapeRule::NORMAL,
285        "(%E1%85%9F)(%E1%85%A0)(%E1%9E%B4)(%E1%9E%B5)"},
286       {"(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)", UnescapeRule::NORMAL,
287        "(%E1%A0%8B)(%E1%A0%8C)(%E1%A0%8D)(%E1%A0%8E)"},
288       {"(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)", UnescapeRule::NORMAL,
289        "(%E2%80%8B)(%E2%80%8C)(%E2%80%8D)(%E2%81%A0)"},
290       {"(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)", UnescapeRule::NORMAL,
291        "(%E2%81%A1)(%E2%81%A2)(%E2%81%A3)(%E2%81%A4)"},
292       {"(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)", UnescapeRule::NORMAL,
293        "(%E3%85%A4)(%EF%BB%BF)(%EF%BE%A0)(%EF%BF%B9)"},
294       {"(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)", UnescapeRule::NORMAL,
295        "(%EF%BF%BB)(%F0%91%82%BD)(%F0%91%83%8D)"},
296       {"(%F0%93%90%B0)(%F0%93%90%B8)", UnescapeRule::NORMAL,
297        "(%F0%93%90%B0)(%F0%93%90%B8)"},
298       // General Punctuation - Deprecated (U+206A--206F)
299       {"(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)", UnescapeRule::NORMAL,
300        "(%E2%81%AA)(%E2%81%AD)(%E2%81%AF)"},
301       // Variation selectors (U+FE00--FE0F)
302       {"(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)", UnescapeRule::NORMAL,
303        "(%EF%B8%80)(%EF%B8%8C)(%EF%B8%8D)"},
304       // Shorthand format controls (U+1BCA0--1BCA3)
305       {"(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)", UnescapeRule::NORMAL,
306        "(%F0%9B%B2%A0)(%F0%9B%B2%A1)(%F0%9B%B2%A3)"},
307       // Musical symbols beams and slurs (U+1D173--1D17A)
308       {"(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)", UnescapeRule::NORMAL,
309        "(%F0%9D%85%B3)(%F0%9D%85%B9)(%F0%9D%85%BA)"},
310       // Tags block (U+E0000--E007F), includes unassigned points
311       {"(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)", UnescapeRule::NORMAL,
312        "(%F3%A0%80%80)(%F3%A0%80%81)(%F3%A0%81%8F)"},
313       // Ideographic-specific variation selectors (U+E0100--E01EF)
314       {"(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)", UnescapeRule::NORMAL,
315        "(%F3%A0%84%80)(%F3%A0%84%90)(%F3%A0%87%AF)"},
316 
317       // Two spoofing characters in a row should not be unescaped.
318       {"%D8%9C%D8%9C", UnescapeRule::NORMAL, "%D8%9C%D8%9C"},
319       // Non-spoofing characters surrounded by spoofing characters should be
320       // unescaped.
321       {"%D8%9C%C2%A1%D8%9C%C2%A1", UnescapeRule::NORMAL,
322        "%D8%9C\xC2\xA1%D8%9C\xC2\xA1"},
323       // Invalid UTF-8 characters surrounded by spoofing characters should be
324       // unescaped.
325       {"%D8%9C%85%D8%9C%85", UnescapeRule::NORMAL, "%D8%9C\x85%D8%9C\x85"},
326       // Test with enough trail bytes to overflow the CBU8_MAX_LENGTH-byte
327       // buffer. The first two bytes are a spoofing character as well.
328       {"%D8%9C%9C%9C%9C%9C%9C%9C%9C%9C%9C", UnescapeRule::NORMAL,
329        "%D8%9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C\x9C"},
330 
331       {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
332        "Some random text %25-OK"},
333       {"Some%20random text %25%2dOK", UnescapeRule::PATH_SEPARATORS,
334        "Some%20random text %25-OK"},
335       {"Some%20random text %25%2dOK",
336        UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
337        "Some%20random text %-OK"},
338       {"Some%20random text %25%2dOK",
339        UnescapeRule::SPACES |
340            UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
341        "Some random text %-OK"},
342       {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
343       {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
344       // Certain URL-sensitive characters should not be unescaped unless asked.
345       {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
346        UnescapeRule::SPACES, "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
347       {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
348        UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
349        "Hello%20%13%10world ## ?? == && %% ++"},
350       // We can neither escape nor unescape '@' since some websites expect it to
351       // be preserved as either '@' or "%40".
352       // See http://b/996720 and http://crbug.com/23933 .
353       {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"},
354       // Control characters.
355       {"%01%02%03%04%05%06%07%08%09 %25",
356        UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS,
357        "%01%02%03%04%05%06%07%08%09 %"},
358       {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
359 
360       // '/' and '\\' should only be unescaped by PATH_SEPARATORS.
361       {"%2F%5C", UnescapeRule::PATH_SEPARATORS, "/\\"},
362   };
363 
364   for (const auto unescape_case : kUnescapeCases) {
365     EXPECT_EQ(unescape_case.output,
366               UnescapeURLComponent(unescape_case.input, unescape_case.rules));
367   }
368 
369   // Test NULL character unescaping, which can't be tested above since those are
370   // just char pointers.
371   std::string input("Null");
372   input.push_back(0);  // Also have a NULL in the input.
373   input.append("%00%39Test");
374 
375   std::string expected = "Null";
376   expected.push_back(0);
377   expected.append("%009Test");
378   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
379 }
380 
// Runs every input through three calls and checks each result:
//   1. UnescapeURLComponent() with UnescapeRule::NORMAL,
//   2. UnescapeURLComponent() with UnescapeRule::REPLACE_PLUS_WITH_SPACE,
//   3. UnescapeAndDecodeUTF8URLComponentWithAdjustments() with
//      UnescapeRule::NORMAL and no adjustments vector.
TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponentWithAdjustments) {
  const UnescapeAndDecodeCase unescape_cases[] = {
      {"%", "%", "%", L"%"},
      {"+", "+", " ", L"+"},
      {"%2+", "%2+", "%2 ", L"%2+"},
      {"+%%%+%%%", "+%%%+%%%", " %%% %%%", L"+%%%+%%%"},
      {"Don't escape anything", "Don't escape anything",
       "Don't escape anything", L"Don't escape anything"},
      {"+Invalid %escape %2+", "+Invalid %escape %2+", " Invalid %escape %2 ",
       L"+Invalid %escape %2+"},
      {"Some random text %25%2dOK", "Some random text %25-OK",
       "Some random text %25-OK", L"Some random text %25-OK"},
      {"%01%02%03%04%05%06%07%08%09", "%01%02%03%04%05%06%07%08%09",
       "%01%02%03%04%05%06%07%08%09", L"%01%02%03%04%05%06%07%08%09"},
      {"%E4%BD%A0+%E5%A5%BD", "\xE4\xBD\xA0+\xE5\xA5\xBD",
       "\xE4\xBD\xA0 \xE5\xA5\xBD", L"\x4f60+\x597d"},
      {"%ED%ED",                            // Invalid UTF-8.
       "\xED\xED", "\xED\xED", L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
  };

  for (const auto& unescape_case : unescape_cases) {
    // Name the failing row in any failure message below.
    SCOPED_TRACE(unescape_case.input);

    const std::string url_unescaped =
        UnescapeURLComponent(unescape_case.input, UnescapeRule::NORMAL);
    EXPECT_EQ(std::string(unescape_case.url_unescaped), url_unescaped);

    const std::string query_unescaped = UnescapeURLComponent(
        unescape_case.input, UnescapeRule::REPLACE_PLUS_WITH_SPACE);
    EXPECT_EQ(std::string(unescape_case.query_unescaped), query_unescaped);

    // The adjustments argument is covered by the next test.
    //
    // TODO: Need to test unescape_spaces and unescape_percent.
    std::u16string decoded = UnescapeAndDecodeUTF8URLComponentWithAdjustments(
        unescape_case.input, UnescapeRule::NORMAL, nullptr);
    EXPECT_EQ(WideToUTF16(unescape_case.decoded), decoded);
  }
}
418 
// For each row: unescape and decode |input| while recording adjustments, then
// map |input_offset| through OffsetAdjuster::AdjustOffset() and compare the
// result with |output_offset|.
TEST(EscapeTest, AdjustOffset) {
  const AdjustOffsetCase adjust_cases[] = {
      {"", 0, 0},
      {"test", 0, 0},
      {"test", 2, 2},
      {"test", 4, 4},
      {"test", std::string::npos, std::string::npos},
      {"%2dtest", 6, 4},
      {"%2dtest", 3, 1},
      {"%2dtest", 2, std::string::npos},
      {"%2dtest", 1, std::string::npos},
      {"%2dtest", 0, 0},
      {"test%2d", 2, 2},
      {"test%2e", 2, 2},
      {"%E4%BD%A0+%E5%A5%BD", 9, 1},
      {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos},
      {"%E4%BD%A0+%E5%A5%BD", 0, 0},
      {"%E4%BD%A0+%E5%A5%BD", 10, 2},
      {"%E4%BD%A0+%E5%A5%BD", 19, 3},

      {"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos},
      {"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3},

      {"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6},
      {"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10},

      {"%ED%B0%80+%E5%A5%BD", 6, 6},  // not convertible to UTF-8
  };

  for (const auto& adjust_case : adjust_cases) {
    // AdjustOffset() updates its argument in place, so work on a copy.
    size_t offset = adjust_case.input_offset;
    OffsetAdjuster::Adjustments adjustments;
    // Only the recorded adjustments matter here; the decoded text is unused.
    UnescapeAndDecodeUTF8URLComponentWithAdjustments(
        adjust_case.input, UnescapeRule::NORMAL, &adjustments);
    OffsetAdjuster::AdjustOffset(adjustments, &offset);
    EXPECT_EQ(adjust_case.output_offset, offset)
        << "input=" << adjust_case.input
        << " offset=" << adjust_case.input_offset;
  }
}
473 
// Table-driven coverage of UnescapeBinaryURLComponent(), followed by a
// separate check for NUL bytes, which the const char* table cannot express.
TEST(EscapeTest, UnescapeBinaryURLComponent) {
  const UnescapeURLCase kTestCases[] = {
      // Check that ASCII characters with special handling in
      // UnescapeURLComponent() are still unescaped.
      {"%09%20%25foo%2F", UnescapeRule::NORMAL, "\x09 %foo/"},

      // UTF-8 Characters banned by UnescapeURLComponent() should also be
      // unescaped.
      {"Some random text %D8%9COK", UnescapeRule::NORMAL,
       "Some random text \xD8\x9COK"},
      {"Some random text %F0%9F%94%8FOK", UnescapeRule::NORMAL,
       "Some random text \xF0\x9F\x94\x8FOK"},

      // As should invalid UTF-8 characters.
      {"%A0%A0%E9%E9%A0%A0%A0%A0", UnescapeRule::NORMAL,
       "\xA0\xA0\xE9\xE9\xA0\xA0\xA0\xA0"},

      // And valid UTF-8 characters that are not banned by
      // UnescapeURLComponent() should be unescaped, too!
      {"%C2%A1%C2%A1", UnescapeRule::NORMAL, "\xC2\xA1\xC2\xA1"},

      // '+' should be left alone by default
      {"++%2B++", UnescapeRule::NORMAL, "+++++"},
      // But should magically be turned into a space if requested.
      {"++%2B++", UnescapeRule::REPLACE_PLUS_WITH_SPACE, "  +  "},
  };

  for (const auto& test_case : kTestCases) {
    // Name the failing row in any failure message below.
    SCOPED_TRACE(test_case.input);
    EXPECT_EQ(test_case.output,
              UnescapeBinaryURLComponent(test_case.input, test_case.rules));
  }

  // Test NULL character unescaping, which can't be tested above since those are
  // just char pointers.
  std::string input("Null");
  input.push_back(0);  // Also have a NULL in the input.
  input.append("%00%39Test");

  // Both the raw NUL and the escaped "%00" come out as NUL bytes, and "%39"
  // becomes '9'.
  std::string expected("Null");
  expected.push_back(0);
  expected.push_back(0);
  expected.append("9Test");
  EXPECT_EQ(expected, UnescapeBinaryURLComponent(input));
}
518 
// Checks UnescapeBinaryURLComponentSafe() with both values of
// |fail_on_path_separators|: the return value, the unescaped text on success,
// and that the output string is left empty on failure.
TEST(EscapeTest, UnescapeBinaryURLComponentSafe) {
  const struct TestCase {
    const char* input;
    // Expected output. Null if call is expected to fail when
    // |fail_on_path_separators| is false.
    const char* expected_output;
    // Whether |input| has any escaped path separators.
    bool has_path_separators;
  } kTestCases[] = {
      // Spaces, percents, and invalid UTF-8 characters are all successfully
      // unescaped.
      {"%20%25foo%81", " %foo\x81", false},

      // Characters disallowed unconditionally.
      {"foo%00", nullptr, false},
      {"foo%01", nullptr, false},
      {"foo%0A", nullptr, false},
      {"foo%0D", nullptr, false},

      // Path separators.
      {"foo%2F", "foo/", true},
      {"foo%5C", "foo\\", true},

      // Characters that are considered invalid to escape are ignored if passed
      // in unescaped.
      {"foo\x01\r/\\", "foo\x01\r/\\", false},
  };

  for (const auto& test_case : kTestCases) {
    SCOPED_TRACE(test_case.input);

    // Start from a non-empty string so that "empty on failure" is observable.
    std::string output = "foo";
    if (!test_case.expected_output) {
      EXPECT_FALSE(UnescapeBinaryURLComponentSafe(
          test_case.input, false /* fail_on_path_separators */, &output));
      EXPECT_TRUE(output.empty());
      // Re-seed |output|: the call above already emptied it, so without this
      // the emptiness check below could never fail.
      output = "foo";
      EXPECT_FALSE(UnescapeBinaryURLComponentSafe(
          test_case.input, true /* fail_on_path_separators */, &output));
      EXPECT_TRUE(output.empty());
      continue;
    }
    EXPECT_TRUE(UnescapeBinaryURLComponentSafe(
        test_case.input, false /* fail_on_path_separators */, &output));
    EXPECT_EQ(test_case.expected_output, output);
    if (test_case.has_path_separators) {
      // |output| still holds the non-empty result from the call above, so the
      // emptiness check is meaningful here.
      EXPECT_FALSE(UnescapeBinaryURLComponentSafe(
          test_case.input, true /* fail_on_path_separators */, &output));
      EXPECT_TRUE(output.empty());
    } else {
      output = "foo";
      EXPECT_TRUE(UnescapeBinaryURLComponentSafe(
          test_case.input, true /* fail_on_path_separators */, &output));
      EXPECT_EQ(test_case.expected_output, output);
    }
  }
}
575 
// ContainsEncodedBytes() reports whether the text holds a percent-escape whose
// decoded value is one of the given bytes.
TEST(EscapeTest, ContainsEncodedBytes) {
  // An unescaped '/' does not count; only escaped occurrences do.
  EXPECT_FALSE(ContainsEncodedBytes("abc/def", {'/', '\\'}));
  // "%2F" decodes to '/', not '%'; "%25" is the escape that decodes to '%'.
  EXPECT_FALSE(ContainsEncodedBytes("abc%2Fdef", {'%'}));
  EXPECT_TRUE(ContainsEncodedBytes("abc%252Fdef", {'%'}));
  EXPECT_TRUE(ContainsEncodedBytes("abc%2Fdef", {'/', '\\'}));
  EXPECT_TRUE(ContainsEncodedBytes("abc%5Cdef", {'/', '\\'}));
  // Lower-case hex digits are recognised as well.
  EXPECT_TRUE(ContainsEncodedBytes("abc%2fdef", {'/', '\\'}));

  // Should be looking for byte values, not UTF-8 character values.
  EXPECT_TRUE(
      ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xc3')}));
  EXPECT_FALSE(
      ContainsEncodedBytes("caf%C3%A9", {static_cast<uint8_t>('\xe9')}));
}
590 
591 }  // namespace
592 }  // namespace base
593