xref: /aosp_15_r20/external/abseil-cpp/absl/strings/escaping_test.cc (revision 9356374a3709195abf420251b3e825997ff56c0f)
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/escaping.h"
16 
17 #include <array>
18 #include <cstddef>
19 #include <cstdio>
20 #include <cstring>
21 #include <initializer_list>
22 #include <memory>
23 #include <string>
24 #include <vector>
25 
26 #include "gtest/gtest.h"
27 #include "absl/log/check.h"
28 #include "absl/strings/str_cat.h"
29 
30 #include "absl/strings/internal/escaping_test_common.h"
31 #include "absl/strings/string_view.h"
32 
33 namespace {
34 
// One row of an escaping test table: a string in its escaped spelling next to
// the raw text that spelling stands for.
struct epair {
  std::string escaped;    // Text containing backslash escape sequences.
  std::string unescaped;  // The corresponding raw text.
};
39 
TEST(CEscape,EscapeAndUnescape)40 TEST(CEscape, EscapeAndUnescape) {
41   const std::string inputs[] = {
42       std::string("foo\nxx\r\b\0023"),
43       std::string(""),
44       std::string("abc"),
45       std::string("\1chad_rules"),
46       std::string("\1arnar_drools"),
47       std::string("xxxx\r\t'\"\\"),
48       std::string("\0xx\0", 4),
49       std::string("\x01\x31"),
50       std::string("abc\xb\x42\141bc"),
51       std::string("123\1\x31\x32\x33"),
52       std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"),
53       std::string(
54           "\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"),
55   };
56   // Do this twice, once for octal escapes and once for hex escapes.
57   for (int kind = 0; kind < 4; kind++) {
58     for (const std::string& original : inputs) {
59       std::string escaped;
60       switch (kind) {
61         case 0:
62           escaped = absl::CEscape(original);
63           break;
64         case 1:
65           escaped = absl::CHexEscape(original);
66           break;
67         case 2:
68           escaped = absl::Utf8SafeCEscape(original);
69           break;
70         case 3:
71           escaped = absl::Utf8SafeCHexEscape(original);
72           break;
73       }
74       std::string unescaped_str;
75       EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str));
76       EXPECT_EQ(unescaped_str, original);
77 
78       unescaped_str.erase();
79       std::string error;
80       EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str, &error));
81       EXPECT_EQ(error, "");
82 
83       // Check in-place unescaping
84       std::string s = escaped;
85       EXPECT_TRUE(absl::CUnescape(s, &s));
86       ASSERT_EQ(s, original);
87     }
88   }
89   // Check that all possible two character strings can be escaped then
90   // unescaped successfully.
91   for (int char0 = 0; char0 < 256; char0++) {
92     for (int char1 = 0; char1 < 256; char1++) {
93       char chars[2];
94       chars[0] = char0;
95       chars[1] = char1;
96       std::string s(chars, 2);
97       std::string escaped = absl::CHexEscape(s);
98       std::string unescaped;
99       EXPECT_TRUE(absl::CUnescape(escaped, &unescaped));
100       EXPECT_EQ(s, unescaped);
101     }
102   }
103 }
104 
// Table-driven check of the exact text produced by each of the four escaping
// functions for a handful of known inputs.
TEST(CEscape, BasicEscaping) {
  // Expected output of absl::CEscape (octal escapes).
  const epair oct_values[] = {
      {"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"},
      {"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'",
       "'full of \"sound\" and \"fury\"'"},
      {"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"},
      {"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"}
  };
  // Expected output of absl::CHexEscape (hex escapes).
  const epair hex_values[] = {
      {"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"},
      {"I\\\'ve just seen a \\\"face\\\"",
       "I've just seen a \"face\""},
      {"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"},
      {"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"}
  };
  // Expected output of absl::Utf8SafeCEscape: multi-byte UTF-8 sequences are
  // expected to come through unescaped.
  const epair utf8_oct_values[] = {
      {"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t",
       "\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"},
      {"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name",
       "\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"},
      {"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\",
       "\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"},
      {"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r",
       "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
  };
  // Expected output of absl::Utf8SafeCHexEscape.
  const epair utf8_hex_values[] = {
      {"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n",
       "\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"},
      {"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"",
       "\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""},
      {"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\",
       "\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"},
      {"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r",
       "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
  };

  for (const epair& row : oct_values) {
    EXPECT_EQ(absl::CEscape(row.unescaped), row.escaped);
  }
  for (const epair& row : hex_values) {
    EXPECT_EQ(absl::CHexEscape(row.unescaped), row.escaped);
  }
  for (const epair& row : utf8_oct_values) {
    EXPECT_EQ(absl::Utf8SafeCEscape(row.unescaped), row.escaped);
  }
  for (const epair& row : utf8_hex_values) {
    EXPECT_EQ(absl::Utf8SafeCHexEscape(row.unescaped), row.escaped);
  }
}
158 
// Checks CUnescape on \u and \U escapes: well-formed ones must decode to the
// listed UTF-8 bytes, and a set of malformed escapes must be rejected by both
// overloads.
TEST(Unescape, BasicFunction) {
  const epair accepted[] = {
      {"", ""},
      {"\\u0030", "0"},
      {"\\u00A3", "\xC2\xA3"},
      {"\\u22FD", "\xE2\x8B\xBD"},
      {"\\U00010000", "\xF0\x90\x80\x80"},
      {"\\U0010FFFD", "\xF4\x8F\xBF\xBD"},
  };
  for (const epair& row : accepted) {
    std::string decoded;
    EXPECT_TRUE(absl::CUnescape(row.escaped, &decoded));
    EXPECT_EQ(decoded, row.unescaped);
  }

  const std::string rejected[] = {
      "\\u1",         // too short
      "\\U1",         // too short
      "\\Uffffff",    // exceeds 0x10ffff (largest Unicode)
      "\\U00110000",  // exceeds 0x10ffff (largest Unicode)
      "\\uD835",      // surrogate character (D800-DFFF)
      "\\U0000DD04",  // surrogate character (D800-DFFF)
      "\\777",        // exceeds 0xff
      "\\xABCD",      // exceeds 0xff
  };
  for (const std::string& input : rejected) {
    // The three-argument overload must fail and describe the failure.
    std::string message;
    std::string decoded;
    EXPECT_FALSE(absl::CUnescape(input, &decoded, &message));
    EXPECT_FALSE(message.empty());

    // The two-argument overload must fail as well.
    decoded.erase();
    EXPECT_FALSE(absl::CUnescape(input, &decoded));
  }
}
190 
191 class CUnescapeTest : public testing::Test {
192  protected:
193   static const char kStringWithMultipleOctalNulls[];
194   static const char kStringWithMultipleHexNulls[];
195   static const char kStringWithMultipleUnicodeNulls[];
196 
197   std::string result_string_;
198 };
199 
200 const char CUnescapeTest::kStringWithMultipleOctalNulls[] =
201     "\\0\\n"    // null escape \0 plus newline
202     "0\\n"      // just a number 0 (not a null escape) plus newline
203     "\\00\\12"  // null escape \00 plus octal newline code
204     "\\000";    // null escape \000
205 
206 // This has the same ingredients as kStringWithMultipleOctalNulls
207 // but with \x hex escapes instead of octal escapes.
208 const char CUnescapeTest::kStringWithMultipleHexNulls[] =
209     "\\x0\\n"
210     "0\\n"
211     "\\x00\\xa"
212     "\\x000";
213 
214 const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] =
215     "\\u0000\\n"    // short-form (4-digit) null escape plus newline
216     "0\\n"          // just a number 0 (not a null escape) plus newline
217     "\\U00000000";  // long-form (8-digit) null escape
218 
// The one-digit octal escape "\0" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharOctalNull) {
  const std::string escaped_input = "\\0";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
224 
// The two-digit octal escape "\00" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharOctalNull) {
  const std::string escaped_input = "\\00";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
230 
// The three-digit octal escape "\000" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes3CharOctalNull) {
  const std::string escaped_input = "\\000";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
236 
// The one-digit hex escape "\x0" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharHexNull) {
  const std::string escaped_input = "\\x0";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
242 
// The two-digit hex escape "\x00" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharHexNull) {
  const std::string escaped_input = "\\x00";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
248 
// The three-digit hex escape "\x000" must decode to a single NUL byte (not a
// NUL followed by a literal '0').
TEST_F(CUnescapeTest, Unescapes3CharHexNull) {
  const std::string escaped_input = "\\x000";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
254 
// The four-digit Unicode escape "\u0000" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) {
  const std::string escaped_input = "\\u0000";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
260 
// The eight-digit Unicode escape "\U00000000" must decode to a single NUL
// byte.
TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) {
  const std::string escaped_input = "\\U00000000";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
266 
// Unescaping kStringWithMultipleOctalNulls must turn every escape (newlines
// and NULs alike) into its raw character while leaving the literal '0' alone.
TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) {
  const std::string escaped_input(kStringWithMultipleOctalNulls);
  // Explicit length: the expected text contains embedded NUL bytes.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0\n"
      "\0",
      7);
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
279 
280 
// Unescaping kStringWithMultipleHexNulls must turn every \x escape and every
// newline escape into its raw character while leaving the literal '0' alone.
TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) {
  const std::string escaped_input(kStringWithMultipleHexNulls);
  // Explicit length: the expected text contains embedded NUL bytes.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0\n"
      "\0",
      7);
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
291 
// Unescaping kStringWithMultipleUnicodeNulls must turn both the \u and \U
// NUL escapes into raw NUL bytes while leaving the literal '0' alone.
TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) {
  const std::string escaped_input(kStringWithMultipleUnicodeNulls);
  // Explicit length: the expected text contains embedded NUL bytes.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0",
      5);
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
301 
302 static struct {
303   absl::string_view plaintext;
304   absl::string_view cyphertext;
305 } const base64_tests[] = {
306     // Empty string.
307     {{"", 0}, {"", 0}},
308     {{nullptr, 0},
309      {"", 0}},  // if length is zero, plaintext ptr must be ignored!
310 
311     // Basic bit patterns;
312     // values obtained with "echo -n '...' | uuencode -m test"
313 
314     {{"\000", 1}, "AA=="},
315     {{"\001", 1}, "AQ=="},
316     {{"\002", 1}, "Ag=="},
317     {{"\004", 1}, "BA=="},
318     {{"\010", 1}, "CA=="},
319     {{"\020", 1}, "EA=="},
320     {{"\040", 1}, "IA=="},
321     {{"\100", 1}, "QA=="},
322     {{"\200", 1}, "gA=="},
323 
324     {{"\377", 1}, "/w=="},
325     {{"\376", 1}, "/g=="},
326     {{"\375", 1}, "/Q=="},
327     {{"\373", 1}, "+w=="},
328     {{"\367", 1}, "9w=="},
329     {{"\357", 1}, "7w=="},
330     {{"\337", 1}, "3w=="},
331     {{"\277", 1}, "vw=="},
332     {{"\177", 1}, "fw=="},
333     {{"\000\000", 2}, "AAA="},
334     {{"\000\001", 2}, "AAE="},
335     {{"\000\002", 2}, "AAI="},
336     {{"\000\004", 2}, "AAQ="},
337     {{"\000\010", 2}, "AAg="},
338     {{"\000\020", 2}, "ABA="},
339     {{"\000\040", 2}, "ACA="},
340     {{"\000\100", 2}, "AEA="},
341     {{"\000\200", 2}, "AIA="},
342     {{"\001\000", 2}, "AQA="},
343     {{"\002\000", 2}, "AgA="},
344     {{"\004\000", 2}, "BAA="},
345     {{"\010\000", 2}, "CAA="},
346     {{"\020\000", 2}, "EAA="},
347     {{"\040\000", 2}, "IAA="},
348     {{"\100\000", 2}, "QAA="},
349     {{"\200\000", 2}, "gAA="},
350 
351     {{"\377\377", 2}, "//8="},
352     {{"\377\376", 2}, "//4="},
353     {{"\377\375", 2}, "//0="},
354     {{"\377\373", 2}, "//s="},
355     {{"\377\367", 2}, "//c="},
356     {{"\377\357", 2}, "/+8="},
357     {{"\377\337", 2}, "/98="},
358     {{"\377\277", 2}, "/78="},
359     {{"\377\177", 2}, "/38="},
360     {{"\376\377", 2}, "/v8="},
361     {{"\375\377", 2}, "/f8="},
362     {{"\373\377", 2}, "+/8="},
363     {{"\367\377", 2}, "9/8="},
364     {{"\357\377", 2}, "7/8="},
365     {{"\337\377", 2}, "3/8="},
366     {{"\277\377", 2}, "v/8="},
367     {{"\177\377", 2}, "f/8="},
368 
369     {{"\000\000\000", 3}, "AAAA"},
370     {{"\000\000\001", 3}, "AAAB"},
371     {{"\000\000\002", 3}, "AAAC"},
372     {{"\000\000\004", 3}, "AAAE"},
373     {{"\000\000\010", 3}, "AAAI"},
374     {{"\000\000\020", 3}, "AAAQ"},
375     {{"\000\000\040", 3}, "AAAg"},
376     {{"\000\000\100", 3}, "AABA"},
377     {{"\000\000\200", 3}, "AACA"},
378     {{"\000\001\000", 3}, "AAEA"},
379     {{"\000\002\000", 3}, "AAIA"},
380     {{"\000\004\000", 3}, "AAQA"},
381     {{"\000\010\000", 3}, "AAgA"},
382     {{"\000\020\000", 3}, "ABAA"},
383     {{"\000\040\000", 3}, "ACAA"},
384     {{"\000\100\000", 3}, "AEAA"},
385     {{"\000\200\000", 3}, "AIAA"},
386     {{"\001\000\000", 3}, "AQAA"},
387     {{"\002\000\000", 3}, "AgAA"},
388     {{"\004\000\000", 3}, "BAAA"},
389     {{"\010\000\000", 3}, "CAAA"},
390     {{"\020\000\000", 3}, "EAAA"},
391     {{"\040\000\000", 3}, "IAAA"},
392     {{"\100\000\000", 3}, "QAAA"},
393     {{"\200\000\000", 3}, "gAAA"},
394 
395     {{"\377\377\377", 3}, "////"},
396     {{"\377\377\376", 3}, "///+"},
397     {{"\377\377\375", 3}, "///9"},
398     {{"\377\377\373", 3}, "///7"},
399     {{"\377\377\367", 3}, "///3"},
400     {{"\377\377\357", 3}, "///v"},
401     {{"\377\377\337", 3}, "///f"},
402     {{"\377\377\277", 3}, "//+/"},
403     {{"\377\377\177", 3}, "//9/"},
404     {{"\377\376\377", 3}, "//7/"},
405     {{"\377\375\377", 3}, "//3/"},
406     {{"\377\373\377", 3}, "//v/"},
407     {{"\377\367\377", 3}, "//f/"},
408     {{"\377\357\377", 3}, "/+//"},
409     {{"\377\337\377", 3}, "/9//"},
410     {{"\377\277\377", 3}, "/7//"},
411     {{"\377\177\377", 3}, "/3//"},
412     {{"\376\377\377", 3}, "/v//"},
413     {{"\375\377\377", 3}, "/f//"},
414     {{"\373\377\377", 3}, "+///"},
415     {{"\367\377\377", 3}, "9///"},
416     {{"\357\377\377", 3}, "7///"},
417     {{"\337\377\377", 3}, "3///"},
418     {{"\277\377\377", 3}, "v///"},
419     {{"\177\377\377", 3}, "f///"},
420 
421     // Random numbers: values obtained with
422     //
423     //  #! /bin/bash
424     //  dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
425     //  od -N $1 -t o1 /tmp/bar.random
426     //  uuencode -m test < /tmp/bar.random
427     //
428     // where $1 is the number of bytes (2, 3)
429 
430     {{"\243\361", 2}, "o/E="},
431     {{"\024\167", 2}, "FHc="},
432     {{"\313\252", 2}, "y6o="},
433     {{"\046\041", 2}, "JiE="},
434     {{"\145\236", 2}, "ZZ4="},
435     {{"\254\325", 2}, "rNU="},
436     {{"\061\330", 2}, "Mdg="},
437     {{"\245\032", 2}, "pRo="},
438     {{"\006\000", 2}, "BgA="},
439     {{"\375\131", 2}, "/Vk="},
440     {{"\303\210", 2}, "w4g="},
441     {{"\040\037", 2}, "IB8="},
442     {{"\261\372", 2}, "sfo="},
443     {{"\335\014", 2}, "3Qw="},
444     {{"\233\217", 2}, "m48="},
445     {{"\373\056", 2}, "+y4="},
446     {{"\247\232", 2}, "p5o="},
447     {{"\107\053", 2}, "Rys="},
448     {{"\204\077", 2}, "hD8="},
449     {{"\276\211", 2}, "vok="},
450     {{"\313\110", 2}, "y0g="},
451     {{"\363\376", 2}, "8/4="},
452     {{"\251\234", 2}, "qZw="},
453     {{"\103\262", 2}, "Q7I="},
454     {{"\142\312", 2}, "Yso="},
455     {{"\067\211", 2}, "N4k="},
456     {{"\220\001", 2}, "kAE="},
457     {{"\152\240", 2}, "aqA="},
458     {{"\367\061", 2}, "9zE="},
459     {{"\133\255", 2}, "W60="},
460     {{"\176\035", 2}, "fh0="},
461     {{"\032\231", 2}, "Gpk="},
462 
463     {{"\013\007\144", 3}, "Cwdk"},
464     {{"\030\112\106", 3}, "GEpG"},
465     {{"\047\325\046", 3}, "J9Um"},
466     {{"\310\160\022", 3}, "yHAS"},
467     {{"\131\100\237", 3}, "WUCf"},
468     {{"\064\342\134", 3}, "NOJc"},
469     {{"\010\177\004", 3}, "CH8E"},
470     {{"\345\147\205", 3}, "5WeF"},
471     {{"\300\343\360", 3}, "wOPw"},
472     {{"\061\240\201", 3}, "MaCB"},
473     {{"\225\333\044", 3}, "ldsk"},
474     {{"\215\137\352", 3}, "jV/q"},
475     {{"\371\147\160", 3}, "+Wdw"},
476     {{"\030\320\051", 3}, "GNAp"},
477     {{"\044\174\241", 3}, "JHyh"},
478     {{"\260\127\037", 3}, "sFcf"},
479     {{"\111\045\033", 3}, "SSUb"},
480     {{"\202\114\107", 3}, "gkxH"},
481     {{"\057\371\042", 3}, "L/ki"},
482     {{"\223\247\244", 3}, "k6ek"},
483     {{"\047\216\144", 3}, "J45k"},
484     {{"\203\070\327", 3}, "gzjX"},
485     {{"\247\140\072", 3}, "p2A6"},
486     {{"\124\115\116", 3}, "VE1O"},
487     {{"\157\162\050", 3}, "b3Io"},
488     {{"\357\223\004", 3}, "75ME"},
489     {{"\052\117\156", 3}, "Kk9u"},
490     {{"\347\154\000", 3}, "52wA"},
491     {{"\303\012\142", 3}, "wwpi"},
492     {{"\060\035\362", 3}, "MB3y"},
493     {{"\130\226\361", 3}, "WJbx"},
494     {{"\173\013\071", 3}, "ews5"},
495     {{"\336\004\027", 3}, "3gQX"},
496     {{"\357\366\234", 3}, "7/ac"},
497     {{"\353\304\111", 3}, "68RJ"},
498     {{"\024\264\131", 3}, "FLRZ"},
499     {{"\075\114\251", 3}, "PUyp"},
500     {{"\315\031\225", 3}, "zRmV"},
501     {{"\154\201\276", 3}, "bIG+"},
502     {{"\200\066\072", 3}, "gDY6"},
503     {{"\142\350\267", 3}, "Yui3"},
504     {{"\033\000\166", 3}, "GwB2"},
505     {{"\210\055\077", 3}, "iC0/"},
506     {{"\341\037\124", 3}, "4R9U"},
507     {{"\161\103\152", 3}, "cUNq"},
508     {{"\270\142\131", 3}, "uGJZ"},
509     {{"\337\076\074", 3}, "3z48"},
510     {{"\375\106\362", 3}, "/Uby"},
511     {{"\227\301\127", 3}, "l8FX"},
512     {{"\340\002\234", 3}, "4AKc"},
513     {{"\121\064\033", 3}, "UTQb"},
514     {{"\157\134\143", 3}, "b1xj"},
515     {{"\247\055\327", 3}, "py3X"},
516     {{"\340\142\005", 3}, "4GIF"},
517     {{"\060\260\143", 3}, "MLBj"},
518     {{"\075\203\170", 3}, "PYN4"},
519     {{"\143\160\016", 3}, "Y3AO"},
520     {{"\313\013\063", 3}, "ywsz"},
521     {{"\174\236\135", 3}, "fJ5d"},
522     {{"\103\047\026", 3}, "QycW"},
523     {{"\365\005\343", 3}, "9QXj"},
524     {{"\271\160\223", 3}, "uXCT"},
525     {{"\362\255\172", 3}, "8q16"},
526     {{"\113\012\015", 3}, "SwoN"},
527 
528     // various lengths, generated by this python script:
529     //
530     // from std::string import lowercase as lc
531     // for i in range(27):
532     //   print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
533     //                                     lc[:i].encode('base64').strip())
534 
535     {{"", 0}, {"", 0}},
536     {"a", "YQ=="},
537     {"ab", "YWI="},
538     {"abc", "YWJj"},
539     {"abcd", "YWJjZA=="},
540     {"abcde", "YWJjZGU="},
541     {"abcdef", "YWJjZGVm"},
542     {"abcdefg", "YWJjZGVmZw=="},
543     {"abcdefgh", "YWJjZGVmZ2g="},
544     {"abcdefghi", "YWJjZGVmZ2hp"},
545     {"abcdefghij", "YWJjZGVmZ2hpag=="},
546     {"abcdefghijk", "YWJjZGVmZ2hpams="},
547     {"abcdefghijkl", "YWJjZGVmZ2hpamts"},
548     {"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="},
549     {"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="},
550     {"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"},
551     {"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="},
552     {"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="},
553     {"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"},
554     {"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="},
555     {"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="},
556     {"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"},
557     {"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="},
558     {"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="},
559     {"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"},
560     {"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="},
561     {"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="},
562 };
563 
564 template <typename StringType>
TestEscapeAndUnescape()565 void TestEscapeAndUnescape() {
566   // Check the short strings; this tests the math (and boundaries)
567   for (const auto& tc : base64_tests) {
568     // Test plain base64.
569     StringType encoded("this junk should be ignored");
570     absl::Base64Escape(tc.plaintext, &encoded);
571     EXPECT_EQ(encoded, tc.cyphertext);
572     EXPECT_EQ(absl::Base64Escape(tc.plaintext), tc.cyphertext);
573 
574     StringType decoded("this junk should be ignored");
575     EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
576     EXPECT_EQ(decoded, tc.plaintext);
577 
578     StringType websafe_with_padding(tc.cyphertext);
579     for (unsigned int c = 0; c < websafe_with_padding.size(); ++c) {
580       if ('+' == websafe_with_padding[c]) websafe_with_padding[c] = '-';
581       if ('/' == websafe_with_padding[c]) websafe_with_padding[c] = '_';
582       // Intentionally keeping padding aka '='.
583     }
584 
585     // Test plain websafe (aka without padding).
586     StringType websafe(websafe_with_padding);
587     for (unsigned int c = 0; c < websafe.size(); ++c) {
588       if ('=' == websafe[c]) {
589         websafe.resize(c);
590         break;
591       }
592     }
593     encoded = "this junk should be ignored";
594     absl::WebSafeBase64Escape(tc.plaintext, &encoded);
595     EXPECT_EQ(encoded, websafe);
596     EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe);
597 
598     decoded = "this junk should be ignored";
599     EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
600     EXPECT_EQ(decoded, tc.plaintext);
601   }
602 
603   // Now try the long strings, this tests the streaming
604   for (const auto& tc : absl::strings_internal::base64_strings()) {
605     StringType buffer;
606     absl::WebSafeBase64Escape(tc.plaintext, &buffer);
607     EXPECT_EQ(tc.cyphertext, buffer);
608     EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), tc.cyphertext);
609   }
610 
611   // Verify the behavior when decoding bad data
612   {
613     absl::string_view data_set[] = {"ab-/", absl::string_view("\0bcd", 4),
614                                     absl::string_view("abc.\0", 5)};
615     for (absl::string_view bad_data : data_set) {
616       StringType buf;
617       EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf));
618       EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf));
619       EXPECT_TRUE(buf.empty());
620     }
621   }
622 }
623 
// Runs the shared Base64 escape/unescape checks with std::string as the
// destination type.
TEST(Base64, EscapeAndUnescape) { TestEscapeAndUnescape<std::string>(); }
627 
// Checks which padding suffixes the Base64 decoders accept for the one-byte
// payload "a" (encoded as "YQ").
TEST(Base64, Padding) {
  // Padding is optional.
  // '.' is an acceptable padding character, just like '='.
  const absl::string_view accepted[] = {
      "YQ",
      "YQ==",
      "YQ=.",
      "YQ.=",
      "YQ..",
  };
  for (absl::string_view encoded : accepted) {
    std::string standard_out;
    EXPECT_TRUE(absl::Base64Unescape(encoded, &standard_out));
    EXPECT_EQ(standard_out, "a");

    std::string websafe_out;
    EXPECT_TRUE(absl::WebSafeBase64Unescape(encoded, &websafe_out));
    EXPECT_EQ(websafe_out, "a");
  }

  // Any other count of padding characters must be rejected by both decoders.
  const absl::string_view rejected[] = {
      "YQ=",
      "YQ.",
      "YQ===",
      "YQ==.",
      "YQ=.=",
      "YQ=..",
      "YQ.==",
      "YQ.=.",
      "YQ..=",
      "YQ...",
      "YQ====",
      "YQ....",
      "YQ=====",
      "YQ.....",
  };
  for (absl::string_view encoded : rejected) {
    std::string standard_out;
    EXPECT_FALSE(absl::Base64Unescape(encoded, &standard_out));

    std::string websafe_out;
    EXPECT_FALSE(absl::WebSafeBase64Unescape(encoded, &websafe_out));
  }
}
669 
// Round-trips a 3,000,000,000-byte string through Base64. The DISABLED_
// prefix keeps it out of default test runs.
TEST(Base64, DISABLED_HugeData) {
  const size_t kSize = static_cast<size_t>(3) * 1000 * 1000 * 1000;
  static_assert(kSize % 3 == 0, "kSize must be divisible by 3");
  const std::string plaintext(kSize, 'x');

  std::string encoded;
  absl::Base64Escape(plaintext, &encoded);

  // Build the expected encoding independently: every 3-byte group "xxx"
  // encodes to the 4 characters "eHh4".
  const size_t group_count = kSize / 3;
  std::string expected_encoding;
  expected_encoding.reserve(group_count * 4);
  for (size_t group = 0; group < group_count; ++group) {
    expected_encoding.append("eHh4");
  }
  EXPECT_EQ(expected_encoding, encoded);

  std::string decoded;
  EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
  EXPECT_EQ(plaintext, decoded);
}
691 
// Exercises the bool-returning HexStringToBytes overload together with
// BytesToHexString: lower- and upper-case input, aliased input/output,
// malformed input and empty input.
TEST(Escaping, HexStringToBytesBackToHex) {
  constexpr absl::string_view kTestHexLower =  "1c2f0032f40123456789abcdef";
  constexpr absl::string_view kTestHexUpper =  "1C2F0032F40123456789ABCDEF";
  // Explicit length: the byte sequence contains an embedded NUL.
  constexpr absl::string_view kTestBytes = absl::string_view(
      "\x1c\x2f\x00\x32\xf4\x01\x23\x45\x67\x89\xab\xcd\xef", 13);

  std::string bytes;

  // Both letter cases must decode to the same bytes.
  EXPECT_TRUE(absl::HexStringToBytes(kTestHexLower, &bytes));
  EXPECT_EQ(bytes, kTestBytes);

  EXPECT_TRUE(absl::HexStringToBytes(kTestHexUpper, &bytes));
  EXPECT_EQ(bytes, kTestBytes);

  // Encoding is expected to produce lower-case digits.
  const std::string hex = absl::BytesToHexString(kTestBytes);
  EXPECT_EQ(hex, kTestHexLower);

  // Same buffer as input and output.
  // The result is deliberately ignored because the contract does not promise
  // that this works. The call is here only to see whether the program
  // crashes or a sanitizer reports something.
  bytes = std::string(kTestHexUpper);
  static_cast<void>(absl::HexStringToBytes(bytes, &bytes));

  // Length not a multiple of two.
  EXPECT_FALSE(absl::HexStringToBytes("1c2f003", &bytes));

  // Not hex.
  EXPECT_FALSE(absl::HexStringToBytes("1c2f00ft", &bytes));

  // Empty input succeeds and replaces the previous contents with nothing.
  bytes = "abc";
  EXPECT_TRUE(absl::HexStringToBytes("", &bytes));
  EXPECT_EQ("", bytes);  // Results in empty output.
}
727 
// Exercises the value-returning HexStringToBytes overload on mixed-case
// input, on views into the middle of larger buffers, and checks the
// lower-case output of BytesToHexString.
TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) {
  const std::string mixed_case_hex = "0123456789abcdefABCDEF";
  const std::string expected_bytes =
      "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF";
  const std::string lower_case_hex = "0123456789abcdefabcdef";

  // Whole string, mixed case.
  EXPECT_EQ(expected_bytes, absl::HexStringToBytes(mixed_case_hex));

  // A view that stops just before a trailing non-hex character.
  const std::string with_suffix = mixed_case_hex + "?";
  const absl::string_view without_suffix(with_suffix.data(),
                                         with_suffix.size() - 1);
  EXPECT_EQ(expected_bytes, absl::HexStringToBytes(without_suffix));

  // A view into the middle of a buffer with non-hex characters on both sides.
  const std::string with_both = "?" + mixed_case_hex + "???";
  const absl::string_view middle(with_both.data() + 1, mixed_case_hex.size());
  EXPECT_EQ(expected_bytes, absl::HexStringToBytes(middle));

  // Encoding is expected to produce lower-case digits only.
  EXPECT_EQ(lower_case_hex, absl::BytesToHexString(expected_bytes));
}
749 
750 }  // namespace
751