// Copyright 2013 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "base/json/string_escape.h" #include #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" namespace base { TEST(JSONStringEscapeTest, EscapeUTF8) { const struct { const char* to_escape; const char* escaped; } cases[] = { {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, {"a\b\f\n\r\t\v\1\\.\"z", "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, {"b\x0f\x7f\xf0\xff!", // \xf0\xff is not a valid UTF-8 unit. "b\\u000F\x7F\xEF\xBF\xBD\xEF\xBF\xBD!"}, {"c<>d", "c\\u003C>d"}, {"Hello\xE2\x80\xA8world", "Hello\\u2028world"}, // U+2028 {"\xE2\x80\xA9purple", "\\u2029purple"}, // U+2029 // Unicode non-characters. {"\xEF\xB7\x90", "\xEF\xB7\x90"}, // U+FDD0 {"\xEF\xB7\x9F", "\xEF\xB7\x9F"}, // U+FDDF {"\xEF\xB7\xAF", "\xEF\xB7\xAF"}, // U+FDEF {"\xEF\xBF\xBE", "\xEF\xBF\xBE"}, // U+FFFE {"\xEF\xBF\xBF", "\xEF\xBF\xBF"}, // U+FFFF {"\xF0\x9F\xBF\xBE", "\xF0\x9F\xBF\xBE"}, // U+01FFFE {"\xF0\x9F\xBF\xBF", "\xF0\x9F\xBF\xBF"}, // U+01FFFF {"\xF0\xAF\xBF\xBE", "\xF0\xAF\xBF\xBE"}, // U+02FFFE {"\xF0\xAF\xBF\xBF", "\xF0\xAF\xBF\xBF"}, // U+02FFFF {"\xF0\xBF\xBF\xBE", "\xF0\xBF\xBF\xBE"}, // U+03FFFE {"\xF0\xBF\xBF\xBF", "\xF0\xBF\xBF\xBF"}, // U+03FFFF {"\xF1\x8F\xBF\xBE", "\xF1\x8F\xBF\xBE"}, // U+04FFFE {"\xF1\x8F\xBF\xBF", "\xF1\x8F\xBF\xBF"}, // U+04FFFF {"\xF1\x9F\xBF\xBE", "\xF1\x9F\xBF\xBE"}, // U+05FFFE {"\xF1\x9F\xBF\xBF", "\xF1\x9F\xBF\xBF"}, // U+05FFFF {"\xF1\xAF\xBF\xBE", "\xF1\xAF\xBF\xBE"}, // U+06FFFE {"\xF1\xAF\xBF\xBF", "\xF1\xAF\xBF\xBF"}, // U+06FFFF {"\xF1\xBF\xBF\xBE", "\xF1\xBF\xBF\xBE"}, // U+07FFFE {"\xF1\xBF\xBF\xBF", "\xF1\xBF\xBF\xBF"}, // U+07FFFF {"\xF2\x8F\xBF\xBE", "\xF2\x8F\xBF\xBE"}, // U+08FFFE {"\xF2\x8F\xBF\xBF", "\xF2\x8F\xBF\xBF"}, // U+08FFFF {"\xF2\x9F\xBF\xBE", "\xF2\x9F\xBF\xBE"}, // U+09FFFE {"\xF2\x9F\xBF\xBF", "\xF2\x9F\xBF\xBF"}, // U+09FFFF {"\xF2\xAF\xBF\xBE", "\xF2\xAF\xBF\xBE"}, // U+0AFFFE {"\xF2\xAF\xBF\xBF", "\xF2\xAF\xBF\xBF"}, // U+0AFFFF {"\xF2\xBF\xBF\xBE", "\xF2\xBF\xBF\xBE"}, // U+0BFFFE {"\xF2\xBF\xBF\xBF", "\xF2\xBF\xBF\xBF"}, // U+0BFFFF {"\xF3\x8F\xBF\xBE", "\xF3\x8F\xBF\xBE"}, // U+0CFFFE {"\xF3\x8F\xBF\xBF", "\xF3\x8F\xBF\xBF"}, // U+0CFFFF {"\xF3\x9F\xBF\xBE", "\xF3\x9F\xBF\xBE"}, // U+0DFFFE {"\xF3\x9F\xBF\xBF", "\xF3\x9F\xBF\xBF"}, // U+0DFFFF {"\xF3\xAF\xBF\xBE", "\xF3\xAF\xBF\xBE"}, // U+0EFFFE {"\xF3\xAF\xBF\xBF", "\xF3\xAF\xBF\xBF"}, // U+0EFFFF {"\xF3\xBF\xBF\xBE", "\xF3\xBF\xBF\xBE"}, // U+0FFFFE {"\xF3\xBF\xBF\xBF", "\xF3\xBF\xBF\xBF"}, // U+0FFFFF {"\xF4\x8F\xBF\xBE", "\xF4\x8F\xBF\xBE"}, // U+10FFFE {"\xF4\x8F\xBF\xBF", "\xF4\x8F\xBF\xBF"}, // U+10FFFF }; for (const auto& i : cases) { const char* in_ptr = i.to_escape; std::string in_str = in_ptr; std::string out; EscapeJSONString(in_ptr, false, &out); EXPECT_EQ(std::string(i.escaped), out); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); out.erase(); EscapeJSONString(in_str, false, &out); EXPECT_EQ(std::string(i.escaped), out); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); std::string fooout = GetQuotedJSONString(in_str); EXPECT_EQ("\"" + std::string(i.escaped) + "\"", fooout); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); } std::string in = cases[0].to_escape; std::string out; EscapeJSONString(in, false, &out); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); // test quoting std::string out_quoted; EscapeJSONString(in, true, &out_quoted); EXPECT_EQ(out.length() + 2, out_quoted.length()); EXPECT_EQ(out_quoted.find(out), 1U); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out_quoted)); // now try with a NULL in the string std::string null_prepend = "test"; null_prepend.push_back(0); in = null_prepend + in; std::string expected = "test\\u0000"; expected += cases[0].escaped; out.clear(); EscapeJSONString(in, false, &out); EXPECT_EQ(expected, out); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); } TEST(JSONStringEscapeTest, EscapeUTF16) { const struct { const wchar_t* to_escape; const char* escaped; } cases[] = { {L"b\uffb1\u00ff", "b\xEF\xBE\xB1\xC3\xBF"}, {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, {L"a\b\f\n\r\t\v\1\\.\"z", "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, {L"b\x0F\x7F\xF0\xFF!", "b\\u000F\x7F\xC3\xB0\xC3\xBF!"}, {L"c<>d", "c\\u003C>d"}, {L"Hello\u2028world", "Hello\\u2028world"}, {L"\u2029purple", "\\u2029purple"}, // Unicode non-characters. {L"\uFDD0", "\xEF\xB7\x90"}, // U+FDD0 {L"\uFDDF", "\xEF\xB7\x9F"}, // U+FDDF {L"\uFDEF", "\xEF\xB7\xAF"}, // U+FDEF {L"\uFFFE", "\xEF\xBF\xBE"}, // U+FFFE {L"\uFFFF", "\xEF\xBF\xBF"}, // U+FFFF {L"\U0001FFFE", "\xF0\x9F\xBF\xBE"}, // U+01FFFE {L"\U0001FFFF", "\xF0\x9F\xBF\xBF"}, // U+01FFFF {L"\U0002FFFE", "\xF0\xAF\xBF\xBE"}, // U+02FFFE {L"\U0002FFFF", "\xF0\xAF\xBF\xBF"}, // U+02FFFF {L"\U0003FFFE", "\xF0\xBF\xBF\xBE"}, // U+03FFFE {L"\U0003FFFF", "\xF0\xBF\xBF\xBF"}, // U+03FFFF {L"\U0004FFFE", "\xF1\x8F\xBF\xBE"}, // U+04FFFE {L"\U0004FFFF", "\xF1\x8F\xBF\xBF"}, // U+04FFFF {L"\U0005FFFE", "\xF1\x9F\xBF\xBE"}, // U+05FFFE {L"\U0005FFFF", "\xF1\x9F\xBF\xBF"}, // U+05FFFF {L"\U0006FFFE", "\xF1\xAF\xBF\xBE"}, // U+06FFFE {L"\U0006FFFF", "\xF1\xAF\xBF\xBF"}, // U+06FFFF {L"\U0007FFFE", "\xF1\xBF\xBF\xBE"}, // U+07FFFE {L"\U0007FFFF", "\xF1\xBF\xBF\xBF"}, // U+07FFFF {L"\U0008FFFE", "\xF2\x8F\xBF\xBE"}, // U+08FFFE {L"\U0008FFFF", "\xF2\x8F\xBF\xBF"}, // U+08FFFF {L"\U0009FFFE", "\xF2\x9F\xBF\xBE"}, // U+09FFFE {L"\U0009FFFF", "\xF2\x9F\xBF\xBF"}, // U+09FFFF {L"\U000AFFFE", "\xF2\xAF\xBF\xBE"}, // U+0AFFFE {L"\U000AFFFF", "\xF2\xAF\xBF\xBF"}, // U+0AFFFF {L"\U000BFFFE", "\xF2\xBF\xBF\xBE"}, // U+0BFFFE {L"\U000BFFFF", "\xF2\xBF\xBF\xBF"}, // U+0BFFFF {L"\U000CFFFE", "\xF3\x8F\xBF\xBE"}, // U+0CFFFE {L"\U000CFFFF", "\xF3\x8F\xBF\xBF"}, // U+0CFFFF {L"\U000DFFFE", "\xF3\x9F\xBF\xBE"}, // U+0DFFFE {L"\U000DFFFF", "\xF3\x9F\xBF\xBF"}, // U+0DFFFF {L"\U000EFFFE", "\xF3\xAF\xBF\xBE"}, // U+0EFFFE {L"\U000EFFFF", "\xF3\xAF\xBF\xBF"}, // U+0EFFFF {L"\U000FFFFE", "\xF3\xBF\xBF\xBE"}, // U+0FFFFE {L"\U000FFFFF", "\xF3\xBF\xBF\xBF"}, // U+0FFFFF {L"\U0010FFFE", "\xF4\x8F\xBF\xBE"}, // U+10FFFE {L"\U0010FFFF", "\xF4\x8F\xBF\xBF"}, // U+10FFFF }; for (const auto& i : cases) { std::u16string in = WideToUTF16(i.to_escape); std::string out; EscapeJSONString(in, false, &out); EXPECT_EQ(std::string(i.escaped), out); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); out = GetQuotedJSONString(in); EXPECT_EQ("\"" + std::string(i.escaped) + "\"", out); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); } std::u16string in = WideToUTF16(cases[0].to_escape); std::string out; EscapeJSONString(in, false, &out); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); // test quoting std::string out_quoted; EscapeJSONString(in, true, &out_quoted); EXPECT_EQ(out.length() + 2, out_quoted.length()); EXPECT_EQ(out_quoted.find(out), 1U); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); // now try with a NULL in the string std::u16string null_prepend = u"test"; null_prepend.push_back(0); in = null_prepend + in; std::string expected = "test\\u0000"; expected += cases[0].escaped; out.clear(); EscapeJSONString(in, false, &out); EXPECT_EQ(expected, out); EXPECT_TRUE(IsStringUTF8AllowingNoncharacters(out)); } TEST(JSONStringEscapeTest, EscapeUTF16OutsideBMP) { { // {a, U+10300, !}, SMP. std::u16string test; test.push_back('a'); test.push_back(0xD800); test.push_back(0xDF00); test.push_back('!'); std::string actual; EXPECT_TRUE(EscapeJSONString(test, false, &actual)); EXPECT_EQ("a\xF0\x90\x8C\x80!", actual); } { // {U+20021, U+2002B}, SIP. std::u16string test; test.push_back(0xD840); test.push_back(0xDC21); test.push_back(0xD840); test.push_back(0xDC2B); std::string actual; EXPECT_TRUE(EscapeJSONString(test, false, &actual)); EXPECT_EQ("\xF0\xA0\x80\xA1\xF0\xA0\x80\xAB", actual); } { // {?, U+D800, @}, lone surrogate. std::u16string test; test.push_back('?'); test.push_back(0xD800); test.push_back('@'); std::string actual; EXPECT_FALSE(EscapeJSONString(test, false, &actual)); EXPECT_EQ("?\xEF\xBF\xBD@", actual); } } TEST(JSONStringEscapeTest, EscapeBytes) { const struct { const char* to_escape; const char* escaped; } cases[] = { {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"}, {"\xe5\xc4\x4f\x05\xb6\xfd", "\\u00E5\\u00C4O\\u0005\\u00B6\\u00FD"}, }; for (const auto& i : cases) { std::string in = std::string(i.to_escape); EXPECT_FALSE(IsStringUTF8AllowingNoncharacters(in)); EXPECT_EQ(std::string(i.escaped), EscapeBytesAsInvalidJSONString(in, false)); EXPECT_EQ("\"" + std::string(i.escaped) + "\"", EscapeBytesAsInvalidJSONString(in, true)); } const char kEmbedNull[] = { '\xab', '\x39', '\0', '\x9f', '\xab' }; std::string in(kEmbedNull, std::size(kEmbedNull)); EXPECT_FALSE(IsStringUTF8AllowingNoncharacters(in)); EXPECT_EQ(std::string("\\u00AB9\\u0000\\u009F\\u00AB"), EscapeBytesAsInvalidJSONString(in, false)); } } // namespace base