1*6777b538SAndroid Build Coastguard Worker // Copyright 2012 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker #include "base/json/json_parser.h"
6*6777b538SAndroid Build Coastguard Worker
7*6777b538SAndroid Build Coastguard Worker #include <cmath>
8*6777b538SAndroid Build Coastguard Worker #include <iterator>
9*6777b538SAndroid Build Coastguard Worker #include <string_view>
10*6777b538SAndroid Build Coastguard Worker #include <utility>
11*6777b538SAndroid Build Coastguard Worker #include <vector>
12*6777b538SAndroid Build Coastguard Worker
13*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
14*6777b538SAndroid Build Coastguard Worker #include "base/feature_list.h"
15*6777b538SAndroid Build Coastguard Worker #include "base/features.h"
16*6777b538SAndroid Build Coastguard Worker #include "base/json/json_reader.h"
17*6777b538SAndroid Build Coastguard Worker #include "base/metrics/histogram_functions.h"
18*6777b538SAndroid Build Coastguard Worker #include "base/notreached.h"
19*6777b538SAndroid Build Coastguard Worker #include "base/numerics/safe_conversions.h"
20*6777b538SAndroid Build Coastguard Worker #include "base/ranges/algorithm.h"
21*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_number_conversions.h"
22*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
23*6777b538SAndroid Build Coastguard Worker #include "base/strings/stringprintf.h"
24*6777b538SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversion_utils.h"
25*6777b538SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
26*6777b538SAndroid Build Coastguard Worker #include "base/third_party/icu/icu_utf.h"
27*6777b538SAndroid Build Coastguard Worker
28*6777b538SAndroid Build Coastguard Worker namespace base {
29*6777b538SAndroid Build Coastguard Worker namespace internal {
30*6777b538SAndroid Build Coastguard Worker
31*6777b538SAndroid Build Coastguard Worker namespace {
32*6777b538SAndroid Build Coastguard Worker
33*6777b538SAndroid Build Coastguard Worker // Values 1000 and above are used by JSONFileValueSerializer::JsonFileError.
34*6777b538SAndroid Build Coastguard Worker static_assert(JSONParser::JSON_PARSE_ERROR_COUNT < 1000,
35*6777b538SAndroid Build Coastguard Worker "JSONParser error out of bounds");
36*6777b538SAndroid Build Coastguard Worker
ErrorCodeToString(JSONParser::JsonParseError error_code)37*6777b538SAndroid Build Coastguard Worker std::string ErrorCodeToString(JSONParser::JsonParseError error_code) {
38*6777b538SAndroid Build Coastguard Worker switch (error_code) {
39*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_NO_ERROR:
40*6777b538SAndroid Build Coastguard Worker return std::string();
41*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_SYNTAX_ERROR:
42*6777b538SAndroid Build Coastguard Worker return JSONParser::kSyntaxError;
43*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_INVALID_ESCAPE:
44*6777b538SAndroid Build Coastguard Worker return JSONParser::kInvalidEscape;
45*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_UNEXPECTED_TOKEN:
46*6777b538SAndroid Build Coastguard Worker return JSONParser::kUnexpectedToken;
47*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_TRAILING_COMMA:
48*6777b538SAndroid Build Coastguard Worker return JSONParser::kTrailingComma;
49*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_TOO_MUCH_NESTING:
50*6777b538SAndroid Build Coastguard Worker return JSONParser::kTooMuchNesting;
51*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_UNEXPECTED_DATA_AFTER_ROOT:
52*6777b538SAndroid Build Coastguard Worker return JSONParser::kUnexpectedDataAfterRoot;
53*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_UNSUPPORTED_ENCODING:
54*6777b538SAndroid Build Coastguard Worker return JSONParser::kUnsupportedEncoding;
55*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_UNQUOTED_DICTIONARY_KEY:
56*6777b538SAndroid Build Coastguard Worker return JSONParser::kUnquotedDictionaryKey;
57*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_UNREPRESENTABLE_NUMBER:
58*6777b538SAndroid Build Coastguard Worker return JSONParser::kUnrepresentableNumber;
59*6777b538SAndroid Build Coastguard Worker case JSONParser::JSON_PARSE_ERROR_COUNT:
60*6777b538SAndroid Build Coastguard Worker break;
61*6777b538SAndroid Build Coastguard Worker }
62*6777b538SAndroid Build Coastguard Worker NOTREACHED();
63*6777b538SAndroid Build Coastguard Worker return std::string();
64*6777b538SAndroid Build Coastguard Worker }
65*6777b538SAndroid Build Coastguard Worker
66*6777b538SAndroid Build Coastguard Worker const int32_t kExtendedASCIIStart = 0x80;
67*6777b538SAndroid Build Coastguard Worker constexpr base_icu::UChar32 kUnicodeReplacementPoint = 0xFFFD;
68*6777b538SAndroid Build Coastguard Worker
69*6777b538SAndroid Build Coastguard Worker // UnprefixedHexStringToInt acts like |HexStringToInt|, but enforces that the
70*6777b538SAndroid Build Coastguard Worker // input consists purely of hex digits. I.e. no "0x" nor "OX" prefix is
71*6777b538SAndroid Build Coastguard Worker // permitted.
UnprefixedHexStringToInt(std::string_view input,int * output)72*6777b538SAndroid Build Coastguard Worker bool UnprefixedHexStringToInt(std::string_view input, int* output) {
73*6777b538SAndroid Build Coastguard Worker for (size_t i = 0; i < input.size(); i++) {
74*6777b538SAndroid Build Coastguard Worker if (!IsHexDigit(input[i])) {
75*6777b538SAndroid Build Coastguard Worker return false;
76*6777b538SAndroid Build Coastguard Worker }
77*6777b538SAndroid Build Coastguard Worker }
78*6777b538SAndroid Build Coastguard Worker return HexStringToInt(input, output);
79*6777b538SAndroid Build Coastguard Worker }
80*6777b538SAndroid Build Coastguard Worker
// Non-standard JSON extensions whose usage is recorded in a histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused. The values are spelled out explicitly
// so that an accidental reordering cannot silently renumber them.
enum class ChromiumJsonExtension {
  kCComment = 0,
  kCppComment = 1,
  kXEscape = 2,
  kVerticalTabEscape = 3,
  kControlCharacter = 4,
  kNewlineInString = 5,
  kMaxValue = kNewlineInString,
};
92*6777b538SAndroid Build Coastguard Worker
// Name of the enumeration histogram that receives ChromiumJsonExtension
// samples.
constexpr char kExtensionHistogramName[] =
    "Security.JSONParser.ChromiumExtensionUsage";
95*6777b538SAndroid Build Coastguard Worker
96*6777b538SAndroid Build Coastguard Worker } // namespace
97*6777b538SAndroid Build Coastguard Worker
// The UTF-8 byte sequence for U+FFFD, the Unicode replacement character.
const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
100*6777b538SAndroid Build Coastguard Worker
101*6777b538SAndroid Build Coastguard Worker const char JSONParser::kSyntaxError[] = "Syntax error.";
102*6777b538SAndroid Build Coastguard Worker const char JSONParser::kInvalidEscape[] = "Invalid escape sequence.";
103*6777b538SAndroid Build Coastguard Worker const char JSONParser::kUnexpectedToken[] = "Unexpected token.";
104*6777b538SAndroid Build Coastguard Worker const char JSONParser::kTrailingComma[] = "Trailing comma not allowed.";
105*6777b538SAndroid Build Coastguard Worker const char JSONParser::kTooMuchNesting[] = "Too much nesting.";
106*6777b538SAndroid Build Coastguard Worker const char JSONParser::kUnexpectedDataAfterRoot[] =
107*6777b538SAndroid Build Coastguard Worker "Unexpected data after root element.";
108*6777b538SAndroid Build Coastguard Worker const char JSONParser::kUnsupportedEncoding[] =
109*6777b538SAndroid Build Coastguard Worker "Unsupported encoding. JSON must be UTF-8.";
110*6777b538SAndroid Build Coastguard Worker const char JSONParser::kUnquotedDictionaryKey[] =
111*6777b538SAndroid Build Coastguard Worker "Dictionary keys must be quoted.";
112*6777b538SAndroid Build Coastguard Worker const char JSONParser::kUnrepresentableNumber[] =
113*6777b538SAndroid Build Coastguard Worker "Number cannot be represented.";
114*6777b538SAndroid Build Coastguard Worker
JSONParser(int options,size_t max_depth)115*6777b538SAndroid Build Coastguard Worker JSONParser::JSONParser(int options, size_t max_depth)
116*6777b538SAndroid Build Coastguard Worker : options_(options),
117*6777b538SAndroid Build Coastguard Worker max_depth_(max_depth),
118*6777b538SAndroid Build Coastguard Worker index_(0),
119*6777b538SAndroid Build Coastguard Worker stack_depth_(0),
120*6777b538SAndroid Build Coastguard Worker line_number_(0),
121*6777b538SAndroid Build Coastguard Worker index_last_line_(0),
122*6777b538SAndroid Build Coastguard Worker error_code_(JSON_NO_ERROR),
123*6777b538SAndroid Build Coastguard Worker error_line_(0),
124*6777b538SAndroid Build Coastguard Worker error_column_(0) {
125*6777b538SAndroid Build Coastguard Worker CHECK_LE(max_depth, kAbsoluteMaxDepth);
126*6777b538SAndroid Build Coastguard Worker }
127*6777b538SAndroid Build Coastguard Worker
128*6777b538SAndroid Build Coastguard Worker JSONParser::~JSONParser() = default;
129*6777b538SAndroid Build Coastguard Worker
Parse(std::string_view input)130*6777b538SAndroid Build Coastguard Worker std::optional<Value> JSONParser::Parse(std::string_view input) {
131*6777b538SAndroid Build Coastguard Worker input_ = input;
132*6777b538SAndroid Build Coastguard Worker index_ = 0;
133*6777b538SAndroid Build Coastguard Worker // Line and column counting is 1-based, but |index_| is 0-based. For example,
134*6777b538SAndroid Build Coastguard Worker // if input is "Aaa\nB" then 'A' and 'B' are both in column 1 (at lines 1 and
135*6777b538SAndroid Build Coastguard Worker // 2) and have indexes of 0 and 4. We track the line number explicitly (the
136*6777b538SAndroid Build Coastguard Worker // |line_number_| field) and the column number implicitly (the difference
137*6777b538SAndroid Build Coastguard Worker // between |index_| and |index_last_line_|). In calculating that difference,
138*6777b538SAndroid Build Coastguard Worker // |index_last_line_| is the index of the '\r' or '\n', not the index of the
139*6777b538SAndroid Build Coastguard Worker // first byte after the '\n'. For the 'B' in "Aaa\nB", its |index_| and
140*6777b538SAndroid Build Coastguard Worker // |index_last_line_| would be 4 and 3: 'B' is in column (4 - 3) = 1. We
141*6777b538SAndroid Build Coastguard Worker // initialize |index_last_line_| to -1, not 0, since -1 is the (out of range)
142*6777b538SAndroid Build Coastguard Worker // index of the imaginary '\n' immediately before the start of the string:
143*6777b538SAndroid Build Coastguard Worker // 'A' is in column (0 - -1) = 1.
144*6777b538SAndroid Build Coastguard Worker line_number_ = 1;
145*6777b538SAndroid Build Coastguard Worker index_last_line_ = static_cast<size_t>(-1);
146*6777b538SAndroid Build Coastguard Worker
147*6777b538SAndroid Build Coastguard Worker error_code_ = JSON_NO_ERROR;
148*6777b538SAndroid Build Coastguard Worker error_line_ = 0;
149*6777b538SAndroid Build Coastguard Worker error_column_ = 0;
150*6777b538SAndroid Build Coastguard Worker
151*6777b538SAndroid Build Coastguard Worker // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
152*6777b538SAndroid Build Coastguard Worker // advance the start position to avoid the ParseNextToken function mis-
153*6777b538SAndroid Build Coastguard Worker // treating a Unicode BOM as an invalid character and returning NULL.
154*6777b538SAndroid Build Coastguard Worker ConsumeIfMatch("\xEF\xBB\xBF");
155*6777b538SAndroid Build Coastguard Worker
156*6777b538SAndroid Build Coastguard Worker // Parse the first and any nested tokens.
157*6777b538SAndroid Build Coastguard Worker std::optional<Value> root(ParseNextToken());
158*6777b538SAndroid Build Coastguard Worker if (!root)
159*6777b538SAndroid Build Coastguard Worker return std::nullopt;
160*6777b538SAndroid Build Coastguard Worker
161*6777b538SAndroid Build Coastguard Worker // Make sure the input stream is at an end.
162*6777b538SAndroid Build Coastguard Worker if (GetNextToken() != T_END_OF_INPUT) {
163*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNEXPECTED_DATA_AFTER_ROOT, 0);
164*6777b538SAndroid Build Coastguard Worker return std::nullopt;
165*6777b538SAndroid Build Coastguard Worker }
166*6777b538SAndroid Build Coastguard Worker
167*6777b538SAndroid Build Coastguard Worker return root;
168*6777b538SAndroid Build Coastguard Worker }
169*6777b538SAndroid Build Coastguard Worker
error_code() const170*6777b538SAndroid Build Coastguard Worker JSONParser::JsonParseError JSONParser::error_code() const {
171*6777b538SAndroid Build Coastguard Worker return error_code_;
172*6777b538SAndroid Build Coastguard Worker }
173*6777b538SAndroid Build Coastguard Worker
GetErrorMessage() const174*6777b538SAndroid Build Coastguard Worker std::string JSONParser::GetErrorMessage() const {
175*6777b538SAndroid Build Coastguard Worker return FormatErrorMessage(error_line_, error_column_,
176*6777b538SAndroid Build Coastguard Worker ErrorCodeToString(error_code_));
177*6777b538SAndroid Build Coastguard Worker }
178*6777b538SAndroid Build Coastguard Worker
error_line() const179*6777b538SAndroid Build Coastguard Worker int JSONParser::error_line() const {
180*6777b538SAndroid Build Coastguard Worker return error_line_;
181*6777b538SAndroid Build Coastguard Worker }
182*6777b538SAndroid Build Coastguard Worker
error_column() const183*6777b538SAndroid Build Coastguard Worker int JSONParser::error_column() const {
184*6777b538SAndroid Build Coastguard Worker return error_column_;
185*6777b538SAndroid Build Coastguard Worker }
186*6777b538SAndroid Build Coastguard Worker
187*6777b538SAndroid Build Coastguard Worker // StringBuilder ///////////////////////////////////////////////////////////////
188*6777b538SAndroid Build Coastguard Worker
StringBuilder()189*6777b538SAndroid Build Coastguard Worker JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
190*6777b538SAndroid Build Coastguard Worker
StringBuilder(const char * pos)191*6777b538SAndroid Build Coastguard Worker JSONParser::StringBuilder::StringBuilder(const char* pos)
192*6777b538SAndroid Build Coastguard Worker : pos_(pos), length_(0) {}
193*6777b538SAndroid Build Coastguard Worker
194*6777b538SAndroid Build Coastguard Worker JSONParser::StringBuilder::~StringBuilder() = default;
195*6777b538SAndroid Build Coastguard Worker
196*6777b538SAndroid Build Coastguard Worker JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
197*6777b538SAndroid Build Coastguard Worker StringBuilder&& other) = default;
198*6777b538SAndroid Build Coastguard Worker
Append(base_icu::UChar32 point)199*6777b538SAndroid Build Coastguard Worker void JSONParser::StringBuilder::Append(base_icu::UChar32 point) {
200*6777b538SAndroid Build Coastguard Worker DCHECK(IsValidCodepoint(point));
201*6777b538SAndroid Build Coastguard Worker
202*6777b538SAndroid Build Coastguard Worker if (point < kExtendedASCIIStart) {
203*6777b538SAndroid Build Coastguard Worker if (!string_) {
204*6777b538SAndroid Build Coastguard Worker DCHECK_EQ(static_cast<char>(point), pos_[length_]);
205*6777b538SAndroid Build Coastguard Worker ++length_;
206*6777b538SAndroid Build Coastguard Worker } else {
207*6777b538SAndroid Build Coastguard Worker string_->push_back(static_cast<char>(point));
208*6777b538SAndroid Build Coastguard Worker }
209*6777b538SAndroid Build Coastguard Worker } else {
210*6777b538SAndroid Build Coastguard Worker Convert();
211*6777b538SAndroid Build Coastguard Worker if (UNLIKELY(point == kUnicodeReplacementPoint)) {
212*6777b538SAndroid Build Coastguard Worker string_->append(kUnicodeReplacementString);
213*6777b538SAndroid Build Coastguard Worker } else {
214*6777b538SAndroid Build Coastguard Worker WriteUnicodeCharacter(point, &*string_);
215*6777b538SAndroid Build Coastguard Worker }
216*6777b538SAndroid Build Coastguard Worker }
217*6777b538SAndroid Build Coastguard Worker }
218*6777b538SAndroid Build Coastguard Worker
Convert()219*6777b538SAndroid Build Coastguard Worker void JSONParser::StringBuilder::Convert() {
220*6777b538SAndroid Build Coastguard Worker if (string_)
221*6777b538SAndroid Build Coastguard Worker return;
222*6777b538SAndroid Build Coastguard Worker string_.emplace(pos_, length_);
223*6777b538SAndroid Build Coastguard Worker }
224*6777b538SAndroid Build Coastguard Worker
DestructiveAsString()225*6777b538SAndroid Build Coastguard Worker std::string JSONParser::StringBuilder::DestructiveAsString() {
226*6777b538SAndroid Build Coastguard Worker if (string_)
227*6777b538SAndroid Build Coastguard Worker return std::move(*string_);
228*6777b538SAndroid Build Coastguard Worker return std::string(pos_, length_);
229*6777b538SAndroid Build Coastguard Worker }
230*6777b538SAndroid Build Coastguard Worker
231*6777b538SAndroid Build Coastguard Worker // JSONParser private //////////////////////////////////////////////////////////
232*6777b538SAndroid Build Coastguard Worker
PeekChars(size_t count)233*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> JSONParser::PeekChars(size_t count) {
234*6777b538SAndroid Build Coastguard Worker if (index_ + count > input_.length())
235*6777b538SAndroid Build Coastguard Worker return std::nullopt;
236*6777b538SAndroid Build Coastguard Worker // Using StringPiece::substr() is significantly slower (according to
237*6777b538SAndroid Build Coastguard Worker // base_perftests) than constructing a substring manually.
238*6777b538SAndroid Build Coastguard Worker return std::string_view(input_.data() + index_, count);
239*6777b538SAndroid Build Coastguard Worker }
240*6777b538SAndroid Build Coastguard Worker
PeekChar()241*6777b538SAndroid Build Coastguard Worker std::optional<char> JSONParser::PeekChar() {
242*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> chars = PeekChars(1);
243*6777b538SAndroid Build Coastguard Worker if (chars)
244*6777b538SAndroid Build Coastguard Worker return (*chars)[0];
245*6777b538SAndroid Build Coastguard Worker return std::nullopt;
246*6777b538SAndroid Build Coastguard Worker }
247*6777b538SAndroid Build Coastguard Worker
ConsumeChars(size_t count)248*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> JSONParser::ConsumeChars(size_t count) {
249*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> chars = PeekChars(count);
250*6777b538SAndroid Build Coastguard Worker if (chars)
251*6777b538SAndroid Build Coastguard Worker index_ += count;
252*6777b538SAndroid Build Coastguard Worker return chars;
253*6777b538SAndroid Build Coastguard Worker }
254*6777b538SAndroid Build Coastguard Worker
ConsumeChar()255*6777b538SAndroid Build Coastguard Worker std::optional<char> JSONParser::ConsumeChar() {
256*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> chars = ConsumeChars(1);
257*6777b538SAndroid Build Coastguard Worker if (chars)
258*6777b538SAndroid Build Coastguard Worker return (*chars)[0];
259*6777b538SAndroid Build Coastguard Worker return std::nullopt;
260*6777b538SAndroid Build Coastguard Worker }
261*6777b538SAndroid Build Coastguard Worker
pos()262*6777b538SAndroid Build Coastguard Worker const char* JSONParser::pos() {
263*6777b538SAndroid Build Coastguard Worker CHECK_LE(index_, input_.length());
264*6777b538SAndroid Build Coastguard Worker return input_.data() + index_;
265*6777b538SAndroid Build Coastguard Worker }
266*6777b538SAndroid Build Coastguard Worker
GetNextToken()267*6777b538SAndroid Build Coastguard Worker JSONParser::Token JSONParser::GetNextToken() {
268*6777b538SAndroid Build Coastguard Worker EatWhitespaceAndComments();
269*6777b538SAndroid Build Coastguard Worker
270*6777b538SAndroid Build Coastguard Worker std::optional<char> c = PeekChar();
271*6777b538SAndroid Build Coastguard Worker if (!c)
272*6777b538SAndroid Build Coastguard Worker return T_END_OF_INPUT;
273*6777b538SAndroid Build Coastguard Worker
274*6777b538SAndroid Build Coastguard Worker switch (*c) {
275*6777b538SAndroid Build Coastguard Worker case '{':
276*6777b538SAndroid Build Coastguard Worker return T_OBJECT_BEGIN;
277*6777b538SAndroid Build Coastguard Worker case '}':
278*6777b538SAndroid Build Coastguard Worker return T_OBJECT_END;
279*6777b538SAndroid Build Coastguard Worker case '[':
280*6777b538SAndroid Build Coastguard Worker return T_ARRAY_BEGIN;
281*6777b538SAndroid Build Coastguard Worker case ']':
282*6777b538SAndroid Build Coastguard Worker return T_ARRAY_END;
283*6777b538SAndroid Build Coastguard Worker case '"':
284*6777b538SAndroid Build Coastguard Worker return T_STRING;
285*6777b538SAndroid Build Coastguard Worker case '0':
286*6777b538SAndroid Build Coastguard Worker case '1':
287*6777b538SAndroid Build Coastguard Worker case '2':
288*6777b538SAndroid Build Coastguard Worker case '3':
289*6777b538SAndroid Build Coastguard Worker case '4':
290*6777b538SAndroid Build Coastguard Worker case '5':
291*6777b538SAndroid Build Coastguard Worker case '6':
292*6777b538SAndroid Build Coastguard Worker case '7':
293*6777b538SAndroid Build Coastguard Worker case '8':
294*6777b538SAndroid Build Coastguard Worker case '9':
295*6777b538SAndroid Build Coastguard Worker case '-':
296*6777b538SAndroid Build Coastguard Worker return T_NUMBER;
297*6777b538SAndroid Build Coastguard Worker case 't':
298*6777b538SAndroid Build Coastguard Worker return T_BOOL_TRUE;
299*6777b538SAndroid Build Coastguard Worker case 'f':
300*6777b538SAndroid Build Coastguard Worker return T_BOOL_FALSE;
301*6777b538SAndroid Build Coastguard Worker case 'n':
302*6777b538SAndroid Build Coastguard Worker return T_NULL;
303*6777b538SAndroid Build Coastguard Worker case ',':
304*6777b538SAndroid Build Coastguard Worker return T_LIST_SEPARATOR;
305*6777b538SAndroid Build Coastguard Worker case ':':
306*6777b538SAndroid Build Coastguard Worker return T_OBJECT_PAIR_SEPARATOR;
307*6777b538SAndroid Build Coastguard Worker default:
308*6777b538SAndroid Build Coastguard Worker return T_INVALID_TOKEN;
309*6777b538SAndroid Build Coastguard Worker }
310*6777b538SAndroid Build Coastguard Worker }
311*6777b538SAndroid Build Coastguard Worker
EatWhitespaceAndComments()312*6777b538SAndroid Build Coastguard Worker void JSONParser::EatWhitespaceAndComments() {
313*6777b538SAndroid Build Coastguard Worker while (std::optional<char> c = PeekChar()) {
314*6777b538SAndroid Build Coastguard Worker switch (*c) {
315*6777b538SAndroid Build Coastguard Worker case '\r':
316*6777b538SAndroid Build Coastguard Worker case '\n':
317*6777b538SAndroid Build Coastguard Worker index_last_line_ = index_;
318*6777b538SAndroid Build Coastguard Worker // Don't increment line_number_ twice for "\r\n".
319*6777b538SAndroid Build Coastguard Worker if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
320*6777b538SAndroid Build Coastguard Worker ++line_number_;
321*6777b538SAndroid Build Coastguard Worker }
322*6777b538SAndroid Build Coastguard Worker [[fallthrough]];
323*6777b538SAndroid Build Coastguard Worker case ' ':
324*6777b538SAndroid Build Coastguard Worker case '\t':
325*6777b538SAndroid Build Coastguard Worker ConsumeChar();
326*6777b538SAndroid Build Coastguard Worker break;
327*6777b538SAndroid Build Coastguard Worker case '/':
328*6777b538SAndroid Build Coastguard Worker if (!EatComment())
329*6777b538SAndroid Build Coastguard Worker return;
330*6777b538SAndroid Build Coastguard Worker break;
331*6777b538SAndroid Build Coastguard Worker default:
332*6777b538SAndroid Build Coastguard Worker return;
333*6777b538SAndroid Build Coastguard Worker }
334*6777b538SAndroid Build Coastguard Worker }
335*6777b538SAndroid Build Coastguard Worker }
336*6777b538SAndroid Build Coastguard Worker
EatComment()337*6777b538SAndroid Build Coastguard Worker bool JSONParser::EatComment() {
338*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> comment_start = PeekChars(2);
339*6777b538SAndroid Build Coastguard Worker if (!comment_start)
340*6777b538SAndroid Build Coastguard Worker return false;
341*6777b538SAndroid Build Coastguard Worker
342*6777b538SAndroid Build Coastguard Worker const bool comments_allowed = options_ & JSON_ALLOW_COMMENTS;
343*6777b538SAndroid Build Coastguard Worker
344*6777b538SAndroid Build Coastguard Worker if (comment_start == "//") {
345*6777b538SAndroid Build Coastguard Worker UmaHistogramEnumeration(kExtensionHistogramName,
346*6777b538SAndroid Build Coastguard Worker ChromiumJsonExtension::kCppComment);
347*6777b538SAndroid Build Coastguard Worker if (!comments_allowed) {
348*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNEXPECTED_TOKEN, 0);
349*6777b538SAndroid Build Coastguard Worker return false;
350*6777b538SAndroid Build Coastguard Worker }
351*6777b538SAndroid Build Coastguard Worker
352*6777b538SAndroid Build Coastguard Worker ConsumeChars(2);
353*6777b538SAndroid Build Coastguard Worker // Single line comment, read to newline.
354*6777b538SAndroid Build Coastguard Worker while (std::optional<char> c = PeekChar()) {
355*6777b538SAndroid Build Coastguard Worker if (c == '\n' || c == '\r')
356*6777b538SAndroid Build Coastguard Worker return true;
357*6777b538SAndroid Build Coastguard Worker ConsumeChar();
358*6777b538SAndroid Build Coastguard Worker }
359*6777b538SAndroid Build Coastguard Worker } else if (comment_start == "/*") {
360*6777b538SAndroid Build Coastguard Worker UmaHistogramEnumeration(kExtensionHistogramName,
361*6777b538SAndroid Build Coastguard Worker ChromiumJsonExtension::kCComment);
362*6777b538SAndroid Build Coastguard Worker if (!comments_allowed) {
363*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNEXPECTED_TOKEN, 0);
364*6777b538SAndroid Build Coastguard Worker return false;
365*6777b538SAndroid Build Coastguard Worker }
366*6777b538SAndroid Build Coastguard Worker
367*6777b538SAndroid Build Coastguard Worker ConsumeChars(2);
368*6777b538SAndroid Build Coastguard Worker char previous_char = '\0';
369*6777b538SAndroid Build Coastguard Worker // Block comment, read until end marker.
370*6777b538SAndroid Build Coastguard Worker while (std::optional<char> c = PeekChar()) {
371*6777b538SAndroid Build Coastguard Worker if (previous_char == '*' && c == '/') {
372*6777b538SAndroid Build Coastguard Worker // EatWhitespaceAndComments will inspect pos(), which will still be on
373*6777b538SAndroid Build Coastguard Worker // the last / of the comment, so advance once more (which may also be
374*6777b538SAndroid Build Coastguard Worker // end of input).
375*6777b538SAndroid Build Coastguard Worker ConsumeChar();
376*6777b538SAndroid Build Coastguard Worker return true;
377*6777b538SAndroid Build Coastguard Worker }
378*6777b538SAndroid Build Coastguard Worker previous_char = *ConsumeChar();
379*6777b538SAndroid Build Coastguard Worker }
380*6777b538SAndroid Build Coastguard Worker
381*6777b538SAndroid Build Coastguard Worker // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
382*6777b538SAndroid Build Coastguard Worker }
383*6777b538SAndroid Build Coastguard Worker
384*6777b538SAndroid Build Coastguard Worker return false;
385*6777b538SAndroid Build Coastguard Worker }
386*6777b538SAndroid Build Coastguard Worker
ParseNextToken()387*6777b538SAndroid Build Coastguard Worker std::optional<Value> JSONParser::ParseNextToken() {
388*6777b538SAndroid Build Coastguard Worker return ParseToken(GetNextToken());
389*6777b538SAndroid Build Coastguard Worker }
390*6777b538SAndroid Build Coastguard Worker
ParseToken(Token token)391*6777b538SAndroid Build Coastguard Worker std::optional<Value> JSONParser::ParseToken(Token token) {
392*6777b538SAndroid Build Coastguard Worker switch (token) {
393*6777b538SAndroid Build Coastguard Worker case T_OBJECT_BEGIN:
394*6777b538SAndroid Build Coastguard Worker return ConsumeDictionary();
395*6777b538SAndroid Build Coastguard Worker case T_ARRAY_BEGIN:
396*6777b538SAndroid Build Coastguard Worker return ConsumeList();
397*6777b538SAndroid Build Coastguard Worker case T_STRING:
398*6777b538SAndroid Build Coastguard Worker return ConsumeString();
399*6777b538SAndroid Build Coastguard Worker case T_NUMBER:
400*6777b538SAndroid Build Coastguard Worker return ConsumeNumber();
401*6777b538SAndroid Build Coastguard Worker case T_BOOL_TRUE:
402*6777b538SAndroid Build Coastguard Worker case T_BOOL_FALSE:
403*6777b538SAndroid Build Coastguard Worker case T_NULL:
404*6777b538SAndroid Build Coastguard Worker return ConsumeLiteral();
405*6777b538SAndroid Build Coastguard Worker default:
406*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNEXPECTED_TOKEN, 0);
407*6777b538SAndroid Build Coastguard Worker return std::nullopt;
408*6777b538SAndroid Build Coastguard Worker }
409*6777b538SAndroid Build Coastguard Worker }
410*6777b538SAndroid Build Coastguard Worker
ConsumeDictionary()411*6777b538SAndroid Build Coastguard Worker std::optional<Value> JSONParser::ConsumeDictionary() {
412*6777b538SAndroid Build Coastguard Worker if (ConsumeChar() != '{') {
413*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNEXPECTED_TOKEN, 0);
414*6777b538SAndroid Build Coastguard Worker return std::nullopt;
415*6777b538SAndroid Build Coastguard Worker }
416*6777b538SAndroid Build Coastguard Worker
417*6777b538SAndroid Build Coastguard Worker StackMarker depth_check(max_depth_, &stack_depth_);
418*6777b538SAndroid Build Coastguard Worker if (depth_check.IsTooDeep()) {
419*6777b538SAndroid Build Coastguard Worker ReportError(JSON_TOO_MUCH_NESTING, -1);
420*6777b538SAndroid Build Coastguard Worker return std::nullopt;
421*6777b538SAndroid Build Coastguard Worker }
422*6777b538SAndroid Build Coastguard Worker
423*6777b538SAndroid Build Coastguard Worker std::vector<std::pair<std::string, Value>> values;
424*6777b538SAndroid Build Coastguard Worker
425*6777b538SAndroid Build Coastguard Worker Token token = GetNextToken();
426*6777b538SAndroid Build Coastguard Worker while (token != T_OBJECT_END) {
427*6777b538SAndroid Build Coastguard Worker if (token != T_STRING) {
428*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNQUOTED_DICTIONARY_KEY, 0);
429*6777b538SAndroid Build Coastguard Worker return std::nullopt;
430*6777b538SAndroid Build Coastguard Worker }
431*6777b538SAndroid Build Coastguard Worker
432*6777b538SAndroid Build Coastguard Worker // First consume the key.
433*6777b538SAndroid Build Coastguard Worker StringBuilder key;
434*6777b538SAndroid Build Coastguard Worker if (!ConsumeStringRaw(&key)) {
435*6777b538SAndroid Build Coastguard Worker return std::nullopt;
436*6777b538SAndroid Build Coastguard Worker }
437*6777b538SAndroid Build Coastguard Worker
438*6777b538SAndroid Build Coastguard Worker // Read the separator.
439*6777b538SAndroid Build Coastguard Worker token = GetNextToken();
440*6777b538SAndroid Build Coastguard Worker if (token != T_OBJECT_PAIR_SEPARATOR) {
441*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, 0);
442*6777b538SAndroid Build Coastguard Worker return std::nullopt;
443*6777b538SAndroid Build Coastguard Worker }
444*6777b538SAndroid Build Coastguard Worker
445*6777b538SAndroid Build Coastguard Worker // The next token is the value. Ownership transfers to |dict|.
446*6777b538SAndroid Build Coastguard Worker ConsumeChar();
447*6777b538SAndroid Build Coastguard Worker std::optional<Value> value = ParseNextToken();
448*6777b538SAndroid Build Coastguard Worker if (!value) {
449*6777b538SAndroid Build Coastguard Worker // ReportError from deeper level.
450*6777b538SAndroid Build Coastguard Worker return std::nullopt;
451*6777b538SAndroid Build Coastguard Worker }
452*6777b538SAndroid Build Coastguard Worker
453*6777b538SAndroid Build Coastguard Worker values.emplace_back(key.DestructiveAsString(), std::move(*value));
454*6777b538SAndroid Build Coastguard Worker
455*6777b538SAndroid Build Coastguard Worker token = GetNextToken();
456*6777b538SAndroid Build Coastguard Worker if (token == T_LIST_SEPARATOR) {
457*6777b538SAndroid Build Coastguard Worker ConsumeChar();
458*6777b538SAndroid Build Coastguard Worker token = GetNextToken();
459*6777b538SAndroid Build Coastguard Worker if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
460*6777b538SAndroid Build Coastguard Worker ReportError(JSON_TRAILING_COMMA, 0);
461*6777b538SAndroid Build Coastguard Worker return std::nullopt;
462*6777b538SAndroid Build Coastguard Worker }
463*6777b538SAndroid Build Coastguard Worker } else if (token != T_OBJECT_END) {
464*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, 0);
465*6777b538SAndroid Build Coastguard Worker return std::nullopt;
466*6777b538SAndroid Build Coastguard Worker }
467*6777b538SAndroid Build Coastguard Worker }
468*6777b538SAndroid Build Coastguard Worker
469*6777b538SAndroid Build Coastguard Worker ConsumeChar(); // Closing '}'.
470*6777b538SAndroid Build Coastguard Worker // Reverse |dict_storage| to keep the last of elements with the same key in
471*6777b538SAndroid Build Coastguard Worker // the input.
472*6777b538SAndroid Build Coastguard Worker ranges::reverse(values);
473*6777b538SAndroid Build Coastguard Worker return Value(Value::Dict(std::make_move_iterator(values.begin()),
474*6777b538SAndroid Build Coastguard Worker std::make_move_iterator(values.end())));
475*6777b538SAndroid Build Coastguard Worker }
476*6777b538SAndroid Build Coastguard Worker
ConsumeList()477*6777b538SAndroid Build Coastguard Worker std::optional<Value> JSONParser::ConsumeList() {
478*6777b538SAndroid Build Coastguard Worker if (ConsumeChar() != '[') {
479*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNEXPECTED_TOKEN, 0);
480*6777b538SAndroid Build Coastguard Worker return std::nullopt;
481*6777b538SAndroid Build Coastguard Worker }
482*6777b538SAndroid Build Coastguard Worker
483*6777b538SAndroid Build Coastguard Worker StackMarker depth_check(max_depth_, &stack_depth_);
484*6777b538SAndroid Build Coastguard Worker if (depth_check.IsTooDeep()) {
485*6777b538SAndroid Build Coastguard Worker ReportError(JSON_TOO_MUCH_NESTING, -1);
486*6777b538SAndroid Build Coastguard Worker return std::nullopt;
487*6777b538SAndroid Build Coastguard Worker }
488*6777b538SAndroid Build Coastguard Worker
489*6777b538SAndroid Build Coastguard Worker Value::List list;
490*6777b538SAndroid Build Coastguard Worker
491*6777b538SAndroid Build Coastguard Worker Token token = GetNextToken();
492*6777b538SAndroid Build Coastguard Worker while (token != T_ARRAY_END) {
493*6777b538SAndroid Build Coastguard Worker std::optional<Value> item = ParseToken(token);
494*6777b538SAndroid Build Coastguard Worker if (!item) {
495*6777b538SAndroid Build Coastguard Worker // ReportError from deeper level.
496*6777b538SAndroid Build Coastguard Worker return std::nullopt;
497*6777b538SAndroid Build Coastguard Worker }
498*6777b538SAndroid Build Coastguard Worker
499*6777b538SAndroid Build Coastguard Worker list.Append(std::move(*item));
500*6777b538SAndroid Build Coastguard Worker
501*6777b538SAndroid Build Coastguard Worker token = GetNextToken();
502*6777b538SAndroid Build Coastguard Worker if (token == T_LIST_SEPARATOR) {
503*6777b538SAndroid Build Coastguard Worker ConsumeChar();
504*6777b538SAndroid Build Coastguard Worker token = GetNextToken();
505*6777b538SAndroid Build Coastguard Worker if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
506*6777b538SAndroid Build Coastguard Worker ReportError(JSON_TRAILING_COMMA, 0);
507*6777b538SAndroid Build Coastguard Worker return std::nullopt;
508*6777b538SAndroid Build Coastguard Worker }
509*6777b538SAndroid Build Coastguard Worker } else if (token != T_ARRAY_END) {
510*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, 0);
511*6777b538SAndroid Build Coastguard Worker return std::nullopt;
512*6777b538SAndroid Build Coastguard Worker }
513*6777b538SAndroid Build Coastguard Worker }
514*6777b538SAndroid Build Coastguard Worker
515*6777b538SAndroid Build Coastguard Worker ConsumeChar(); // Closing ']'.
516*6777b538SAndroid Build Coastguard Worker
517*6777b538SAndroid Build Coastguard Worker return Value(std::move(list));
518*6777b538SAndroid Build Coastguard Worker }
519*6777b538SAndroid Build Coastguard Worker
ConsumeString()520*6777b538SAndroid Build Coastguard Worker std::optional<Value> JSONParser::ConsumeString() {
521*6777b538SAndroid Build Coastguard Worker StringBuilder string;
522*6777b538SAndroid Build Coastguard Worker if (!ConsumeStringRaw(&string))
523*6777b538SAndroid Build Coastguard Worker return std::nullopt;
524*6777b538SAndroid Build Coastguard Worker return Value(string.DestructiveAsString());
525*6777b538SAndroid Build Coastguard Worker }
526*6777b538SAndroid Build Coastguard Worker
ConsumeStringRaw(StringBuilder * out)527*6777b538SAndroid Build Coastguard Worker bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
528*6777b538SAndroid Build Coastguard Worker if (ConsumeChar() != '"') {
529*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNEXPECTED_TOKEN, 0);
530*6777b538SAndroid Build Coastguard Worker return false;
531*6777b538SAndroid Build Coastguard Worker }
532*6777b538SAndroid Build Coastguard Worker
533*6777b538SAndroid Build Coastguard Worker // StringBuilder will internally build a std::string_view unless a UTF-16
534*6777b538SAndroid Build Coastguard Worker // conversion occurs, at which point it will perform a copy into a
535*6777b538SAndroid Build Coastguard Worker // std::string.
536*6777b538SAndroid Build Coastguard Worker StringBuilder string(pos());
537*6777b538SAndroid Build Coastguard Worker
538*6777b538SAndroid Build Coastguard Worker while (std::optional<char> c = PeekChar()) {
539*6777b538SAndroid Build Coastguard Worker base_icu::UChar32 next_char = 0;
540*6777b538SAndroid Build Coastguard Worker if (static_cast<unsigned char>(*c) < kExtendedASCIIStart) {
541*6777b538SAndroid Build Coastguard Worker // Fast path for ASCII.
542*6777b538SAndroid Build Coastguard Worker next_char = *c;
543*6777b538SAndroid Build Coastguard Worker } else if (!ReadUnicodeCharacter(input_.data(), input_.length(), &index_,
544*6777b538SAndroid Build Coastguard Worker &next_char) ||
545*6777b538SAndroid Build Coastguard Worker !IsValidCodepoint(next_char)) {
546*6777b538SAndroid Build Coastguard Worker if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
547*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNSUPPORTED_ENCODING, 0);
548*6777b538SAndroid Build Coastguard Worker return false;
549*6777b538SAndroid Build Coastguard Worker }
550*6777b538SAndroid Build Coastguard Worker ConsumeChar();
551*6777b538SAndroid Build Coastguard Worker string.Append(kUnicodeReplacementPoint);
552*6777b538SAndroid Build Coastguard Worker continue;
553*6777b538SAndroid Build Coastguard Worker }
554*6777b538SAndroid Build Coastguard Worker
555*6777b538SAndroid Build Coastguard Worker if (next_char == '"') {
556*6777b538SAndroid Build Coastguard Worker ConsumeChar();
557*6777b538SAndroid Build Coastguard Worker *out = std::move(string);
558*6777b538SAndroid Build Coastguard Worker return true;
559*6777b538SAndroid Build Coastguard Worker }
560*6777b538SAndroid Build Coastguard Worker if (next_char != '\\') {
561*6777b538SAndroid Build Coastguard Worker // Per Section 7, "All Unicode characters may be placed within the
562*6777b538SAndroid Build Coastguard Worker // quotation marks, except for the characters that MUST be escaped:
563*6777b538SAndroid Build Coastguard Worker // quotation mark, reverse solidus, and the control characters (U+0000
564*6777b538SAndroid Build Coastguard Worker // through U+001F)".
565*6777b538SAndroid Build Coastguard Worker if (next_char == '\n' || next_char == '\r') {
566*6777b538SAndroid Build Coastguard Worker UmaHistogramEnumeration(kExtensionHistogramName,
567*6777b538SAndroid Build Coastguard Worker ChromiumJsonExtension::kNewlineInString);
568*6777b538SAndroid Build Coastguard Worker if (!(options_ &
569*6777b538SAndroid Build Coastguard Worker (JSON_ALLOW_NEWLINES_IN_STRINGS | JSON_ALLOW_CONTROL_CHARS))) {
570*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNSUPPORTED_ENCODING, -1);
571*6777b538SAndroid Build Coastguard Worker return false;
572*6777b538SAndroid Build Coastguard Worker }
573*6777b538SAndroid Build Coastguard Worker } else if (next_char <= 0x1F) {
574*6777b538SAndroid Build Coastguard Worker UmaHistogramEnumeration(kExtensionHistogramName,
575*6777b538SAndroid Build Coastguard Worker ChromiumJsonExtension::kControlCharacter);
576*6777b538SAndroid Build Coastguard Worker if (!(options_ & JSON_ALLOW_CONTROL_CHARS)) {
577*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNSUPPORTED_ENCODING, -1);
578*6777b538SAndroid Build Coastguard Worker return false;
579*6777b538SAndroid Build Coastguard Worker }
580*6777b538SAndroid Build Coastguard Worker }
581*6777b538SAndroid Build Coastguard Worker
582*6777b538SAndroid Build Coastguard Worker // If this character is not an escape sequence, track any line breaks and
583*6777b538SAndroid Build Coastguard Worker // copy next_char to the StringBuilder. The JSON spec forbids unescaped
584*6777b538SAndroid Build Coastguard Worker // ASCII control characters within a string, including '\r' and '\n', but
585*6777b538SAndroid Build Coastguard Worker // this implementation is more lenient.
586*6777b538SAndroid Build Coastguard Worker if ((next_char == '\r') || (next_char == '\n')) {
587*6777b538SAndroid Build Coastguard Worker index_last_line_ = index_;
588*6777b538SAndroid Build Coastguard Worker // Don't increment line_number_ twice for "\r\n". We are guaranteed
589*6777b538SAndroid Build Coastguard Worker // that (index_ > 0) because we are consuming a string, so we must have
590*6777b538SAndroid Build Coastguard Worker // seen an opening '"' quote character.
591*6777b538SAndroid Build Coastguard Worker if ((next_char == '\r') || (input_[index_ - 1] != '\r')) {
592*6777b538SAndroid Build Coastguard Worker ++line_number_;
593*6777b538SAndroid Build Coastguard Worker }
594*6777b538SAndroid Build Coastguard Worker }
595*6777b538SAndroid Build Coastguard Worker ConsumeChar();
596*6777b538SAndroid Build Coastguard Worker string.Append(next_char);
597*6777b538SAndroid Build Coastguard Worker } else {
598*6777b538SAndroid Build Coastguard Worker // And if it is an escape sequence, the input string will be adjusted
599*6777b538SAndroid Build Coastguard Worker // (either by combining the two characters of an encoded escape sequence,
600*6777b538SAndroid Build Coastguard Worker // or with a UTF conversion), so using std::string_view isn't possible --
601*6777b538SAndroid Build Coastguard Worker // force a conversion.
602*6777b538SAndroid Build Coastguard Worker string.Convert();
603*6777b538SAndroid Build Coastguard Worker
604*6777b538SAndroid Build Coastguard Worker // Read past the escape '\' and ensure there's a character following.
605*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> escape_sequence = ConsumeChars(2);
606*6777b538SAndroid Build Coastguard Worker if (!escape_sequence) {
607*6777b538SAndroid Build Coastguard Worker ReportError(JSON_INVALID_ESCAPE, -1);
608*6777b538SAndroid Build Coastguard Worker return false;
609*6777b538SAndroid Build Coastguard Worker }
610*6777b538SAndroid Build Coastguard Worker
611*6777b538SAndroid Build Coastguard Worker switch ((*escape_sequence)[1]) {
612*6777b538SAndroid Build Coastguard Worker // Allowed esape sequences:
613*6777b538SAndroid Build Coastguard Worker case 'x': { // UTF-8 sequence.
614*6777b538SAndroid Build Coastguard Worker // UTF-8 \x escape sequences are not allowed in the spec, but they
615*6777b538SAndroid Build Coastguard Worker // are supported here for backwards-compatiblity with the old parser.
616*6777b538SAndroid Build Coastguard Worker UmaHistogramEnumeration(kExtensionHistogramName,
617*6777b538SAndroid Build Coastguard Worker ChromiumJsonExtension::kXEscape);
618*6777b538SAndroid Build Coastguard Worker if (!(options_ & JSON_ALLOW_X_ESCAPES)) {
619*6777b538SAndroid Build Coastguard Worker ReportError(JSON_INVALID_ESCAPE, -1);
620*6777b538SAndroid Build Coastguard Worker return false;
621*6777b538SAndroid Build Coastguard Worker }
622*6777b538SAndroid Build Coastguard Worker
623*6777b538SAndroid Build Coastguard Worker escape_sequence = ConsumeChars(2);
624*6777b538SAndroid Build Coastguard Worker if (!escape_sequence) {
625*6777b538SAndroid Build Coastguard Worker ReportError(JSON_INVALID_ESCAPE, -3);
626*6777b538SAndroid Build Coastguard Worker return false;
627*6777b538SAndroid Build Coastguard Worker }
628*6777b538SAndroid Build Coastguard Worker
629*6777b538SAndroid Build Coastguard Worker int hex_digit = 0;
630*6777b538SAndroid Build Coastguard Worker if (!UnprefixedHexStringToInt(*escape_sequence, &hex_digit) ||
631*6777b538SAndroid Build Coastguard Worker !IsValidCharacter(hex_digit)) {
632*6777b538SAndroid Build Coastguard Worker ReportError(JSON_INVALID_ESCAPE, -3);
633*6777b538SAndroid Build Coastguard Worker return false;
634*6777b538SAndroid Build Coastguard Worker }
635*6777b538SAndroid Build Coastguard Worker
636*6777b538SAndroid Build Coastguard Worker string.Append(hex_digit);
637*6777b538SAndroid Build Coastguard Worker break;
638*6777b538SAndroid Build Coastguard Worker }
639*6777b538SAndroid Build Coastguard Worker case 'u': { // UTF-16 sequence.
640*6777b538SAndroid Build Coastguard Worker // UTF units are of the form \uXXXX.
641*6777b538SAndroid Build Coastguard Worker base_icu::UChar32 code_point;
642*6777b538SAndroid Build Coastguard Worker if (!DecodeUTF16(&code_point)) {
643*6777b538SAndroid Build Coastguard Worker ReportError(JSON_INVALID_ESCAPE, -1);
644*6777b538SAndroid Build Coastguard Worker return false;
645*6777b538SAndroid Build Coastguard Worker }
646*6777b538SAndroid Build Coastguard Worker string.Append(code_point);
647*6777b538SAndroid Build Coastguard Worker break;
648*6777b538SAndroid Build Coastguard Worker }
649*6777b538SAndroid Build Coastguard Worker case '"':
650*6777b538SAndroid Build Coastguard Worker string.Append('"');
651*6777b538SAndroid Build Coastguard Worker break;
652*6777b538SAndroid Build Coastguard Worker case '\\':
653*6777b538SAndroid Build Coastguard Worker string.Append('\\');
654*6777b538SAndroid Build Coastguard Worker break;
655*6777b538SAndroid Build Coastguard Worker case '/':
656*6777b538SAndroid Build Coastguard Worker string.Append('/');
657*6777b538SAndroid Build Coastguard Worker break;
658*6777b538SAndroid Build Coastguard Worker case 'b':
659*6777b538SAndroid Build Coastguard Worker string.Append('\b');
660*6777b538SAndroid Build Coastguard Worker break;
661*6777b538SAndroid Build Coastguard Worker case 'f':
662*6777b538SAndroid Build Coastguard Worker string.Append('\f');
663*6777b538SAndroid Build Coastguard Worker break;
664*6777b538SAndroid Build Coastguard Worker case 'n':
665*6777b538SAndroid Build Coastguard Worker string.Append('\n');
666*6777b538SAndroid Build Coastguard Worker break;
667*6777b538SAndroid Build Coastguard Worker case 'r':
668*6777b538SAndroid Build Coastguard Worker string.Append('\r');
669*6777b538SAndroid Build Coastguard Worker break;
670*6777b538SAndroid Build Coastguard Worker case 't':
671*6777b538SAndroid Build Coastguard Worker string.Append('\t');
672*6777b538SAndroid Build Coastguard Worker break;
673*6777b538SAndroid Build Coastguard Worker case 'v': // Not listed as valid escape sequence in the RFC.
674*6777b538SAndroid Build Coastguard Worker UmaHistogramEnumeration(kExtensionHistogramName,
675*6777b538SAndroid Build Coastguard Worker ChromiumJsonExtension::kVerticalTabEscape);
676*6777b538SAndroid Build Coastguard Worker if (!(options_ & JSON_ALLOW_VERT_TAB)) {
677*6777b538SAndroid Build Coastguard Worker ReportError(JSON_INVALID_ESCAPE, -1);
678*6777b538SAndroid Build Coastguard Worker return false;
679*6777b538SAndroid Build Coastguard Worker }
680*6777b538SAndroid Build Coastguard Worker string.Append('\v');
681*6777b538SAndroid Build Coastguard Worker break;
682*6777b538SAndroid Build Coastguard Worker // All other escape squences are illegal.
683*6777b538SAndroid Build Coastguard Worker default:
684*6777b538SAndroid Build Coastguard Worker ReportError(JSON_INVALID_ESCAPE, -1);
685*6777b538SAndroid Build Coastguard Worker return false;
686*6777b538SAndroid Build Coastguard Worker }
687*6777b538SAndroid Build Coastguard Worker }
688*6777b538SAndroid Build Coastguard Worker }
689*6777b538SAndroid Build Coastguard Worker
690*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, -1);
691*6777b538SAndroid Build Coastguard Worker return false;
692*6777b538SAndroid Build Coastguard Worker }
693*6777b538SAndroid Build Coastguard Worker
694*6777b538SAndroid Build Coastguard Worker // Entry is at the first X in \uXXXX.
DecodeUTF16(base_icu::UChar32 * out_code_point)695*6777b538SAndroid Build Coastguard Worker bool JSONParser::DecodeUTF16(base_icu::UChar32* out_code_point) {
696*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> escape_sequence = ConsumeChars(4);
697*6777b538SAndroid Build Coastguard Worker if (!escape_sequence)
698*6777b538SAndroid Build Coastguard Worker return false;
699*6777b538SAndroid Build Coastguard Worker
700*6777b538SAndroid Build Coastguard Worker // Consume the UTF-16 code unit, which may be a high surrogate.
701*6777b538SAndroid Build Coastguard Worker int code_unit16_high = 0;
702*6777b538SAndroid Build Coastguard Worker if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_high))
703*6777b538SAndroid Build Coastguard Worker return false;
704*6777b538SAndroid Build Coastguard Worker
705*6777b538SAndroid Build Coastguard Worker // If this is a high surrogate, consume the next code unit to get the
706*6777b538SAndroid Build Coastguard Worker // low surrogate.
707*6777b538SAndroid Build Coastguard Worker if (CBU16_IS_SURROGATE(code_unit16_high)) {
708*6777b538SAndroid Build Coastguard Worker // Make sure this is the high surrogate.
709*6777b538SAndroid Build Coastguard Worker if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
710*6777b538SAndroid Build Coastguard Worker if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
711*6777b538SAndroid Build Coastguard Worker return false;
712*6777b538SAndroid Build Coastguard Worker *out_code_point = kUnicodeReplacementPoint;
713*6777b538SAndroid Build Coastguard Worker return true;
714*6777b538SAndroid Build Coastguard Worker }
715*6777b538SAndroid Build Coastguard Worker
716*6777b538SAndroid Build Coastguard Worker // Make sure that the token has more characters to consume the
717*6777b538SAndroid Build Coastguard Worker // lower surrogate.
718*6777b538SAndroid Build Coastguard Worker if (!ConsumeIfMatch("\\u")) {
719*6777b538SAndroid Build Coastguard Worker if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
720*6777b538SAndroid Build Coastguard Worker return false;
721*6777b538SAndroid Build Coastguard Worker *out_code_point = kUnicodeReplacementPoint;
722*6777b538SAndroid Build Coastguard Worker return true;
723*6777b538SAndroid Build Coastguard Worker }
724*6777b538SAndroid Build Coastguard Worker
725*6777b538SAndroid Build Coastguard Worker escape_sequence = ConsumeChars(4);
726*6777b538SAndroid Build Coastguard Worker if (!escape_sequence)
727*6777b538SAndroid Build Coastguard Worker return false;
728*6777b538SAndroid Build Coastguard Worker
729*6777b538SAndroid Build Coastguard Worker int code_unit16_low = 0;
730*6777b538SAndroid Build Coastguard Worker if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low))
731*6777b538SAndroid Build Coastguard Worker return false;
732*6777b538SAndroid Build Coastguard Worker
733*6777b538SAndroid Build Coastguard Worker if (!CBU16_IS_TRAIL(code_unit16_low)) {
734*6777b538SAndroid Build Coastguard Worker if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
735*6777b538SAndroid Build Coastguard Worker return false;
736*6777b538SAndroid Build Coastguard Worker *out_code_point = kUnicodeReplacementPoint;
737*6777b538SAndroid Build Coastguard Worker return true;
738*6777b538SAndroid Build Coastguard Worker }
739*6777b538SAndroid Build Coastguard Worker
740*6777b538SAndroid Build Coastguard Worker base_icu::UChar32 code_point =
741*6777b538SAndroid Build Coastguard Worker CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
742*6777b538SAndroid Build Coastguard Worker
743*6777b538SAndroid Build Coastguard Worker *out_code_point = code_point;
744*6777b538SAndroid Build Coastguard Worker } else {
745*6777b538SAndroid Build Coastguard Worker // Not a surrogate.
746*6777b538SAndroid Build Coastguard Worker DCHECK(CBU16_IS_SINGLE(code_unit16_high));
747*6777b538SAndroid Build Coastguard Worker
748*6777b538SAndroid Build Coastguard Worker *out_code_point = code_unit16_high;
749*6777b538SAndroid Build Coastguard Worker }
750*6777b538SAndroid Build Coastguard Worker
751*6777b538SAndroid Build Coastguard Worker return true;
752*6777b538SAndroid Build Coastguard Worker }
753*6777b538SAndroid Build Coastguard Worker
ConsumeNumber()754*6777b538SAndroid Build Coastguard Worker std::optional<Value> JSONParser::ConsumeNumber() {
755*6777b538SAndroid Build Coastguard Worker const char* num_start = pos();
756*6777b538SAndroid Build Coastguard Worker const size_t start_index = index_;
757*6777b538SAndroid Build Coastguard Worker size_t end_index = start_index;
758*6777b538SAndroid Build Coastguard Worker
759*6777b538SAndroid Build Coastguard Worker if (PeekChar() == '-')
760*6777b538SAndroid Build Coastguard Worker ConsumeChar();
761*6777b538SAndroid Build Coastguard Worker
762*6777b538SAndroid Build Coastguard Worker if (!ReadInt(false)) {
763*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, 0);
764*6777b538SAndroid Build Coastguard Worker return std::nullopt;
765*6777b538SAndroid Build Coastguard Worker }
766*6777b538SAndroid Build Coastguard Worker end_index = index_;
767*6777b538SAndroid Build Coastguard Worker
768*6777b538SAndroid Build Coastguard Worker // The optional fraction part.
769*6777b538SAndroid Build Coastguard Worker if (PeekChar() == '.') {
770*6777b538SAndroid Build Coastguard Worker ConsumeChar();
771*6777b538SAndroid Build Coastguard Worker if (!ReadInt(true)) {
772*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, 0);
773*6777b538SAndroid Build Coastguard Worker return std::nullopt;
774*6777b538SAndroid Build Coastguard Worker }
775*6777b538SAndroid Build Coastguard Worker end_index = index_;
776*6777b538SAndroid Build Coastguard Worker }
777*6777b538SAndroid Build Coastguard Worker
778*6777b538SAndroid Build Coastguard Worker // Optional exponent part.
779*6777b538SAndroid Build Coastguard Worker std::optional<char> c = PeekChar();
780*6777b538SAndroid Build Coastguard Worker if (c == 'e' || c == 'E') {
781*6777b538SAndroid Build Coastguard Worker ConsumeChar();
782*6777b538SAndroid Build Coastguard Worker if (PeekChar() == '-' || PeekChar() == '+') {
783*6777b538SAndroid Build Coastguard Worker ConsumeChar();
784*6777b538SAndroid Build Coastguard Worker }
785*6777b538SAndroid Build Coastguard Worker if (!ReadInt(true)) {
786*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, 0);
787*6777b538SAndroid Build Coastguard Worker return std::nullopt;
788*6777b538SAndroid Build Coastguard Worker }
789*6777b538SAndroid Build Coastguard Worker end_index = index_;
790*6777b538SAndroid Build Coastguard Worker }
791*6777b538SAndroid Build Coastguard Worker
792*6777b538SAndroid Build Coastguard Worker // ReadInt is greedy because numbers have no easily detectable sentinel,
793*6777b538SAndroid Build Coastguard Worker // so save off where the parser should be on exit (see Consume invariant at
794*6777b538SAndroid Build Coastguard Worker // the top of the header), then make sure the next token is one which is
795*6777b538SAndroid Build Coastguard Worker // valid.
796*6777b538SAndroid Build Coastguard Worker size_t exit_index = index_;
797*6777b538SAndroid Build Coastguard Worker
798*6777b538SAndroid Build Coastguard Worker switch (GetNextToken()) {
799*6777b538SAndroid Build Coastguard Worker case T_OBJECT_END:
800*6777b538SAndroid Build Coastguard Worker case T_ARRAY_END:
801*6777b538SAndroid Build Coastguard Worker case T_LIST_SEPARATOR:
802*6777b538SAndroid Build Coastguard Worker case T_END_OF_INPUT:
803*6777b538SAndroid Build Coastguard Worker break;
804*6777b538SAndroid Build Coastguard Worker default:
805*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, 0);
806*6777b538SAndroid Build Coastguard Worker return std::nullopt;
807*6777b538SAndroid Build Coastguard Worker }
808*6777b538SAndroid Build Coastguard Worker
809*6777b538SAndroid Build Coastguard Worker index_ = exit_index;
810*6777b538SAndroid Build Coastguard Worker
811*6777b538SAndroid Build Coastguard Worker std::string_view num_string(num_start, end_index - start_index);
812*6777b538SAndroid Build Coastguard Worker
813*6777b538SAndroid Build Coastguard Worker int num_int;
814*6777b538SAndroid Build Coastguard Worker if (StringToInt(num_string, &num_int)) {
815*6777b538SAndroid Build Coastguard Worker // StringToInt will treat `-0` as zero, losing the significance of the
816*6777b538SAndroid Build Coastguard Worker // negation.
817*6777b538SAndroid Build Coastguard Worker if (num_int == 0 && num_string.starts_with('-')) {
818*6777b538SAndroid Build Coastguard Worker if (base::FeatureList::IsEnabled(features::kJsonNegativeZero)) {
819*6777b538SAndroid Build Coastguard Worker return Value(-0.0);
820*6777b538SAndroid Build Coastguard Worker }
821*6777b538SAndroid Build Coastguard Worker }
822*6777b538SAndroid Build Coastguard Worker return Value(num_int);
823*6777b538SAndroid Build Coastguard Worker }
824*6777b538SAndroid Build Coastguard Worker
825*6777b538SAndroid Build Coastguard Worker double num_double;
826*6777b538SAndroid Build Coastguard Worker if (StringToDouble(num_string, &num_double) && std::isfinite(num_double)) {
827*6777b538SAndroid Build Coastguard Worker return Value(num_double);
828*6777b538SAndroid Build Coastguard Worker }
829*6777b538SAndroid Build Coastguard Worker
830*6777b538SAndroid Build Coastguard Worker ReportError(JSON_UNREPRESENTABLE_NUMBER, 0);
831*6777b538SAndroid Build Coastguard Worker return std::nullopt;
832*6777b538SAndroid Build Coastguard Worker }
833*6777b538SAndroid Build Coastguard Worker
ReadInt(bool allow_leading_zeros)834*6777b538SAndroid Build Coastguard Worker bool JSONParser::ReadInt(bool allow_leading_zeros) {
835*6777b538SAndroid Build Coastguard Worker size_t len = 0;
836*6777b538SAndroid Build Coastguard Worker char first = 0;
837*6777b538SAndroid Build Coastguard Worker
838*6777b538SAndroid Build Coastguard Worker while (std::optional<char> c = PeekChar()) {
839*6777b538SAndroid Build Coastguard Worker if (!IsAsciiDigit(c))
840*6777b538SAndroid Build Coastguard Worker break;
841*6777b538SAndroid Build Coastguard Worker
842*6777b538SAndroid Build Coastguard Worker if (len == 0)
843*6777b538SAndroid Build Coastguard Worker first = *c;
844*6777b538SAndroid Build Coastguard Worker
845*6777b538SAndroid Build Coastguard Worker ++len;
846*6777b538SAndroid Build Coastguard Worker ConsumeChar();
847*6777b538SAndroid Build Coastguard Worker }
848*6777b538SAndroid Build Coastguard Worker
849*6777b538SAndroid Build Coastguard Worker if (len == 0)
850*6777b538SAndroid Build Coastguard Worker return false;
851*6777b538SAndroid Build Coastguard Worker
852*6777b538SAndroid Build Coastguard Worker if (!allow_leading_zeros && len > 1 && first == '0')
853*6777b538SAndroid Build Coastguard Worker return false;
854*6777b538SAndroid Build Coastguard Worker
855*6777b538SAndroid Build Coastguard Worker return true;
856*6777b538SAndroid Build Coastguard Worker }
857*6777b538SAndroid Build Coastguard Worker
ConsumeLiteral()858*6777b538SAndroid Build Coastguard Worker std::optional<Value> JSONParser::ConsumeLiteral() {
859*6777b538SAndroid Build Coastguard Worker if (ConsumeIfMatch("true"))
860*6777b538SAndroid Build Coastguard Worker return Value(true);
861*6777b538SAndroid Build Coastguard Worker if (ConsumeIfMatch("false"))
862*6777b538SAndroid Build Coastguard Worker return Value(false);
863*6777b538SAndroid Build Coastguard Worker if (ConsumeIfMatch("null"))
864*6777b538SAndroid Build Coastguard Worker return Value(Value::Type::NONE);
865*6777b538SAndroid Build Coastguard Worker ReportError(JSON_SYNTAX_ERROR, 0);
866*6777b538SAndroid Build Coastguard Worker return std::nullopt;
867*6777b538SAndroid Build Coastguard Worker }
868*6777b538SAndroid Build Coastguard Worker
ConsumeIfMatch(std::string_view match)869*6777b538SAndroid Build Coastguard Worker bool JSONParser::ConsumeIfMatch(std::string_view match) {
870*6777b538SAndroid Build Coastguard Worker if (match == PeekChars(match.size())) {
871*6777b538SAndroid Build Coastguard Worker ConsumeChars(match.size());
872*6777b538SAndroid Build Coastguard Worker return true;
873*6777b538SAndroid Build Coastguard Worker }
874*6777b538SAndroid Build Coastguard Worker return false;
875*6777b538SAndroid Build Coastguard Worker }
876*6777b538SAndroid Build Coastguard Worker
ReportError(JsonParseError code,int column_adjust)877*6777b538SAndroid Build Coastguard Worker void JSONParser::ReportError(JsonParseError code, int column_adjust) {
878*6777b538SAndroid Build Coastguard Worker error_code_ = code;
879*6777b538SAndroid Build Coastguard Worker error_line_ = line_number_;
880*6777b538SAndroid Build Coastguard Worker error_column_ = static_cast<int>(index_ - index_last_line_) + column_adjust;
881*6777b538SAndroid Build Coastguard Worker
882*6777b538SAndroid Build Coastguard Worker // For a final blank line ('\n' and then EOF), a negative column_adjust may
883*6777b538SAndroid Build Coastguard Worker // put us below 1, which doesn't really make sense for 1-based columns.
884*6777b538SAndroid Build Coastguard Worker if (error_column_ < 1) {
885*6777b538SAndroid Build Coastguard Worker error_column_ = 1;
886*6777b538SAndroid Build Coastguard Worker }
887*6777b538SAndroid Build Coastguard Worker }
888*6777b538SAndroid Build Coastguard Worker
889*6777b538SAndroid Build Coastguard Worker // static
FormatErrorMessage(int line,int column,const std::string & description)890*6777b538SAndroid Build Coastguard Worker std::string JSONParser::FormatErrorMessage(int line, int column,
891*6777b538SAndroid Build Coastguard Worker const std::string& description) {
892*6777b538SAndroid Build Coastguard Worker if (line || column) {
893*6777b538SAndroid Build Coastguard Worker return StringPrintf("Line: %i, column: %i, %s",
894*6777b538SAndroid Build Coastguard Worker line, column, description.c_str());
895*6777b538SAndroid Build Coastguard Worker }
896*6777b538SAndroid Build Coastguard Worker return description;
897*6777b538SAndroid Build Coastguard Worker }
898*6777b538SAndroid Build Coastguard Worker
899*6777b538SAndroid Build Coastguard Worker } // namespace internal
900*6777b538SAndroid Build Coastguard Worker } // namespace base
901