1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/json/json_parser.h"
6
7 #include <cmath>
8 #include <iterator>
9 #include <string_view>
10 #include <utility>
11 #include <vector>
12
13 #include "base/check_op.h"
14 #include "base/feature_list.h"
15 #include "base/features.h"
16 #include "base/json/json_reader.h"
17 #include "base/metrics/histogram_functions.h"
18 #include "base/notreached.h"
19 #include "base/numerics/safe_conversions.h"
20 #include "base/ranges/algorithm.h"
21 #include "base/strings/string_number_conversions.h"
22 #include "base/strings/string_util.h"
23 #include "base/strings/stringprintf.h"
24 #include "base/strings/utf_string_conversion_utils.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "base/third_party/icu/icu_utf.h"
27
28 namespace base {
29 namespace internal {
30
31 namespace {
32
// Values 1000 and above are used by JSONFileValueSerializer::JsonFileError, so
// the JSONParser error codes are required to stay below 1000; this assertion
// fails the build if the error-code count ever reaches that bound.
static_assert(JSONParser::JSON_PARSE_ERROR_COUNT < 1000,
              "JSONParser error out of bounds");
36
ErrorCodeToString(JSONParser::JsonParseError error_code)37 std::string ErrorCodeToString(JSONParser::JsonParseError error_code) {
38 switch (error_code) {
39 case JSONParser::JSON_NO_ERROR:
40 return std::string();
41 case JSONParser::JSON_SYNTAX_ERROR:
42 return JSONParser::kSyntaxError;
43 case JSONParser::JSON_INVALID_ESCAPE:
44 return JSONParser::kInvalidEscape;
45 case JSONParser::JSON_UNEXPECTED_TOKEN:
46 return JSONParser::kUnexpectedToken;
47 case JSONParser::JSON_TRAILING_COMMA:
48 return JSONParser::kTrailingComma;
49 case JSONParser::JSON_TOO_MUCH_NESTING:
50 return JSONParser::kTooMuchNesting;
51 case JSONParser::JSON_UNEXPECTED_DATA_AFTER_ROOT:
52 return JSONParser::kUnexpectedDataAfterRoot;
53 case JSONParser::JSON_UNSUPPORTED_ENCODING:
54 return JSONParser::kUnsupportedEncoding;
55 case JSONParser::JSON_UNQUOTED_DICTIONARY_KEY:
56 return JSONParser::kUnquotedDictionaryKey;
57 case JSONParser::JSON_UNREPRESENTABLE_NUMBER:
58 return JSONParser::kUnrepresentableNumber;
59 case JSONParser::JSON_PARSE_ERROR_COUNT:
60 break;
61 }
62 NOTREACHED();
63 return std::string();
64 }
65
// First code point outside 7-bit ASCII. Code points below this value take the
// single-byte paths in the string builder and the string parser.
constexpr int32_t kExtendedASCIIStart = 0x80;
// U+FFFD, the code point appended in place of invalid input when
// JSON_REPLACE_INVALID_CHARACTERS is set.
constexpr base_icu::UChar32 kUnicodeReplacementPoint = 0xFFFD;
68
69 // UnprefixedHexStringToInt acts like |HexStringToInt|, but enforces that the
70 // input consists purely of hex digits. I.e. no "0x" nor "OX" prefix is
71 // permitted.
UnprefixedHexStringToInt(std::string_view input,int * output)72 bool UnprefixedHexStringToInt(std::string_view input, int* output) {
73 for (size_t i = 0; i < input.size(); i++) {
74 if (!IsHexDigit(input[i])) {
75 return false;
76 }
77 }
78 return HexStringToInt(input, output);
79 }
80
// Non-standard JSON extensions whose usage is recorded in a histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused. The values are spelled out so that
// inserting or reordering an entry cannot silently renumber the others.
enum class ChromiumJsonExtension {
  kCComment = 0,
  kCppComment = 1,
  kXEscape = 2,
  kVerticalTabEscape = 3,
  kControlCharacter = 4,
  kNewlineInString = 5,
  kMaxValue = kNewlineInString,
};
92
// Name of the histogram that receives ChromiumJsonExtension samples.
constexpr char kExtensionHistogramName[] =
    "Security.JSONParser.ChromiumExtensionUsage";
95
96 } // namespace
97
// The three-byte UTF-8 encoding of U+FFFD (the Unicode replacement character).
// StringBuilder::Append() writes this when asked to append that code point.
const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";
100
// Human-readable descriptions of each parse error. ErrorCodeToString() maps a
// JsonParseError to one of these, and GetErrorMessage() embeds the result in
// the formatted error message.
const char JSONParser::kSyntaxError[] = "Syntax error.";
const char JSONParser::kInvalidEscape[] = "Invalid escape sequence.";
const char JSONParser::kUnexpectedToken[] = "Unexpected token.";
const char JSONParser::kTrailingComma[] = "Trailing comma not allowed.";
const char JSONParser::kTooMuchNesting[] = "Too much nesting.";
const char JSONParser::kUnexpectedDataAfterRoot[] =
    "Unexpected data after root element.";
const char JSONParser::kUnsupportedEncoding[] =
    "Unsupported encoding. JSON must be UTF-8.";
const char JSONParser::kUnquotedDictionaryKey[] =
    "Dictionary keys must be quoted.";
const char JSONParser::kUnrepresentableNumber[] =
    "Number cannot be represented.";
114
// Creates a parser configured with |options| (a bitmask tested against the
// JSON_* option flags throughout this file) and a nesting limit of
// |max_depth|. CHECK-fails if |max_depth| exceeds kAbsoluteMaxDepth. All
// position and error state starts zeroed; Parse() re-initializes it for each
// input.
JSONParser::JSONParser(int options, size_t max_depth)
    : options_(options),
      max_depth_(max_depth),
      index_(0),
      stack_depth_(0),
      line_number_(0),
      index_last_line_(0),
      error_code_(JSON_NO_ERROR),
      error_line_(0),
      error_column_(0) {
  CHECK_LE(max_depth, kAbsoluteMaxDepth);
}

JSONParser::~JSONParser() = default;
129
Parse(std::string_view input)130 std::optional<Value> JSONParser::Parse(std::string_view input) {
131 input_ = input;
132 index_ = 0;
133 // Line and column counting is 1-based, but |index_| is 0-based. For example,
134 // if input is "Aaa\nB" then 'A' and 'B' are both in column 1 (at lines 1 and
135 // 2) and have indexes of 0 and 4. We track the line number explicitly (the
136 // |line_number_| field) and the column number implicitly (the difference
137 // between |index_| and |index_last_line_|). In calculating that difference,
138 // |index_last_line_| is the index of the '\r' or '\n', not the index of the
139 // first byte after the '\n'. For the 'B' in "Aaa\nB", its |index_| and
140 // |index_last_line_| would be 4 and 3: 'B' is in column (4 - 3) = 1. We
141 // initialize |index_last_line_| to -1, not 0, since -1 is the (out of range)
142 // index of the imaginary '\n' immediately before the start of the string:
143 // 'A' is in column (0 - -1) = 1.
144 line_number_ = 1;
145 index_last_line_ = static_cast<size_t>(-1);
146
147 error_code_ = JSON_NO_ERROR;
148 error_line_ = 0;
149 error_column_ = 0;
150
151 // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
152 // advance the start position to avoid the ParseNextToken function mis-
153 // treating a Unicode BOM as an invalid character and returning NULL.
154 ConsumeIfMatch("\xEF\xBB\xBF");
155
156 // Parse the first and any nested tokens.
157 std::optional<Value> root(ParseNextToken());
158 if (!root)
159 return std::nullopt;
160
161 // Make sure the input stream is at an end.
162 if (GetNextToken() != T_END_OF_INPUT) {
163 ReportError(JSON_UNEXPECTED_DATA_AFTER_ROOT, 0);
164 return std::nullopt;
165 }
166
167 return root;
168 }
169
// Returns the error code recorded by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been reported since Parse() last reset it.
JSONParser::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
173
GetErrorMessage() const174 std::string JSONParser::GetErrorMessage() const {
175 return FormatErrorMessage(error_line_, error_column_,
176 ErrorCodeToString(error_code_));
177 }
178
// Returns the 1-based line recorded by the most recent ReportError() call, or
// 0 if no error has been reported since Parse() last reset it.
int JSONParser::error_line() const {
  return error_line_;
}
182
// Returns the 1-based column recorded by the most recent ReportError() call,
// or 0 if no error has been reported since Parse() last reset it.
int JSONParser::error_column() const {
  return error_column_;
}
186
187 // StringBuilder ///////////////////////////////////////////////////////////////
188
// Constructs an empty builder with a null start pointer.
JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}

// Constructs an empty builder whose contents, while it has not yet been
// converted to an owned string, are the |length_| bytes starting at |pos|.
JSONParser::StringBuilder::StringBuilder(const char* pos)
    : pos_(pos), length_(0) {}

JSONParser::StringBuilder::~StringBuilder() = default;

// Move assignment; used by ConsumeStringRaw() to hand its result to the
// caller.
JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
    StringBuilder&& other) = default;
198
Append(base_icu::UChar32 point)199 void JSONParser::StringBuilder::Append(base_icu::UChar32 point) {
200 DCHECK(IsValidCodepoint(point));
201
202 if (point < kExtendedASCIIStart) {
203 if (!string_) {
204 DCHECK_EQ(static_cast<char>(point), pos_[length_]);
205 ++length_;
206 } else {
207 string_->push_back(static_cast<char>(point));
208 }
209 } else {
210 Convert();
211 if (UNLIKELY(point == kUnicodeReplacementPoint)) {
212 string_->append(kUnicodeReplacementString);
213 } else {
214 WriteUnicodeCharacter(point, &*string_);
215 }
216 }
217 }
218
Convert()219 void JSONParser::StringBuilder::Convert() {
220 if (string_)
221 return;
222 string_.emplace(pos_, length_);
223 }
224
DestructiveAsString()225 std::string JSONParser::StringBuilder::DestructiveAsString() {
226 if (string_)
227 return std::move(*string_);
228 return std::string(pos_, length_);
229 }
230
231 // JSONParser private //////////////////////////////////////////////////////////
232
PeekChars(size_t count)233 std::optional<std::string_view> JSONParser::PeekChars(size_t count) {
234 if (index_ + count > input_.length())
235 return std::nullopt;
236 // Using StringPiece::substr() is significantly slower (according to
237 // base_perftests) than constructing a substring manually.
238 return std::string_view(input_.data() + index_, count);
239 }
240
PeekChar()241 std::optional<char> JSONParser::PeekChar() {
242 std::optional<std::string_view> chars = PeekChars(1);
243 if (chars)
244 return (*chars)[0];
245 return std::nullopt;
246 }
247
ConsumeChars(size_t count)248 std::optional<std::string_view> JSONParser::ConsumeChars(size_t count) {
249 std::optional<std::string_view> chars = PeekChars(count);
250 if (chars)
251 index_ += count;
252 return chars;
253 }
254
ConsumeChar()255 std::optional<char> JSONParser::ConsumeChar() {
256 std::optional<std::string_view> chars = ConsumeChars(1);
257 if (chars)
258 return (*chars)[0];
259 return std::nullopt;
260 }
261
// Returns a pointer to the current read position within the input.
// CHECK-fails if |index_| has moved past the end of the input; a pointer to
// one-past-the-end is allowed.
const char* JSONParser::pos() {
  CHECK_LE(index_, input_.length());
  return input_.data() + index_;
}
266
GetNextToken()267 JSONParser::Token JSONParser::GetNextToken() {
268 EatWhitespaceAndComments();
269
270 std::optional<char> c = PeekChar();
271 if (!c)
272 return T_END_OF_INPUT;
273
274 switch (*c) {
275 case '{':
276 return T_OBJECT_BEGIN;
277 case '}':
278 return T_OBJECT_END;
279 case '[':
280 return T_ARRAY_BEGIN;
281 case ']':
282 return T_ARRAY_END;
283 case '"':
284 return T_STRING;
285 case '0':
286 case '1':
287 case '2':
288 case '3':
289 case '4':
290 case '5':
291 case '6':
292 case '7':
293 case '8':
294 case '9':
295 case '-':
296 return T_NUMBER;
297 case 't':
298 return T_BOOL_TRUE;
299 case 'f':
300 return T_BOOL_FALSE;
301 case 'n':
302 return T_NULL;
303 case ',':
304 return T_LIST_SEPARATOR;
305 case ':':
306 return T_OBJECT_PAIR_SEPARATOR;
307 default:
308 return T_INVALID_TOKEN;
309 }
310 }
311
EatWhitespaceAndComments()312 void JSONParser::EatWhitespaceAndComments() {
313 while (std::optional<char> c = PeekChar()) {
314 switch (*c) {
315 case '\r':
316 case '\n':
317 index_last_line_ = index_;
318 // Don't increment line_number_ twice for "\r\n".
319 if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
320 ++line_number_;
321 }
322 [[fallthrough]];
323 case ' ':
324 case '\t':
325 ConsumeChar();
326 break;
327 case '/':
328 if (!EatComment())
329 return;
330 break;
331 default:
332 return;
333 }
334 }
335 }
336
EatComment()337 bool JSONParser::EatComment() {
338 std::optional<std::string_view> comment_start = PeekChars(2);
339 if (!comment_start)
340 return false;
341
342 const bool comments_allowed = options_ & JSON_ALLOW_COMMENTS;
343
344 if (comment_start == "//") {
345 UmaHistogramEnumeration(kExtensionHistogramName,
346 ChromiumJsonExtension::kCppComment);
347 if (!comments_allowed) {
348 ReportError(JSON_UNEXPECTED_TOKEN, 0);
349 return false;
350 }
351
352 ConsumeChars(2);
353 // Single line comment, read to newline.
354 while (std::optional<char> c = PeekChar()) {
355 if (c == '\n' || c == '\r')
356 return true;
357 ConsumeChar();
358 }
359 } else if (comment_start == "/*") {
360 UmaHistogramEnumeration(kExtensionHistogramName,
361 ChromiumJsonExtension::kCComment);
362 if (!comments_allowed) {
363 ReportError(JSON_UNEXPECTED_TOKEN, 0);
364 return false;
365 }
366
367 ConsumeChars(2);
368 char previous_char = '\0';
369 // Block comment, read until end marker.
370 while (std::optional<char> c = PeekChar()) {
371 if (previous_char == '*' && c == '/') {
372 // EatWhitespaceAndComments will inspect pos(), which will still be on
373 // the last / of the comment, so advance once more (which may also be
374 // end of input).
375 ConsumeChar();
376 return true;
377 }
378 previous_char = *ConsumeChar();
379 }
380
381 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
382 }
383
384 return false;
385 }
386
// Identifies the next token (skipping whitespace and comments) and parses the
// value that starts there. Returns std::nullopt if that fails.
std::optional<Value> JSONParser::ParseNextToken() {
  return ParseToken(GetNextToken());
}
390
ParseToken(Token token)391 std::optional<Value> JSONParser::ParseToken(Token token) {
392 switch (token) {
393 case T_OBJECT_BEGIN:
394 return ConsumeDictionary();
395 case T_ARRAY_BEGIN:
396 return ConsumeList();
397 case T_STRING:
398 return ConsumeString();
399 case T_NUMBER:
400 return ConsumeNumber();
401 case T_BOOL_TRUE:
402 case T_BOOL_FALSE:
403 case T_NULL:
404 return ConsumeLiteral();
405 default:
406 ReportError(JSON_UNEXPECTED_TOKEN, 0);
407 return std::nullopt;
408 }
409 }
410
ConsumeDictionary()411 std::optional<Value> JSONParser::ConsumeDictionary() {
412 if (ConsumeChar() != '{') {
413 ReportError(JSON_UNEXPECTED_TOKEN, 0);
414 return std::nullopt;
415 }
416
417 StackMarker depth_check(max_depth_, &stack_depth_);
418 if (depth_check.IsTooDeep()) {
419 ReportError(JSON_TOO_MUCH_NESTING, -1);
420 return std::nullopt;
421 }
422
423 std::vector<std::pair<std::string, Value>> values;
424
425 Token token = GetNextToken();
426 while (token != T_OBJECT_END) {
427 if (token != T_STRING) {
428 ReportError(JSON_UNQUOTED_DICTIONARY_KEY, 0);
429 return std::nullopt;
430 }
431
432 // First consume the key.
433 StringBuilder key;
434 if (!ConsumeStringRaw(&key)) {
435 return std::nullopt;
436 }
437
438 // Read the separator.
439 token = GetNextToken();
440 if (token != T_OBJECT_PAIR_SEPARATOR) {
441 ReportError(JSON_SYNTAX_ERROR, 0);
442 return std::nullopt;
443 }
444
445 // The next token is the value. Ownership transfers to |dict|.
446 ConsumeChar();
447 std::optional<Value> value = ParseNextToken();
448 if (!value) {
449 // ReportError from deeper level.
450 return std::nullopt;
451 }
452
453 values.emplace_back(key.DestructiveAsString(), std::move(*value));
454
455 token = GetNextToken();
456 if (token == T_LIST_SEPARATOR) {
457 ConsumeChar();
458 token = GetNextToken();
459 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
460 ReportError(JSON_TRAILING_COMMA, 0);
461 return std::nullopt;
462 }
463 } else if (token != T_OBJECT_END) {
464 ReportError(JSON_SYNTAX_ERROR, 0);
465 return std::nullopt;
466 }
467 }
468
469 ConsumeChar(); // Closing '}'.
470 // Reverse |dict_storage| to keep the last of elements with the same key in
471 // the input.
472 ranges::reverse(values);
473 return Value(Value::Dict(std::make_move_iterator(values.begin()),
474 std::make_move_iterator(values.end())));
475 }
476
ConsumeList()477 std::optional<Value> JSONParser::ConsumeList() {
478 if (ConsumeChar() != '[') {
479 ReportError(JSON_UNEXPECTED_TOKEN, 0);
480 return std::nullopt;
481 }
482
483 StackMarker depth_check(max_depth_, &stack_depth_);
484 if (depth_check.IsTooDeep()) {
485 ReportError(JSON_TOO_MUCH_NESTING, -1);
486 return std::nullopt;
487 }
488
489 Value::List list;
490
491 Token token = GetNextToken();
492 while (token != T_ARRAY_END) {
493 std::optional<Value> item = ParseToken(token);
494 if (!item) {
495 // ReportError from deeper level.
496 return std::nullopt;
497 }
498
499 list.Append(std::move(*item));
500
501 token = GetNextToken();
502 if (token == T_LIST_SEPARATOR) {
503 ConsumeChar();
504 token = GetNextToken();
505 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
506 ReportError(JSON_TRAILING_COMMA, 0);
507 return std::nullopt;
508 }
509 } else if (token != T_ARRAY_END) {
510 ReportError(JSON_SYNTAX_ERROR, 0);
511 return std::nullopt;
512 }
513 }
514
515 ConsumeChar(); // Closing ']'.
516
517 return Value(std::move(list));
518 }
519
ConsumeString()520 std::optional<Value> JSONParser::ConsumeString() {
521 StringBuilder string;
522 if (!ConsumeStringRaw(&string))
523 return std::nullopt;
524 return Value(string.DestructiveAsString());
525 }
526
// Consumes a double-quoted string, starting at its opening '"' and ending
// just past its closing '"'. On success the decoded contents are moved into
// |*out| and true is returned. On failure an error is reported via
// ReportError(), false is returned and |*out| is not assigned.
bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
  if (ConsumeChar() != '"') {
    ReportError(JSON_UNEXPECTED_TOKEN, 0);
    return false;
  }

  // StringBuilder refers directly to the input bytes until something is
  // appended that cannot be represented that way (a non-ASCII code point or
  // any escape sequence), at which point it copies into a std::string.
  StringBuilder string(pos());

  while (std::optional<char> c = PeekChar()) {
    base_icu::UChar32 next_char = 0;
    if (static_cast<unsigned char>(*c) < kExtendedASCIIStart) {
      // Fast path for ASCII.
      next_char = *c;
    } else if (!ReadUnicodeCharacter(input_.data(), input_.length(), &index_,
                                     &next_char) ||
               !IsValidCodepoint(next_char)) {
      // The bytes here do not decode to a valid code point: either fail, or
      // substitute U+FFFD if the caller asked for replacement.
      if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
        ReportError(JSON_UNSUPPORTED_ENCODING, 0);
        return false;
      }
      ConsumeChar();
      string.Append(kUnicodeReplacementPoint);
      continue;
    }

    if (next_char == '"') {
      // Closing quote: hand the result to the caller.
      ConsumeChar();
      *out = std::move(string);
      return true;
    }
    if (next_char != '\\') {
      // Per Section 7, "All Unicode characters may be placed within the
      // quotation marks, except for the characters that MUST be escaped:
      // quotation mark, reverse solidus, and the control characters (U+0000
      // through U+001F)".
      if (next_char == '\n' || next_char == '\r') {
        UmaHistogramEnumeration(kExtensionHistogramName,
                                ChromiumJsonExtension::kNewlineInString);
        if (!(options_ &
              (JSON_ALLOW_NEWLINES_IN_STRINGS | JSON_ALLOW_CONTROL_CHARS))) {
          ReportError(JSON_UNSUPPORTED_ENCODING, -1);
          return false;
        }
      } else if (next_char <= 0x1F) {
        UmaHistogramEnumeration(kExtensionHistogramName,
                                ChromiumJsonExtension::kControlCharacter);
        if (!(options_ & JSON_ALLOW_CONTROL_CHARS)) {
          ReportError(JSON_UNSUPPORTED_ENCODING, -1);
          return false;
        }
      }

      // If this character is not an escape sequence, track any line breaks and
      // copy next_char to the StringBuilder. The JSON spec forbids unescaped
      // ASCII control characters within a string, including '\r' and '\n';
      // they only get this far when the options checked above allow them.
      if ((next_char == '\r') || (next_char == '\n')) {
        index_last_line_ = index_;
        // Don't increment line_number_ twice for "\r\n". We are guaranteed
        // that (index_ > 0) because we are consuming a string, so we must have
        // seen an opening '"' quote character.
        if ((next_char == '\r') || (input_[index_ - 1] != '\r')) {
          ++line_number_;
        }
      }
      ConsumeChar();
      string.Append(next_char);
    } else {
      // And if it is an escape sequence, the input string will be adjusted
      // (either by combining the two characters of an encoded escape sequence,
      // or with a UTF conversion), so using std::string_view isn't possible --
      // force a conversion.
      string.Convert();

      // Read past the escape '\' and ensure there's a character following.
      std::optional<std::string_view> escape_sequence = ConsumeChars(2);
      if (!escape_sequence) {
        ReportError(JSON_INVALID_ESCAPE, -1);
        return false;
      }

      switch ((*escape_sequence)[1]) {
        // Allowed escape sequences:
        case 'x': {  // UTF-8 sequence.
          // UTF-8 \x escape sequences are not allowed in the spec, but they
          // are supported here for backwards-compatibility with the old
          // parser.
          UmaHistogramEnumeration(kExtensionHistogramName,
                                  ChromiumJsonExtension::kXEscape);
          if (!(options_ & JSON_ALLOW_X_ESCAPES)) {
            ReportError(JSON_INVALID_ESCAPE, -1);
            return false;
          }

          // \x is followed by exactly two hex digits.
          escape_sequence = ConsumeChars(2);
          if (!escape_sequence) {
            ReportError(JSON_INVALID_ESCAPE, -3);
            return false;
          }

          int hex_digit = 0;
          if (!UnprefixedHexStringToInt(*escape_sequence, &hex_digit) ||
              !IsValidCharacter(hex_digit)) {
            ReportError(JSON_INVALID_ESCAPE, -3);
            return false;
          }

          string.Append(hex_digit);
          break;
        }
        case 'u': {  // UTF-16 sequence.
          // UTF units are of the form \uXXXX.
          base_icu::UChar32 code_point;
          if (!DecodeUTF16(&code_point)) {
            ReportError(JSON_INVALID_ESCAPE, -1);
            return false;
          }
          string.Append(code_point);
          break;
        }
        case '"':
          string.Append('"');
          break;
        case '\\':
          string.Append('\\');
          break;
        case '/':
          string.Append('/');
          break;
        case 'b':
          string.Append('\b');
          break;
        case 'f':
          string.Append('\f');
          break;
        case 'n':
          string.Append('\n');
          break;
        case 'r':
          string.Append('\r');
          break;
        case 't':
          string.Append('\t');
          break;
        case 'v':  // Not listed as valid escape sequence in the RFC.
          UmaHistogramEnumeration(kExtensionHistogramName,
                                  ChromiumJsonExtension::kVerticalTabEscape);
          if (!(options_ & JSON_ALLOW_VERT_TAB)) {
            ReportError(JSON_INVALID_ESCAPE, -1);
            return false;
          }
          string.Append('\v');
          break;
        // All other escape sequences are illegal.
        default:
          ReportError(JSON_INVALID_ESCAPE, -1);
          return false;
      }
    }
  }

  // The input ended before the closing quote was seen.
  ReportError(JSON_SYNTAX_ERROR, -1);
  return false;
}
693
694 // Entry is at the first X in \uXXXX.
DecodeUTF16(base_icu::UChar32 * out_code_point)695 bool JSONParser::DecodeUTF16(base_icu::UChar32* out_code_point) {
696 std::optional<std::string_view> escape_sequence = ConsumeChars(4);
697 if (!escape_sequence)
698 return false;
699
700 // Consume the UTF-16 code unit, which may be a high surrogate.
701 int code_unit16_high = 0;
702 if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_high))
703 return false;
704
705 // If this is a high surrogate, consume the next code unit to get the
706 // low surrogate.
707 if (CBU16_IS_SURROGATE(code_unit16_high)) {
708 // Make sure this is the high surrogate.
709 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
710 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
711 return false;
712 *out_code_point = kUnicodeReplacementPoint;
713 return true;
714 }
715
716 // Make sure that the token has more characters to consume the
717 // lower surrogate.
718 if (!ConsumeIfMatch("\\u")) {
719 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
720 return false;
721 *out_code_point = kUnicodeReplacementPoint;
722 return true;
723 }
724
725 escape_sequence = ConsumeChars(4);
726 if (!escape_sequence)
727 return false;
728
729 int code_unit16_low = 0;
730 if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low))
731 return false;
732
733 if (!CBU16_IS_TRAIL(code_unit16_low)) {
734 if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
735 return false;
736 *out_code_point = kUnicodeReplacementPoint;
737 return true;
738 }
739
740 base_icu::UChar32 code_point =
741 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
742
743 *out_code_point = code_point;
744 } else {
745 // Not a surrogate.
746 DCHECK(CBU16_IS_SINGLE(code_unit16_high));
747
748 *out_code_point = code_unit16_high;
749 }
750
751 return true;
752 }
753
ConsumeNumber()754 std::optional<Value> JSONParser::ConsumeNumber() {
755 const char* num_start = pos();
756 const size_t start_index = index_;
757 size_t end_index = start_index;
758
759 if (PeekChar() == '-')
760 ConsumeChar();
761
762 if (!ReadInt(false)) {
763 ReportError(JSON_SYNTAX_ERROR, 0);
764 return std::nullopt;
765 }
766 end_index = index_;
767
768 // The optional fraction part.
769 if (PeekChar() == '.') {
770 ConsumeChar();
771 if (!ReadInt(true)) {
772 ReportError(JSON_SYNTAX_ERROR, 0);
773 return std::nullopt;
774 }
775 end_index = index_;
776 }
777
778 // Optional exponent part.
779 std::optional<char> c = PeekChar();
780 if (c == 'e' || c == 'E') {
781 ConsumeChar();
782 if (PeekChar() == '-' || PeekChar() == '+') {
783 ConsumeChar();
784 }
785 if (!ReadInt(true)) {
786 ReportError(JSON_SYNTAX_ERROR, 0);
787 return std::nullopt;
788 }
789 end_index = index_;
790 }
791
792 // ReadInt is greedy because numbers have no easily detectable sentinel,
793 // so save off where the parser should be on exit (see Consume invariant at
794 // the top of the header), then make sure the next token is one which is
795 // valid.
796 size_t exit_index = index_;
797
798 switch (GetNextToken()) {
799 case T_OBJECT_END:
800 case T_ARRAY_END:
801 case T_LIST_SEPARATOR:
802 case T_END_OF_INPUT:
803 break;
804 default:
805 ReportError(JSON_SYNTAX_ERROR, 0);
806 return std::nullopt;
807 }
808
809 index_ = exit_index;
810
811 std::string_view num_string(num_start, end_index - start_index);
812
813 int num_int;
814 if (StringToInt(num_string, &num_int)) {
815 // StringToInt will treat `-0` as zero, losing the significance of the
816 // negation.
817 if (num_int == 0 && num_string.starts_with('-')) {
818 if (base::FeatureList::IsEnabled(features::kJsonNegativeZero)) {
819 return Value(-0.0);
820 }
821 }
822 return Value(num_int);
823 }
824
825 double num_double;
826 if (StringToDouble(num_string, &num_double) && std::isfinite(num_double)) {
827 return Value(num_double);
828 }
829
830 ReportError(JSON_UNREPRESENTABLE_NUMBER, 0);
831 return std::nullopt;
832 }
833
ReadInt(bool allow_leading_zeros)834 bool JSONParser::ReadInt(bool allow_leading_zeros) {
835 size_t len = 0;
836 char first = 0;
837
838 while (std::optional<char> c = PeekChar()) {
839 if (!IsAsciiDigit(c))
840 break;
841
842 if (len == 0)
843 first = *c;
844
845 ++len;
846 ConsumeChar();
847 }
848
849 if (len == 0)
850 return false;
851
852 if (!allow_leading_zeros && len > 1 && first == '0')
853 return false;
854
855 return true;
856 }
857
ConsumeLiteral()858 std::optional<Value> JSONParser::ConsumeLiteral() {
859 if (ConsumeIfMatch("true"))
860 return Value(true);
861 if (ConsumeIfMatch("false"))
862 return Value(false);
863 if (ConsumeIfMatch("null"))
864 return Value(Value::Type::NONE);
865 ReportError(JSON_SYNTAX_ERROR, 0);
866 return std::nullopt;
867 }
868
ConsumeIfMatch(std::string_view match)869 bool JSONParser::ConsumeIfMatch(std::string_view match) {
870 if (match == PeekChars(match.size())) {
871 ConsumeChars(match.size());
872 return true;
873 }
874 return false;
875 }
876
ReportError(JsonParseError code,int column_adjust)877 void JSONParser::ReportError(JsonParseError code, int column_adjust) {
878 error_code_ = code;
879 error_line_ = line_number_;
880 error_column_ = static_cast<int>(index_ - index_last_line_) + column_adjust;
881
882 // For a final blank line ('\n' and then EOF), a negative column_adjust may
883 // put us below 1, which doesn't really make sense for 1-based columns.
884 if (error_column_ < 1) {
885 error_column_ = 1;
886 }
887 }
888
889 // static
FormatErrorMessage(int line,int column,const std::string & description)890 std::string JSONParser::FormatErrorMessage(int line, int column,
891 const std::string& description) {
892 if (line || column) {
893 return StringPrintf("Line: %i, column: %i, %s",
894 line, column, description.c_str());
895 }
896 return description;
897 }
898
899 } // namespace internal
900 } // namespace base
901