xref: /aosp_15_r20/external/cronet/base/json/json_parser.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/json_parser.h"
6 
7 #include <cmath>
8 #include <iterator>
9 #include <string_view>
10 #include <utility>
11 #include <vector>
12 
13 #include "base/check_op.h"
14 #include "base/feature_list.h"
15 #include "base/features.h"
16 #include "base/json/json_reader.h"
17 #include "base/metrics/histogram_functions.h"
18 #include "base/notreached.h"
19 #include "base/numerics/safe_conversions.h"
20 #include "base/ranges/algorithm.h"
21 #include "base/strings/string_number_conversions.h"
22 #include "base/strings/string_util.h"
23 #include "base/strings/stringprintf.h"
24 #include "base/strings/utf_string_conversion_utils.h"
25 #include "base/strings/utf_string_conversions.h"
26 #include "base/third_party/icu/icu_utf.h"
27 
28 namespace base {
29 namespace internal {
30 
31 namespace {
32 
// Values 1000 and above are used by JSONFileValueSerializer::JsonFileError.
// Fail the build if the parser's own error codes ever grow into that range.
static_assert(JSONParser::JSON_PARSE_ERROR_COUNT < 1000,
              "JSONParser error out of bounds");
36 
ErrorCodeToString(JSONParser::JsonParseError error_code)37 std::string ErrorCodeToString(JSONParser::JsonParseError error_code) {
38   switch (error_code) {
39     case JSONParser::JSON_NO_ERROR:
40       return std::string();
41     case JSONParser::JSON_SYNTAX_ERROR:
42       return JSONParser::kSyntaxError;
43     case JSONParser::JSON_INVALID_ESCAPE:
44       return JSONParser::kInvalidEscape;
45     case JSONParser::JSON_UNEXPECTED_TOKEN:
46       return JSONParser::kUnexpectedToken;
47     case JSONParser::JSON_TRAILING_COMMA:
48       return JSONParser::kTrailingComma;
49     case JSONParser::JSON_TOO_MUCH_NESTING:
50       return JSONParser::kTooMuchNesting;
51     case JSONParser::JSON_UNEXPECTED_DATA_AFTER_ROOT:
52       return JSONParser::kUnexpectedDataAfterRoot;
53     case JSONParser::JSON_UNSUPPORTED_ENCODING:
54       return JSONParser::kUnsupportedEncoding;
55     case JSONParser::JSON_UNQUOTED_DICTIONARY_KEY:
56       return JSONParser::kUnquotedDictionaryKey;
57     case JSONParser::JSON_UNREPRESENTABLE_NUMBER:
58       return JSONParser::kUnrepresentableNumber;
59     case JSONParser::JSON_PARSE_ERROR_COUNT:
60       break;
61   }
62   NOTREACHED();
63   return std::string();
64 }
65 
66 const int32_t kExtendedASCIIStart = 0x80;
67 constexpr base_icu::UChar32 kUnicodeReplacementPoint = 0xFFFD;
68 
69 // UnprefixedHexStringToInt acts like |HexStringToInt|, but enforces that the
70 // input consists purely of hex digits. I.e. no "0x" nor "OX" prefix is
71 // permitted.
UnprefixedHexStringToInt(std::string_view input,int * output)72 bool UnprefixedHexStringToInt(std::string_view input, int* output) {
73   for (size_t i = 0; i < input.size(); i++) {
74     if (!IsHexDigit(input[i])) {
75       return false;
76     }
77   }
78   return HexStringToInt(input, output);
79 }
80 
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
//
// Each enumerator carries an explicit value so that inserting, removing or
// reordering an entry cannot silently change the number recorded for another
// one. New entries must take the next unused value and update |kMaxValue|.
enum class ChromiumJsonExtension {
  kCComment = 0,           // "/* ... */" block comment.
  kCppComment = 1,         // "// ..." line comment.
  kXEscape = 2,            // "\xNN" escape inside a string.
  kVerticalTabEscape = 3,  // "\v" escape inside a string.
  kControlCharacter = 4,   // Unescaped control character inside a string.
  kNewlineInString = 5,    // Unescaped '\r' or '\n' inside a string.
  kMaxValue = kNewlineInString,
};
92 
// Histogram that records each use of a non-standard JSON extension.
constexpr char kExtensionHistogramName[] =
    "Security.JSONParser.ChromiumExtensionUsage";
95 
96 }  // namespace
97 
// This is U+FFFD (the Unicode replacement character) encoded as UTF-8.
const char kUnicodeReplacementString[] = "\xEF\xBF\xBD";

// Human-readable descriptions for each JsonParseError; ErrorCodeToString()
// maps error codes onto these.
const char JSONParser::kSyntaxError[] = "Syntax error.";
const char JSONParser::kInvalidEscape[] = "Invalid escape sequence.";
const char JSONParser::kUnexpectedToken[] = "Unexpected token.";
const char JSONParser::kTrailingComma[] = "Trailing comma not allowed.";
const char JSONParser::kTooMuchNesting[] = "Too much nesting.";
const char JSONParser::kUnexpectedDataAfterRoot[] =
    "Unexpected data after root element.";
const char JSONParser::kUnsupportedEncoding[] =
    "Unsupported encoding. JSON must be UTF-8.";
const char JSONParser::kUnquotedDictionaryKey[] =
    "Dictionary keys must be quoted.";
const char JSONParser::kUnrepresentableNumber[] =
    "Number cannot be represented.";
114 
JSONParser(int options,size_t max_depth)115 JSONParser::JSONParser(int options, size_t max_depth)
116     : options_(options),
117       max_depth_(max_depth),
118       index_(0),
119       stack_depth_(0),
120       line_number_(0),
121       index_last_line_(0),
122       error_code_(JSON_NO_ERROR),
123       error_line_(0),
124       error_column_(0) {
125   CHECK_LE(max_depth, kAbsoluteMaxDepth);
126 }
127 
128 JSONParser::~JSONParser() = default;
129 
Parse(std::string_view input)130 std::optional<Value> JSONParser::Parse(std::string_view input) {
131   input_ = input;
132   index_ = 0;
133   // Line and column counting is 1-based, but |index_| is 0-based. For example,
134   // if input is "Aaa\nB" then 'A' and 'B' are both in column 1 (at lines 1 and
135   // 2) and have indexes of 0 and 4. We track the line number explicitly (the
136   // |line_number_| field) and the column number implicitly (the difference
137   // between |index_| and |index_last_line_|). In calculating that difference,
138   // |index_last_line_| is the index of the '\r' or '\n', not the index of the
139   // first byte after the '\n'. For the 'B' in "Aaa\nB", its |index_| and
140   // |index_last_line_| would be 4 and 3: 'B' is in column (4 - 3) = 1. We
141   // initialize |index_last_line_| to -1, not 0, since -1 is the (out of range)
142   // index of the imaginary '\n' immediately before the start of the string:
143   // 'A' is in column (0 - -1) = 1.
144   line_number_ = 1;
145   index_last_line_ = static_cast<size_t>(-1);
146 
147   error_code_ = JSON_NO_ERROR;
148   error_line_ = 0;
149   error_column_ = 0;
150 
151   // When the input JSON string starts with a UTF-8 Byte-Order-Mark,
152   // advance the start position to avoid the ParseNextToken function mis-
153   // treating a Unicode BOM as an invalid character and returning NULL.
154   ConsumeIfMatch("\xEF\xBB\xBF");
155 
156   // Parse the first and any nested tokens.
157   std::optional<Value> root(ParseNextToken());
158   if (!root)
159     return std::nullopt;
160 
161   // Make sure the input stream is at an end.
162   if (GetNextToken() != T_END_OF_INPUT) {
163     ReportError(JSON_UNEXPECTED_DATA_AFTER_ROOT, 0);
164     return std::nullopt;
165   }
166 
167   return root;
168 }
169 
// Returns the code stored by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been reported since construction or the last
// Parse() reset.
JSONParser::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
173 
GetErrorMessage() const174 std::string JSONParser::GetErrorMessage() const {
175   return FormatErrorMessage(error_line_, error_column_,
176                             ErrorCodeToString(error_code_));
177 }
178 
// Returns the line recorded by the most recent ReportError() call. The value
// is 0 when no error has been reported since the last Parse() reset.
int JSONParser::error_line() const {
  return error_line_;
}
182 
// Returns the column recorded by the most recent ReportError() call (clamped
// there to be at least 1). The value is 0 when no error has been reported
// since the last Parse() reset.
int JSONParser::error_column() const {
  return error_column_;
}
186 
187 // StringBuilder ///////////////////////////////////////////////////////////////
188 
StringBuilder()189 JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
190 
StringBuilder(const char * pos)191 JSONParser::StringBuilder::StringBuilder(const char* pos)
192     : pos_(pos), length_(0) {}
193 
194 JSONParser::StringBuilder::~StringBuilder() = default;
195 
196 JSONParser::StringBuilder& JSONParser::StringBuilder::operator=(
197     StringBuilder&& other) = default;
198 
Append(base_icu::UChar32 point)199 void JSONParser::StringBuilder::Append(base_icu::UChar32 point) {
200   DCHECK(IsValidCodepoint(point));
201 
202   if (point < kExtendedASCIIStart) {
203     if (!string_) {
204       DCHECK_EQ(static_cast<char>(point), pos_[length_]);
205       ++length_;
206     } else {
207       string_->push_back(static_cast<char>(point));
208     }
209   } else {
210     Convert();
211     if (UNLIKELY(point == kUnicodeReplacementPoint)) {
212       string_->append(kUnicodeReplacementString);
213     } else {
214       WriteUnicodeCharacter(point, &*string_);
215     }
216   }
217 }
218 
Convert()219 void JSONParser::StringBuilder::Convert() {
220   if (string_)
221     return;
222   string_.emplace(pos_, length_);
223 }
224 
DestructiveAsString()225 std::string JSONParser::StringBuilder::DestructiveAsString() {
226   if (string_)
227     return std::move(*string_);
228   return std::string(pos_, length_);
229 }
230 
231 // JSONParser private //////////////////////////////////////////////////////////
232 
PeekChars(size_t count)233 std::optional<std::string_view> JSONParser::PeekChars(size_t count) {
234   if (index_ + count > input_.length())
235     return std::nullopt;
236   // Using StringPiece::substr() is significantly slower (according to
237   // base_perftests) than constructing a substring manually.
238   return std::string_view(input_.data() + index_, count);
239 }
240 
PeekChar()241 std::optional<char> JSONParser::PeekChar() {
242   std::optional<std::string_view> chars = PeekChars(1);
243   if (chars)
244     return (*chars)[0];
245   return std::nullopt;
246 }
247 
ConsumeChars(size_t count)248 std::optional<std::string_view> JSONParser::ConsumeChars(size_t count) {
249   std::optional<std::string_view> chars = PeekChars(count);
250   if (chars)
251     index_ += count;
252   return chars;
253 }
254 
ConsumeChar()255 std::optional<char> JSONParser::ConsumeChar() {
256   std::optional<std::string_view> chars = ConsumeChars(1);
257   if (chars)
258     return (*chars)[0];
259   return std::nullopt;
260 }
261 
// Returns a pointer to the input byte at |index_|. CHECKs that |index_| has
// not moved past the end of the input; when |index_| equals the input length
// the result is a one-past-the-end pointer and must not be dereferenced.
const char* JSONParser::pos() {
  CHECK_LE(index_, input_.length());
  return input_.data() + index_;
}
266 
GetNextToken()267 JSONParser::Token JSONParser::GetNextToken() {
268   EatWhitespaceAndComments();
269 
270   std::optional<char> c = PeekChar();
271   if (!c)
272     return T_END_OF_INPUT;
273 
274   switch (*c) {
275     case '{':
276       return T_OBJECT_BEGIN;
277     case '}':
278       return T_OBJECT_END;
279     case '[':
280       return T_ARRAY_BEGIN;
281     case ']':
282       return T_ARRAY_END;
283     case '"':
284       return T_STRING;
285     case '0':
286     case '1':
287     case '2':
288     case '3':
289     case '4':
290     case '5':
291     case '6':
292     case '7':
293     case '8':
294     case '9':
295     case '-':
296       return T_NUMBER;
297     case 't':
298       return T_BOOL_TRUE;
299     case 'f':
300       return T_BOOL_FALSE;
301     case 'n':
302       return T_NULL;
303     case ',':
304       return T_LIST_SEPARATOR;
305     case ':':
306       return T_OBJECT_PAIR_SEPARATOR;
307     default:
308       return T_INVALID_TOKEN;
309   }
310 }
311 
EatWhitespaceAndComments()312 void JSONParser::EatWhitespaceAndComments() {
313   while (std::optional<char> c = PeekChar()) {
314     switch (*c) {
315       case '\r':
316       case '\n':
317         index_last_line_ = index_;
318         // Don't increment line_number_ twice for "\r\n".
319         if (!(c == '\n' && index_ > 0 && input_[index_ - 1] == '\r')) {
320           ++line_number_;
321         }
322         [[fallthrough]];
323       case ' ':
324       case '\t':
325         ConsumeChar();
326         break;
327       case '/':
328         if (!EatComment())
329           return;
330         break;
331       default:
332         return;
333     }
334   }
335 }
336 
EatComment()337 bool JSONParser::EatComment() {
338   std::optional<std::string_view> comment_start = PeekChars(2);
339   if (!comment_start)
340     return false;
341 
342   const bool comments_allowed = options_ & JSON_ALLOW_COMMENTS;
343 
344   if (comment_start == "//") {
345     UmaHistogramEnumeration(kExtensionHistogramName,
346                             ChromiumJsonExtension::kCppComment);
347     if (!comments_allowed) {
348       ReportError(JSON_UNEXPECTED_TOKEN, 0);
349       return false;
350     }
351 
352     ConsumeChars(2);
353     // Single line comment, read to newline.
354     while (std::optional<char> c = PeekChar()) {
355       if (c == '\n' || c == '\r')
356         return true;
357       ConsumeChar();
358     }
359   } else if (comment_start == "/*") {
360     UmaHistogramEnumeration(kExtensionHistogramName,
361                             ChromiumJsonExtension::kCComment);
362     if (!comments_allowed) {
363       ReportError(JSON_UNEXPECTED_TOKEN, 0);
364       return false;
365     }
366 
367     ConsumeChars(2);
368     char previous_char = '\0';
369     // Block comment, read until end marker.
370     while (std::optional<char> c = PeekChar()) {
371       if (previous_char == '*' && c == '/') {
372         // EatWhitespaceAndComments will inspect pos(), which will still be on
373         // the last / of the comment, so advance once more (which may also be
374         // end of input).
375         ConsumeChar();
376         return true;
377       }
378       previous_char = *ConsumeChar();
379     }
380 
381     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
382   }
383 
384   return false;
385 }
386 
ParseNextToken()387 std::optional<Value> JSONParser::ParseNextToken() {
388   return ParseToken(GetNextToken());
389 }
390 
ParseToken(Token token)391 std::optional<Value> JSONParser::ParseToken(Token token) {
392   switch (token) {
393     case T_OBJECT_BEGIN:
394       return ConsumeDictionary();
395     case T_ARRAY_BEGIN:
396       return ConsumeList();
397     case T_STRING:
398       return ConsumeString();
399     case T_NUMBER:
400       return ConsumeNumber();
401     case T_BOOL_TRUE:
402     case T_BOOL_FALSE:
403     case T_NULL:
404       return ConsumeLiteral();
405     default:
406       ReportError(JSON_UNEXPECTED_TOKEN, 0);
407       return std::nullopt;
408   }
409 }
410 
ConsumeDictionary()411 std::optional<Value> JSONParser::ConsumeDictionary() {
412   if (ConsumeChar() != '{') {
413     ReportError(JSON_UNEXPECTED_TOKEN, 0);
414     return std::nullopt;
415   }
416 
417   StackMarker depth_check(max_depth_, &stack_depth_);
418   if (depth_check.IsTooDeep()) {
419     ReportError(JSON_TOO_MUCH_NESTING, -1);
420     return std::nullopt;
421   }
422 
423   std::vector<std::pair<std::string, Value>> values;
424 
425   Token token = GetNextToken();
426   while (token != T_OBJECT_END) {
427     if (token != T_STRING) {
428       ReportError(JSON_UNQUOTED_DICTIONARY_KEY, 0);
429       return std::nullopt;
430     }
431 
432     // First consume the key.
433     StringBuilder key;
434     if (!ConsumeStringRaw(&key)) {
435       return std::nullopt;
436     }
437 
438     // Read the separator.
439     token = GetNextToken();
440     if (token != T_OBJECT_PAIR_SEPARATOR) {
441       ReportError(JSON_SYNTAX_ERROR, 0);
442       return std::nullopt;
443     }
444 
445     // The next token is the value. Ownership transfers to |dict|.
446     ConsumeChar();
447     std::optional<Value> value = ParseNextToken();
448     if (!value) {
449       // ReportError from deeper level.
450       return std::nullopt;
451     }
452 
453     values.emplace_back(key.DestructiveAsString(), std::move(*value));
454 
455     token = GetNextToken();
456     if (token == T_LIST_SEPARATOR) {
457       ConsumeChar();
458       token = GetNextToken();
459       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
460         ReportError(JSON_TRAILING_COMMA, 0);
461         return std::nullopt;
462       }
463     } else if (token != T_OBJECT_END) {
464       ReportError(JSON_SYNTAX_ERROR, 0);
465       return std::nullopt;
466     }
467   }
468 
469   ConsumeChar();  // Closing '}'.
470   // Reverse |dict_storage| to keep the last of elements with the same key in
471   // the input.
472   ranges::reverse(values);
473   return Value(Value::Dict(std::make_move_iterator(values.begin()),
474                            std::make_move_iterator(values.end())));
475 }
476 
ConsumeList()477 std::optional<Value> JSONParser::ConsumeList() {
478   if (ConsumeChar() != '[') {
479     ReportError(JSON_UNEXPECTED_TOKEN, 0);
480     return std::nullopt;
481   }
482 
483   StackMarker depth_check(max_depth_, &stack_depth_);
484   if (depth_check.IsTooDeep()) {
485     ReportError(JSON_TOO_MUCH_NESTING, -1);
486     return std::nullopt;
487   }
488 
489   Value::List list;
490 
491   Token token = GetNextToken();
492   while (token != T_ARRAY_END) {
493     std::optional<Value> item = ParseToken(token);
494     if (!item) {
495       // ReportError from deeper level.
496       return std::nullopt;
497     }
498 
499     list.Append(std::move(*item));
500 
501     token = GetNextToken();
502     if (token == T_LIST_SEPARATOR) {
503       ConsumeChar();
504       token = GetNextToken();
505       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
506         ReportError(JSON_TRAILING_COMMA, 0);
507         return std::nullopt;
508       }
509     } else if (token != T_ARRAY_END) {
510       ReportError(JSON_SYNTAX_ERROR, 0);
511       return std::nullopt;
512     }
513   }
514 
515   ConsumeChar();  // Closing ']'.
516 
517   return Value(std::move(list));
518 }
519 
ConsumeString()520 std::optional<Value> JSONParser::ConsumeString() {
521   StringBuilder string;
522   if (!ConsumeStringRaw(&string))
523     return std::nullopt;
524   return Value(string.DestructiveAsString());
525 }
526 
ConsumeStringRaw(StringBuilder * out)527 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
528   if (ConsumeChar() != '"') {
529     ReportError(JSON_UNEXPECTED_TOKEN, 0);
530     return false;
531   }
532 
533   // StringBuilder will internally build a std::string_view unless a UTF-16
534   // conversion occurs, at which point it will perform a copy into a
535   // std::string.
536   StringBuilder string(pos());
537 
538   while (std::optional<char> c = PeekChar()) {
539     base_icu::UChar32 next_char = 0;
540     if (static_cast<unsigned char>(*c) < kExtendedASCIIStart) {
541       // Fast path for ASCII.
542       next_char = *c;
543     } else if (!ReadUnicodeCharacter(input_.data(), input_.length(), &index_,
544                                      &next_char) ||
545                !IsValidCodepoint(next_char)) {
546       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0) {
547         ReportError(JSON_UNSUPPORTED_ENCODING, 0);
548         return false;
549       }
550       ConsumeChar();
551       string.Append(kUnicodeReplacementPoint);
552       continue;
553     }
554 
555     if (next_char == '"') {
556       ConsumeChar();
557       *out = std::move(string);
558       return true;
559     }
560     if (next_char != '\\') {
561       // Per Section 7, "All Unicode characters may be placed within the
562       // quotation marks, except for the characters that MUST be escaped:
563       // quotation mark, reverse solidus, and the control characters (U+0000
564       // through U+001F)".
565       if (next_char == '\n' || next_char == '\r') {
566         UmaHistogramEnumeration(kExtensionHistogramName,
567                                 ChromiumJsonExtension::kNewlineInString);
568         if (!(options_ &
569               (JSON_ALLOW_NEWLINES_IN_STRINGS | JSON_ALLOW_CONTROL_CHARS))) {
570           ReportError(JSON_UNSUPPORTED_ENCODING, -1);
571           return false;
572         }
573       } else if (next_char <= 0x1F) {
574         UmaHistogramEnumeration(kExtensionHistogramName,
575                                 ChromiumJsonExtension::kControlCharacter);
576         if (!(options_ & JSON_ALLOW_CONTROL_CHARS)) {
577           ReportError(JSON_UNSUPPORTED_ENCODING, -1);
578           return false;
579         }
580       }
581 
582       // If this character is not an escape sequence, track any line breaks and
583       // copy next_char to the StringBuilder. The JSON spec forbids unescaped
584       // ASCII control characters within a string, including '\r' and '\n', but
585       // this implementation is more lenient.
586       if ((next_char == '\r') || (next_char == '\n')) {
587         index_last_line_ = index_;
588         // Don't increment line_number_ twice for "\r\n". We are guaranteed
589         // that (index_ > 0) because we are consuming a string, so we must have
590         // seen an opening '"' quote character.
591         if ((next_char == '\r') || (input_[index_ - 1] != '\r')) {
592           ++line_number_;
593         }
594       }
595       ConsumeChar();
596       string.Append(next_char);
597     } else {
598       // And if it is an escape sequence, the input string will be adjusted
599       // (either by combining the two characters of an encoded escape sequence,
600       // or with a UTF conversion), so using std::string_view isn't possible --
601       // force a conversion.
602       string.Convert();
603 
604       // Read past the escape '\' and ensure there's a character following.
605       std::optional<std::string_view> escape_sequence = ConsumeChars(2);
606       if (!escape_sequence) {
607         ReportError(JSON_INVALID_ESCAPE, -1);
608         return false;
609       }
610 
611       switch ((*escape_sequence)[1]) {
612         // Allowed esape sequences:
613         case 'x': {  // UTF-8 sequence.
614           // UTF-8 \x escape sequences are not allowed in the spec, but they
615           // are supported here for backwards-compatiblity with the old parser.
616           UmaHistogramEnumeration(kExtensionHistogramName,
617                                   ChromiumJsonExtension::kXEscape);
618           if (!(options_ & JSON_ALLOW_X_ESCAPES)) {
619             ReportError(JSON_INVALID_ESCAPE, -1);
620             return false;
621           }
622 
623           escape_sequence = ConsumeChars(2);
624           if (!escape_sequence) {
625             ReportError(JSON_INVALID_ESCAPE, -3);
626             return false;
627           }
628 
629           int hex_digit = 0;
630           if (!UnprefixedHexStringToInt(*escape_sequence, &hex_digit) ||
631               !IsValidCharacter(hex_digit)) {
632             ReportError(JSON_INVALID_ESCAPE, -3);
633             return false;
634           }
635 
636           string.Append(hex_digit);
637           break;
638         }
639         case 'u': {  // UTF-16 sequence.
640           // UTF units are of the form \uXXXX.
641           base_icu::UChar32 code_point;
642           if (!DecodeUTF16(&code_point)) {
643             ReportError(JSON_INVALID_ESCAPE, -1);
644             return false;
645           }
646           string.Append(code_point);
647           break;
648         }
649         case '"':
650           string.Append('"');
651           break;
652         case '\\':
653           string.Append('\\');
654           break;
655         case '/':
656           string.Append('/');
657           break;
658         case 'b':
659           string.Append('\b');
660           break;
661         case 'f':
662           string.Append('\f');
663           break;
664         case 'n':
665           string.Append('\n');
666           break;
667         case 'r':
668           string.Append('\r');
669           break;
670         case 't':
671           string.Append('\t');
672           break;
673         case 'v':  // Not listed as valid escape sequence in the RFC.
674           UmaHistogramEnumeration(kExtensionHistogramName,
675                                   ChromiumJsonExtension::kVerticalTabEscape);
676           if (!(options_ & JSON_ALLOW_VERT_TAB)) {
677             ReportError(JSON_INVALID_ESCAPE, -1);
678             return false;
679           }
680           string.Append('\v');
681           break;
682         // All other escape squences are illegal.
683         default:
684           ReportError(JSON_INVALID_ESCAPE, -1);
685           return false;
686       }
687     }
688   }
689 
690   ReportError(JSON_SYNTAX_ERROR, -1);
691   return false;
692 }
693 
694 // Entry is at the first X in \uXXXX.
DecodeUTF16(base_icu::UChar32 * out_code_point)695 bool JSONParser::DecodeUTF16(base_icu::UChar32* out_code_point) {
696   std::optional<std::string_view> escape_sequence = ConsumeChars(4);
697   if (!escape_sequence)
698     return false;
699 
700   // Consume the UTF-16 code unit, which may be a high surrogate.
701   int code_unit16_high = 0;
702   if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_high))
703     return false;
704 
705   // If this is a high surrogate, consume the next code unit to get the
706   // low surrogate.
707   if (CBU16_IS_SURROGATE(code_unit16_high)) {
708     // Make sure this is the high surrogate.
709     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) {
710       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
711         return false;
712       *out_code_point = kUnicodeReplacementPoint;
713       return true;
714     }
715 
716     // Make sure that the token has more characters to consume the
717     // lower surrogate.
718     if (!ConsumeIfMatch("\\u")) {
719       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
720         return false;
721       *out_code_point = kUnicodeReplacementPoint;
722       return true;
723     }
724 
725     escape_sequence = ConsumeChars(4);
726     if (!escape_sequence)
727       return false;
728 
729     int code_unit16_low = 0;
730     if (!UnprefixedHexStringToInt(*escape_sequence, &code_unit16_low))
731       return false;
732 
733     if (!CBU16_IS_TRAIL(code_unit16_low)) {
734       if ((options_ & JSON_REPLACE_INVALID_CHARACTERS) == 0)
735         return false;
736       *out_code_point = kUnicodeReplacementPoint;
737       return true;
738     }
739 
740     base_icu::UChar32 code_point =
741         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
742 
743     *out_code_point = code_point;
744   } else {
745     // Not a surrogate.
746     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
747 
748     *out_code_point = code_unit16_high;
749   }
750 
751   return true;
752 }
753 
ConsumeNumber()754 std::optional<Value> JSONParser::ConsumeNumber() {
755   const char* num_start = pos();
756   const size_t start_index = index_;
757   size_t end_index = start_index;
758 
759   if (PeekChar() == '-')
760     ConsumeChar();
761 
762   if (!ReadInt(false)) {
763     ReportError(JSON_SYNTAX_ERROR, 0);
764     return std::nullopt;
765   }
766   end_index = index_;
767 
768   // The optional fraction part.
769   if (PeekChar() == '.') {
770     ConsumeChar();
771     if (!ReadInt(true)) {
772       ReportError(JSON_SYNTAX_ERROR, 0);
773       return std::nullopt;
774     }
775     end_index = index_;
776   }
777 
778   // Optional exponent part.
779   std::optional<char> c = PeekChar();
780   if (c == 'e' || c == 'E') {
781     ConsumeChar();
782     if (PeekChar() == '-' || PeekChar() == '+') {
783       ConsumeChar();
784     }
785     if (!ReadInt(true)) {
786       ReportError(JSON_SYNTAX_ERROR, 0);
787       return std::nullopt;
788     }
789     end_index = index_;
790   }
791 
792   // ReadInt is greedy because numbers have no easily detectable sentinel,
793   // so save off where the parser should be on exit (see Consume invariant at
794   // the top of the header), then make sure the next token is one which is
795   // valid.
796   size_t exit_index = index_;
797 
798   switch (GetNextToken()) {
799     case T_OBJECT_END:
800     case T_ARRAY_END:
801     case T_LIST_SEPARATOR:
802     case T_END_OF_INPUT:
803       break;
804     default:
805       ReportError(JSON_SYNTAX_ERROR, 0);
806       return std::nullopt;
807   }
808 
809   index_ = exit_index;
810 
811   std::string_view num_string(num_start, end_index - start_index);
812 
813   int num_int;
814   if (StringToInt(num_string, &num_int)) {
815     // StringToInt will treat `-0` as zero, losing the significance of the
816     // negation.
817     if (num_int == 0 && num_string.starts_with('-')) {
818       if (base::FeatureList::IsEnabled(features::kJsonNegativeZero)) {
819         return Value(-0.0);
820       }
821     }
822     return Value(num_int);
823   }
824 
825   double num_double;
826   if (StringToDouble(num_string, &num_double) && std::isfinite(num_double)) {
827     return Value(num_double);
828   }
829 
830   ReportError(JSON_UNREPRESENTABLE_NUMBER, 0);
831   return std::nullopt;
832 }
833 
ReadInt(bool allow_leading_zeros)834 bool JSONParser::ReadInt(bool allow_leading_zeros) {
835   size_t len = 0;
836   char first = 0;
837 
838   while (std::optional<char> c = PeekChar()) {
839     if (!IsAsciiDigit(c))
840       break;
841 
842     if (len == 0)
843       first = *c;
844 
845     ++len;
846     ConsumeChar();
847   }
848 
849   if (len == 0)
850     return false;
851 
852   if (!allow_leading_zeros && len > 1 && first == '0')
853     return false;
854 
855   return true;
856 }
857 
ConsumeLiteral()858 std::optional<Value> JSONParser::ConsumeLiteral() {
859   if (ConsumeIfMatch("true"))
860     return Value(true);
861   if (ConsumeIfMatch("false"))
862     return Value(false);
863   if (ConsumeIfMatch("null"))
864     return Value(Value::Type::NONE);
865   ReportError(JSON_SYNTAX_ERROR, 0);
866   return std::nullopt;
867 }
868 
ConsumeIfMatch(std::string_view match)869 bool JSONParser::ConsumeIfMatch(std::string_view match) {
870   if (match == PeekChars(match.size())) {
871     ConsumeChars(match.size());
872     return true;
873   }
874   return false;
875 }
876 
ReportError(JsonParseError code,int column_adjust)877 void JSONParser::ReportError(JsonParseError code, int column_adjust) {
878   error_code_ = code;
879   error_line_ = line_number_;
880   error_column_ = static_cast<int>(index_ - index_last_line_) + column_adjust;
881 
882   // For a final blank line ('\n' and then EOF), a negative column_adjust may
883   // put us below 1, which doesn't really make sense for 1-based columns.
884   if (error_column_ < 1) {
885     error_column_ = 1;
886   }
887 }
888 
889 // static
FormatErrorMessage(int line,int column,const std::string & description)890 std::string JSONParser::FormatErrorMessage(int line, int column,
891                                            const std::string& description) {
892   if (line || column) {
893     return StringPrintf("Line: %i, column: %i, %s",
894         line, column, description.c_str());
895   }
896   return description;
897 }
898 
899 }  // namespace internal
900 }  // namespace base
901