1*6777b538SAndroid Build Coastguard Worker // Copyright 2012 The Chromium Authors 2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be 3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file. 4*6777b538SAndroid Build Coastguard Worker 5*6777b538SAndroid Build Coastguard Worker #ifndef BASE_JSON_JSON_PARSER_H_ 6*6777b538SAndroid Build Coastguard Worker #define BASE_JSON_JSON_PARSER_H_ 7*6777b538SAndroid Build Coastguard Worker 8*6777b538SAndroid Build Coastguard Worker #include <stddef.h> 9*6777b538SAndroid Build Coastguard Worker #include <stdint.h> 10*6777b538SAndroid Build Coastguard Worker 11*6777b538SAndroid Build Coastguard Worker #include <memory> 12*6777b538SAndroid Build Coastguard Worker #include <optional> 13*6777b538SAndroid Build Coastguard Worker #include <string> 14*6777b538SAndroid Build Coastguard Worker #include <string_view> 15*6777b538SAndroid Build Coastguard Worker 16*6777b538SAndroid Build Coastguard Worker #include "base/base_export.h" 17*6777b538SAndroid Build Coastguard Worker #include "base/compiler_specific.h" 18*6777b538SAndroid Build Coastguard Worker #include "base/gtest_prod_util.h" 19*6777b538SAndroid Build Coastguard Worker #include "base/json/json_common.h" 20*6777b538SAndroid Build Coastguard Worker #include "base/third_party/icu/icu_utf.h" 21*6777b538SAndroid Build Coastguard Worker #include "base/values.h" 22*6777b538SAndroid Build Coastguard Worker 23*6777b538SAndroid Build Coastguard Worker namespace base { 24*6777b538SAndroid Build Coastguard Worker 25*6777b538SAndroid Build Coastguard Worker class Value; 26*6777b538SAndroid Build Coastguard Worker 27*6777b538SAndroid Build Coastguard Worker namespace internal { 28*6777b538SAndroid Build Coastguard Worker 29*6777b538SAndroid Build Coastguard Worker class JSONParserTest; 30*6777b538SAndroid Build Coastguard Worker 31*6777b538SAndroid Build Coastguard Worker // The implementation behind the JSONReader interface. This class is not meant 32*6777b538SAndroid Build Coastguard Worker // to be used directly; it encapsulates logic that need not be exposed publicly. 33*6777b538SAndroid Build Coastguard Worker // 34*6777b538SAndroid Build Coastguard Worker // This parser guarantees O(n) time through the input string. Iteration happens 35*6777b538SAndroid Build Coastguard Worker // on the byte level, with the functions ConsumeChars() and ConsumeChar(). The 36*6777b538SAndroid Build Coastguard Worker // conversion from byte to JSON token happens without advancing the parser in 37*6777b538SAndroid Build Coastguard Worker // GetNextToken/ParseToken, that is tokenization operates on the current parser 38*6777b538SAndroid Build Coastguard Worker // position without advancing. 39*6777b538SAndroid Build Coastguard Worker // 40*6777b538SAndroid Build Coastguard Worker // Built on top of these are a family of Consume functions that iterate 41*6777b538SAndroid Build Coastguard Worker // internally. Invariant: on entry of a Consume function, the parser is wound 42*6777b538SAndroid Build Coastguard Worker // to the first byte of a valid JSON token. On exit, it is on the first byte 43*6777b538SAndroid Build Coastguard Worker // after the token that was just consumed, which would likely be the first byte 44*6777b538SAndroid Build Coastguard Worker // of the next token. 45*6777b538SAndroid Build Coastguard Worker class BASE_EXPORT JSONParser { 46*6777b538SAndroid Build Coastguard Worker public: 47*6777b538SAndroid Build Coastguard Worker // Error codes during parsing. 48*6777b538SAndroid Build Coastguard Worker enum JsonParseError { 49*6777b538SAndroid Build Coastguard Worker JSON_NO_ERROR = base::ValueDeserializer::kErrorCodeNoError, 50*6777b538SAndroid Build Coastguard Worker JSON_SYNTAX_ERROR = base::ValueDeserializer::kErrorCodeInvalidFormat, 51*6777b538SAndroid Build Coastguard Worker JSON_INVALID_ESCAPE, 52*6777b538SAndroid Build Coastguard Worker JSON_UNEXPECTED_TOKEN, 53*6777b538SAndroid Build Coastguard Worker JSON_TRAILING_COMMA, 54*6777b538SAndroid Build Coastguard Worker JSON_TOO_MUCH_NESTING, 55*6777b538SAndroid Build Coastguard Worker JSON_UNEXPECTED_DATA_AFTER_ROOT, 56*6777b538SAndroid Build Coastguard Worker JSON_UNSUPPORTED_ENCODING, 57*6777b538SAndroid Build Coastguard Worker JSON_UNQUOTED_DICTIONARY_KEY, 58*6777b538SAndroid Build Coastguard Worker JSON_UNREPRESENTABLE_NUMBER, 59*6777b538SAndroid Build Coastguard Worker JSON_PARSE_ERROR_COUNT 60*6777b538SAndroid Build Coastguard Worker }; 61*6777b538SAndroid Build Coastguard Worker 62*6777b538SAndroid Build Coastguard Worker // String versions of parse error codes. 63*6777b538SAndroid Build Coastguard Worker static const char kSyntaxError[]; 64*6777b538SAndroid Build Coastguard Worker static const char kInvalidEscape[]; 65*6777b538SAndroid Build Coastguard Worker static const char kUnexpectedToken[]; 66*6777b538SAndroid Build Coastguard Worker static const char kTrailingComma[]; 67*6777b538SAndroid Build Coastguard Worker static const char kTooMuchNesting[]; 68*6777b538SAndroid Build Coastguard Worker static const char kUnexpectedDataAfterRoot[]; 69*6777b538SAndroid Build Coastguard Worker static const char kUnsupportedEncoding[]; 70*6777b538SAndroid Build Coastguard Worker static const char kUnquotedDictionaryKey[]; 71*6777b538SAndroid Build Coastguard Worker static const char kUnrepresentableNumber[]; 72*6777b538SAndroid Build Coastguard Worker 73*6777b538SAndroid Build Coastguard Worker explicit JSONParser(int options, size_t max_depth = kAbsoluteMaxDepth); 74*6777b538SAndroid Build Coastguard Worker 75*6777b538SAndroid Build Coastguard Worker JSONParser(const JSONParser&) = delete; 76*6777b538SAndroid Build Coastguard Worker JSONParser& operator=(const JSONParser&) = delete; 77*6777b538SAndroid Build Coastguard Worker 78*6777b538SAndroid Build Coastguard Worker ~JSONParser(); 79*6777b538SAndroid Build Coastguard Worker 80*6777b538SAndroid Build Coastguard Worker // Parses the input string according to the set options and returns the 81*6777b538SAndroid Build Coastguard Worker // result as a Value. 82*6777b538SAndroid Build Coastguard Worker // Wrap this in base::FooValue::From() to check the Value is of type Foo and 83*6777b538SAndroid Build Coastguard Worker // convert to a FooValue at the same time. 84*6777b538SAndroid Build Coastguard Worker std::optional<Value> Parse(std::string_view input); 85*6777b538SAndroid Build Coastguard Worker 86*6777b538SAndroid Build Coastguard Worker // Returns the error code. 87*6777b538SAndroid Build Coastguard Worker JsonParseError error_code() const; 88*6777b538SAndroid Build Coastguard Worker 89*6777b538SAndroid Build Coastguard Worker // Returns the human-friendly error message. 90*6777b538SAndroid Build Coastguard Worker std::string GetErrorMessage() const; 91*6777b538SAndroid Build Coastguard Worker 92*6777b538SAndroid Build Coastguard Worker // Returns the error line number if parse error happened. Otherwise always 93*6777b538SAndroid Build Coastguard Worker // returns 0. 94*6777b538SAndroid Build Coastguard Worker int error_line() const; 95*6777b538SAndroid Build Coastguard Worker 96*6777b538SAndroid Build Coastguard Worker // Returns the error column number if parse error happened. Otherwise always 97*6777b538SAndroid Build Coastguard Worker // returns 0. 98*6777b538SAndroid Build Coastguard Worker int error_column() const; 99*6777b538SAndroid Build Coastguard Worker 100*6777b538SAndroid Build Coastguard Worker private: 101*6777b538SAndroid Build Coastguard Worker enum Token { 102*6777b538SAndroid Build Coastguard Worker T_OBJECT_BEGIN, // { 103*6777b538SAndroid Build Coastguard Worker T_OBJECT_END, // } 104*6777b538SAndroid Build Coastguard Worker T_ARRAY_BEGIN, // [ 105*6777b538SAndroid Build Coastguard Worker T_ARRAY_END, // ] 106*6777b538SAndroid Build Coastguard Worker T_STRING, 107*6777b538SAndroid Build Coastguard Worker T_NUMBER, 108*6777b538SAndroid Build Coastguard Worker T_BOOL_TRUE, // true 109*6777b538SAndroid Build Coastguard Worker T_BOOL_FALSE, // false 110*6777b538SAndroid Build Coastguard Worker T_NULL, // null 111*6777b538SAndroid Build Coastguard Worker T_LIST_SEPARATOR, // , 112*6777b538SAndroid Build Coastguard Worker T_OBJECT_PAIR_SEPARATOR, // : 113*6777b538SAndroid Build Coastguard Worker T_END_OF_INPUT, 114*6777b538SAndroid Build Coastguard Worker T_INVALID_TOKEN, 115*6777b538SAndroid Build Coastguard Worker }; 116*6777b538SAndroid Build Coastguard Worker 117*6777b538SAndroid Build Coastguard Worker // A helper class used for parsing strings. One optimization performed is to 118*6777b538SAndroid Build Coastguard Worker // create base::Value with a std::string_view to avoid unnecessary std::string 119*6777b538SAndroid Build Coastguard Worker // copies. This is not possible if the input string needs to be decoded from 120*6777b538SAndroid Build Coastguard Worker // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped. 121*6777b538SAndroid Build Coastguard Worker // This class centralizes that logic. 122*6777b538SAndroid Build Coastguard Worker class StringBuilder { 123*6777b538SAndroid Build Coastguard Worker public: 124*6777b538SAndroid Build Coastguard Worker // Empty constructor. Used for creating a builder with which to assign to. 125*6777b538SAndroid Build Coastguard Worker StringBuilder(); 126*6777b538SAndroid Build Coastguard Worker 127*6777b538SAndroid Build Coastguard Worker // |pos| is the beginning of an input string, excluding the |"|. 128*6777b538SAndroid Build Coastguard Worker explicit StringBuilder(const char* pos); 129*6777b538SAndroid Build Coastguard Worker 130*6777b538SAndroid Build Coastguard Worker ~StringBuilder(); 131*6777b538SAndroid Build Coastguard Worker 132*6777b538SAndroid Build Coastguard Worker StringBuilder& operator=(StringBuilder&& other); 133*6777b538SAndroid Build Coastguard Worker 134*6777b538SAndroid Build Coastguard Worker // Appends the Unicode code point |point| to the string, either by 135*6777b538SAndroid Build Coastguard Worker // increasing the |length_| of the string if the string has not been 136*6777b538SAndroid Build Coastguard Worker // converted, or by appending the UTF8 bytes for the code point. 137*6777b538SAndroid Build Coastguard Worker void Append(base_icu::UChar32 point); 138*6777b538SAndroid Build Coastguard Worker 139*6777b538SAndroid Build Coastguard Worker // Converts the builder from its default std::string_view to a full 140*6777b538SAndroid Build Coastguard Worker // std::string, performing a copy. Once a builder is converted, it cannot be 141*6777b538SAndroid Build Coastguard Worker // made a std::string_view again. 142*6777b538SAndroid Build Coastguard Worker void Convert(); 143*6777b538SAndroid Build Coastguard Worker 144*6777b538SAndroid Build Coastguard Worker // Returns the builder as a string, invalidating all state. This allows 145*6777b538SAndroid Build Coastguard Worker // the internal string buffer representation to be destructively moved 146*6777b538SAndroid Build Coastguard Worker // in cases where the builder will not be needed any more. 147*6777b538SAndroid Build Coastguard Worker std::string DestructiveAsString(); 148*6777b538SAndroid Build Coastguard Worker 149*6777b538SAndroid Build Coastguard Worker private: 150*6777b538SAndroid Build Coastguard Worker // The beginning of the input string. 151*6777b538SAndroid Build Coastguard Worker const char* pos_; 152*6777b538SAndroid Build Coastguard Worker 153*6777b538SAndroid Build Coastguard Worker // Number of bytes in |pos_| that make up the string being built. 154*6777b538SAndroid Build Coastguard Worker size_t length_; 155*6777b538SAndroid Build Coastguard Worker 156*6777b538SAndroid Build Coastguard Worker // The copied string representation. Will be unset until Convert() is 157*6777b538SAndroid Build Coastguard Worker // called. 158*6777b538SAndroid Build Coastguard Worker std::optional<std::string> string_; 159*6777b538SAndroid Build Coastguard Worker }; 160*6777b538SAndroid Build Coastguard Worker 161*6777b538SAndroid Build Coastguard Worker // Returns the next |count| bytes of the input stream, or nullopt if fewer 162*6777b538SAndroid Build Coastguard Worker // than |count| bytes remain. 163*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> PeekChars(size_t count); 164*6777b538SAndroid Build Coastguard Worker 165*6777b538SAndroid Build Coastguard Worker // Calls PeekChars() with a |count| of 1. 166*6777b538SAndroid Build Coastguard Worker std::optional<char> PeekChar(); 167*6777b538SAndroid Build Coastguard Worker 168*6777b538SAndroid Build Coastguard Worker // Returns the next |count| bytes of the input stream, or nullopt if fewer 169*6777b538SAndroid Build Coastguard Worker // than |count| bytes remain, and advances the parser position by |count|. 170*6777b538SAndroid Build Coastguard Worker std::optional<std::string_view> ConsumeChars(size_t count); 171*6777b538SAndroid Build Coastguard Worker 172*6777b538SAndroid Build Coastguard Worker // Calls ConsumeChars() with a |count| of 1. 173*6777b538SAndroid Build Coastguard Worker std::optional<char> ConsumeChar(); 174*6777b538SAndroid Build Coastguard Worker 175*6777b538SAndroid Build Coastguard Worker // Returns a pointer to the current character position. 176*6777b538SAndroid Build Coastguard Worker const char* pos(); 177*6777b538SAndroid Build Coastguard Worker 178*6777b538SAndroid Build Coastguard Worker // Skips over whitespace and comments to find the next token in the stream. 179*6777b538SAndroid Build Coastguard Worker // This does not advance the parser for non-whitespace or comment chars. 180*6777b538SAndroid Build Coastguard Worker Token GetNextToken(); 181*6777b538SAndroid Build Coastguard Worker 182*6777b538SAndroid Build Coastguard Worker // Consumes whitespace characters and comments until the next non-that is 183*6777b538SAndroid Build Coastguard Worker // encountered. 184*6777b538SAndroid Build Coastguard Worker void EatWhitespaceAndComments(); 185*6777b538SAndroid Build Coastguard Worker // Helper function that consumes a comment, assuming that the parser is 186*6777b538SAndroid Build Coastguard Worker // currently wound to a '/'. 187*6777b538SAndroid Build Coastguard Worker bool EatComment(); 188*6777b538SAndroid Build Coastguard Worker 189*6777b538SAndroid Build Coastguard Worker // Calls GetNextToken() and then ParseToken(). 190*6777b538SAndroid Build Coastguard Worker std::optional<Value> ParseNextToken(); 191*6777b538SAndroid Build Coastguard Worker 192*6777b538SAndroid Build Coastguard Worker // Takes a token that represents the start of a Value ("a structural token" 193*6777b538SAndroid Build Coastguard Worker // in RFC terms) and consumes it, returning the result as a Value. 194*6777b538SAndroid Build Coastguard Worker std::optional<Value> ParseToken(Token token); 195*6777b538SAndroid Build Coastguard Worker 196*6777b538SAndroid Build Coastguard Worker // Assuming that the parser is currently wound to '{', this parses a JSON 197*6777b538SAndroid Build Coastguard Worker // object into a Value. 198*6777b538SAndroid Build Coastguard Worker std::optional<Value> ConsumeDictionary(); 199*6777b538SAndroid Build Coastguard Worker 200*6777b538SAndroid Build Coastguard Worker // Assuming that the parser is wound to '[', this parses a JSON list into a 201*6777b538SAndroid Build Coastguard Worker // Value. 202*6777b538SAndroid Build Coastguard Worker std::optional<Value> ConsumeList(); 203*6777b538SAndroid Build Coastguard Worker 204*6777b538SAndroid Build Coastguard Worker // Calls through ConsumeStringRaw and wraps it in a value. 205*6777b538SAndroid Build Coastguard Worker std::optional<Value> ConsumeString(); 206*6777b538SAndroid Build Coastguard Worker 207*6777b538SAndroid Build Coastguard Worker // Assuming that the parser is wound to a double quote, this parses a string, 208*6777b538SAndroid Build Coastguard Worker // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on 209*6777b538SAndroid Build Coastguard Worker // success and places result into |out|. Returns false on failure with 210*6777b538SAndroid Build Coastguard Worker // error information set. 211*6777b538SAndroid Build Coastguard Worker bool ConsumeStringRaw(StringBuilder* out); 212*6777b538SAndroid Build Coastguard Worker // Helper function for ConsumeStringRaw() that consumes the next four or 10 213*6777b538SAndroid Build Coastguard Worker // bytes (parser is wound to the first character of a HEX sequence, with the 214*6777b538SAndroid Build Coastguard Worker // potential for consuming another \uXXXX for a surrogate). Returns true on 215*6777b538SAndroid Build Coastguard Worker // success and places the code point |out_code_point|, and false on failure. 216*6777b538SAndroid Build Coastguard Worker bool DecodeUTF16(base_icu::UChar32* out_code_point); 217*6777b538SAndroid Build Coastguard Worker 218*6777b538SAndroid Build Coastguard Worker // Assuming that the parser is wound to the start of a valid JSON number, 219*6777b538SAndroid Build Coastguard Worker // this parses and converts it to either an int or double value. 220*6777b538SAndroid Build Coastguard Worker std::optional<Value> ConsumeNumber(); 221*6777b538SAndroid Build Coastguard Worker // Helper that reads characters that are ints. Returns true if a number was 222*6777b538SAndroid Build Coastguard Worker // read and false on error. 223*6777b538SAndroid Build Coastguard Worker bool ReadInt(bool allow_leading_zeros); 224*6777b538SAndroid Build Coastguard Worker 225*6777b538SAndroid Build Coastguard Worker // Consumes the literal values of |true|, |false|, and |null|, assuming the 226*6777b538SAndroid Build Coastguard Worker // parser is wound to the first character of any of those. 227*6777b538SAndroid Build Coastguard Worker std::optional<Value> ConsumeLiteral(); 228*6777b538SAndroid Build Coastguard Worker 229*6777b538SAndroid Build Coastguard Worker // Helper function that returns true if the byte squence |match| can be 230*6777b538SAndroid Build Coastguard Worker // consumed at the current parser position. Returns false if there are fewer 231*6777b538SAndroid Build Coastguard Worker // than |match|-length bytes or if the sequence does not match, and the 232*6777b538SAndroid Build Coastguard Worker // parser state is unchanged. 233*6777b538SAndroid Build Coastguard Worker bool ConsumeIfMatch(std::string_view match); 234*6777b538SAndroid Build Coastguard Worker 235*6777b538SAndroid Build Coastguard Worker // Sets the error information to |code| at the current column, based on 236*6777b538SAndroid Build Coastguard Worker // |index_| and |index_last_line_|, with an optional positive/negative 237*6777b538SAndroid Build Coastguard Worker // adjustment by |column_adjust|. 238*6777b538SAndroid Build Coastguard Worker void ReportError(JsonParseError code, int column_adjust); 239*6777b538SAndroid Build Coastguard Worker 240*6777b538SAndroid Build Coastguard Worker // Given the line and column number of an error, formats one of the error 241*6777b538SAndroid Build Coastguard Worker // message contants from json_reader.h for human display. 242*6777b538SAndroid Build Coastguard Worker static std::string FormatErrorMessage(int line, int column, 243*6777b538SAndroid Build Coastguard Worker const std::string& description); 244*6777b538SAndroid Build Coastguard Worker 245*6777b538SAndroid Build Coastguard Worker // base::JSONParserOptions that control parsing. 246*6777b538SAndroid Build Coastguard Worker const int options_; 247*6777b538SAndroid Build Coastguard Worker 248*6777b538SAndroid Build Coastguard Worker // Maximum depth to parse. 249*6777b538SAndroid Build Coastguard Worker const size_t max_depth_; 250*6777b538SAndroid Build Coastguard Worker 251*6777b538SAndroid Build Coastguard Worker // The input stream being parsed. Note: Not guaranteed to NUL-terminated. 252*6777b538SAndroid Build Coastguard Worker std::string_view input_; 253*6777b538SAndroid Build Coastguard Worker 254*6777b538SAndroid Build Coastguard Worker // The index in the input stream to which the parser is wound. 255*6777b538SAndroid Build Coastguard Worker size_t index_; 256*6777b538SAndroid Build Coastguard Worker 257*6777b538SAndroid Build Coastguard Worker // The number of times the parser has recursed (current stack depth). 258*6777b538SAndroid Build Coastguard Worker size_t stack_depth_; 259*6777b538SAndroid Build Coastguard Worker 260*6777b538SAndroid Build Coastguard Worker // The line number that the parser is at currently. 261*6777b538SAndroid Build Coastguard Worker int line_number_; 262*6777b538SAndroid Build Coastguard Worker 263*6777b538SAndroid Build Coastguard Worker // The last value of |index_| on the previous line. 264*6777b538SAndroid Build Coastguard Worker size_t index_last_line_; 265*6777b538SAndroid Build Coastguard Worker 266*6777b538SAndroid Build Coastguard Worker // Error information. 267*6777b538SAndroid Build Coastguard Worker JsonParseError error_code_; 268*6777b538SAndroid Build Coastguard Worker int error_line_; 269*6777b538SAndroid Build Coastguard Worker int error_column_; 270*6777b538SAndroid Build Coastguard Worker 271*6777b538SAndroid Build Coastguard Worker friend class JSONParserTest; 272*6777b538SAndroid Build Coastguard Worker FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar); 273*6777b538SAndroid Build Coastguard Worker FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary); 274*6777b538SAndroid Build Coastguard Worker FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList); 275*6777b538SAndroid Build Coastguard Worker FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString); 276*6777b538SAndroid Build Coastguard Worker FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals); 277*6777b538SAndroid Build Coastguard Worker FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers); 278*6777b538SAndroid Build Coastguard Worker FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages); 279*6777b538SAndroid Build Coastguard Worker }; 280*6777b538SAndroid Build Coastguard Worker 281*6777b538SAndroid Build Coastguard Worker // Used when decoding and an invalid utf-8 sequence is encountered. 282*6777b538SAndroid Build Coastguard Worker BASE_EXPORT extern const char kUnicodeReplacementString[]; 283*6777b538SAndroid Build Coastguard Worker 284*6777b538SAndroid Build Coastguard Worker } // namespace internal 285*6777b538SAndroid Build Coastguard Worker } // namespace base 286*6777b538SAndroid Build Coastguard Worker 287*6777b538SAndroid Build Coastguard Worker #endif // BASE_JSON_JSON_PARSER_H_ 288