1 //! When serializing or deserializing JSON goes wrong.
2 
3 use crate::io;
4 use alloc::boxed::Box;
5 use alloc::string::{String, ToString};
6 use core::fmt::{self, Debug, Display};
7 use core::result;
8 use core::str::FromStr;
9 use serde::{de, ser};
10 #[cfg(feature = "std")]
11 use std::error;
12 
/// This type represents all possible errors that can occur when serializing or
/// deserializing JSON data.
///
/// The position at which the error was detected is exposed through `line()`
/// and `column()`, and its broad cause through `classify()`.
pub struct Error {
    /// This `Box` allows us to keep the size of `Error` as small as possible. A
    /// larger `Error` type was substantially slower due to all the functions
    /// that pass around `Result<T, Error>`.
    err: Box<ErrorImpl>,
}
21 
/// Alias for a `Result` with the error type `serde_json::Error`.
///
/// `Result<T>` is shorthand for `core::result::Result<T, Error>`.
pub type Result<T> = result::Result<T, Error>;
24 
25 impl Error {
26     /// One-based line number at which the error was detected.
27     ///
28     /// Characters in the first line of the input (before the first newline
29     /// character) are in line 1.
line(&self) -> usize30     pub fn line(&self) -> usize {
31         self.err.line
32     }
33 
34     /// One-based column number at which the error was detected.
35     ///
36     /// The first character in the input and any characters immediately
37     /// following a newline character are in column 1.
38     ///
39     /// Note that errors may occur in column 0, for example if a read from an IO
40     /// stream fails immediately following a previously read newline character.
column(&self) -> usize41     pub fn column(&self) -> usize {
42         self.err.column
43     }
44 
45     /// Categorizes the cause of this error.
46     ///
47     /// - `Category::Io` - failure to read or write bytes on an IO stream
48     /// - `Category::Syntax` - input that is not syntactically valid JSON
49     /// - `Category::Data` - input data that is semantically incorrect
50     /// - `Category::Eof` - unexpected end of the input data
classify(&self) -> Category51     pub fn classify(&self) -> Category {
52         match self.err.code {
53             ErrorCode::Message(_) => Category::Data,
54             ErrorCode::Io(_) => Category::Io,
55             ErrorCode::EofWhileParsingList
56             | ErrorCode::EofWhileParsingObject
57             | ErrorCode::EofWhileParsingString
58             | ErrorCode::EofWhileParsingValue => Category::Eof,
59             ErrorCode::ExpectedColon
60             | ErrorCode::ExpectedListCommaOrEnd
61             | ErrorCode::ExpectedObjectCommaOrEnd
62             | ErrorCode::ExpectedSomeIdent
63             | ErrorCode::ExpectedSomeValue
64             | ErrorCode::InvalidEscape
65             | ErrorCode::InvalidNumber
66             | ErrorCode::NumberOutOfRange
67             | ErrorCode::InvalidUnicodeCodePoint
68             | ErrorCode::ControlCharacterWhileParsingString
69             | ErrorCode::KeyMustBeAString
70             | ErrorCode::LoneLeadingSurrogateInHexEscape
71             | ErrorCode::TrailingComma
72             | ErrorCode::TrailingCharacters
73             | ErrorCode::UnexpectedEndOfHexEscape
74             | ErrorCode::RecursionLimitExceeded => Category::Syntax,
75         }
76     }
77 
78     /// Returns true if this error was caused by a failure to read or write
79     /// bytes on an IO stream.
is_io(&self) -> bool80     pub fn is_io(&self) -> bool {
81         self.classify() == Category::Io
82     }
83 
84     /// Returns true if this error was caused by input that was not
85     /// syntactically valid JSON.
is_syntax(&self) -> bool86     pub fn is_syntax(&self) -> bool {
87         self.classify() == Category::Syntax
88     }
89 
90     /// Returns true if this error was caused by input data that was
91     /// semantically incorrect.
92     ///
93     /// For example, JSON containing a number is semantically incorrect when the
94     /// type being deserialized into holds a String.
is_data(&self) -> bool95     pub fn is_data(&self) -> bool {
96         self.classify() == Category::Data
97     }
98 
99     /// Returns true if this error was caused by prematurely reaching the end of
100     /// the input data.
101     ///
102     /// Callers that process streaming input may be interested in retrying the
103     /// deserialization once more data is available.
is_eof(&self) -> bool104     pub fn is_eof(&self) -> bool {
105         self.classify() == Category::Eof
106     }
107 }
108 
/// Categorizes the cause of a `serde_json::Error`.
///
/// This is the value returned by `Error::classify`; the `is_io`, `is_syntax`,
/// `is_data` and `is_eof` helpers on `Error` each test for one variant.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Category {
    /// The error was caused by a failure to read or write bytes on an IO
    /// stream.
    Io,

    /// The error was caused by input that was not syntactically valid JSON.
    Syntax,

    /// The error was caused by input data that was semantically incorrect.
    ///
    /// For example, JSON containing a number is semantically incorrect when the
    /// type being deserialized into holds a String.
    Data,

    /// The error was caused by prematurely reaching the end of the input data.
    ///
    /// Callers that process streaming input may be interested in retrying the
    /// deserialization once more data is available.
    Eof,
}
131 
#[cfg(feature = "std")]
#[allow(clippy::fallible_impl_from)]
impl From<Error> for io::Error {
    /// Convert a `serde_json::Error` into an `io::Error`.
    ///
    /// An error that wraps an IO error yields that original IO error.
    /// JSON syntax and data errors are turned into `InvalidData` IO errors.
    /// EOF errors are turned into `UnexpectedEof` IO errors.
    ///
    /// ```
    /// use std::io;
    ///
    /// enum MyError {
    ///     Io(io::Error),
    ///     Json(serde_json::Error),
    /// }
    ///
    /// impl From<serde_json::Error> for MyError {
    ///     fn from(err: serde_json::Error) -> MyError {
    ///         use serde_json::error::Category;
    ///         match err.classify() {
    ///             Category::Io => {
    ///                 MyError::Io(err.into())
    ///             }
    ///             Category::Syntax | Category::Data | Category::Eof => {
    ///                 MyError::Json(err)
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    fn from(j: Error) -> Self {
        // Pick the IO error kind that will wrap `j`. The IO category is the
        // exception: there the wrapped `io::Error` is handed back untouched.
        let kind = match j.classify() {
            Category::Io => {
                return match j.err.code {
                    ErrorCode::Io(err) => err,
                    // `classify` reports `Category::Io` only for
                    // `ErrorCode::Io`.
                    _ => unreachable!(),
                };
            }
            Category::Syntax | Category::Data => io::ErrorKind::InvalidData,
            Category::Eof => io::ErrorKind::UnexpectedEof,
        };
        io::Error::new(kind, j)
    }
}
174 
/// Boxed payload of an `Error`: what went wrong and where it was detected.
struct ErrorImpl {
    /// The specific failure that occurred.
    code: ErrorCode,
    /// Line at which the error was detected. A value of 0 means no position
    /// is recorded; `Display` then prints the code without a position.
    line: usize,
    /// Column at which the error was detected.
    column: usize,
}
180 
/// The specific failure carried by an `Error`.
///
/// `Error::classify` maps each variant onto a `Category`, and the `Display`
/// impl supplies the human-readable message for each variant.
pub(crate) enum ErrorCode {
    /// Catchall for free-form error messages, such as those passed to
    /// `de::Error::custom` and `ser::Error::custom`. Classified as
    /// `Category::Data`.
    Message(Box<str>),

    /// Some IO error occurred while serializing or deserializing.
    Io(io::Error),

    /// EOF while parsing a list.
    EofWhileParsingList,

    /// EOF while parsing an object.
    EofWhileParsingObject,

    /// EOF while parsing a string.
    EofWhileParsingString,

    /// EOF while parsing a JSON value.
    EofWhileParsingValue,

    /// Expected this character to be a `':'`.
    ExpectedColon,

    /// Expected this character to be either a `','` or a `']'`.
    ExpectedListCommaOrEnd,

    /// Expected this character to be either a `','` or a `'}'`.
    ExpectedObjectCommaOrEnd,

    /// Expected to parse either a `true`, `false`, or a `null`.
    ExpectedSomeIdent,

    /// Expected this character to start a JSON value.
    ExpectedSomeValue,

    /// Invalid hex escape code.
    InvalidEscape,

    /// Invalid number.
    InvalidNumber,

    /// Number is bigger than the maximum value of its type.
    NumberOutOfRange,

    /// Invalid unicode code point.
    InvalidUnicodeCodePoint,

    /// Control character found while parsing a string.
    ControlCharacterWhileParsingString,

    /// Object key is not a string.
    KeyMustBeAString,

    /// Lone leading surrogate in hex escape.
    LoneLeadingSurrogateInHexEscape,

    /// JSON has a comma after the last value in an array or map.
    TrailingComma,

    /// JSON has non-whitespace trailing characters after the value.
    TrailingCharacters,

    /// Unexpected end of hex escape.
    UnexpectedEndOfHexEscape,

    /// Encountered nesting of JSON maps and arrays more than 128 layers deep.
    RecursionLimitExceeded,
}
248 
249 impl Error {
250     #[cold]
syntax(code: ErrorCode, line: usize, column: usize) -> Self251     pub(crate) fn syntax(code: ErrorCode, line: usize, column: usize) -> Self {
252         Error {
253             err: Box::new(ErrorImpl { code, line, column }),
254         }
255     }
256 
257     // Not public API. Should be pub(crate).
258     //
259     // Update `eager_json` crate when this function changes.
260     #[doc(hidden)]
261     #[cold]
io(error: io::Error) -> Self262     pub fn io(error: io::Error) -> Self {
263         Error {
264             err: Box::new(ErrorImpl {
265                 code: ErrorCode::Io(error),
266                 line: 0,
267                 column: 0,
268             }),
269         }
270     }
271 
272     #[cold]
fix_position<F>(self, f: F) -> Self where F: FnOnce(ErrorCode) -> Error,273     pub(crate) fn fix_position<F>(self, f: F) -> Self
274     where
275         F: FnOnce(ErrorCode) -> Error,
276     {
277         if self.err.line == 0 {
278             f(self.err.code)
279         } else {
280             self
281         }
282     }
283 }
284 
285 impl Display for ErrorCode {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result286     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
287         match self {
288             ErrorCode::Message(msg) => f.write_str(msg),
289             ErrorCode::Io(err) => Display::fmt(err, f),
290             ErrorCode::EofWhileParsingList => f.write_str("EOF while parsing a list"),
291             ErrorCode::EofWhileParsingObject => f.write_str("EOF while parsing an object"),
292             ErrorCode::EofWhileParsingString => f.write_str("EOF while parsing a string"),
293             ErrorCode::EofWhileParsingValue => f.write_str("EOF while parsing a value"),
294             ErrorCode::ExpectedColon => f.write_str("expected `:`"),
295             ErrorCode::ExpectedListCommaOrEnd => f.write_str("expected `,` or `]`"),
296             ErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"),
297             ErrorCode::ExpectedSomeIdent => f.write_str("expected ident"),
298             ErrorCode::ExpectedSomeValue => f.write_str("expected value"),
299             ErrorCode::InvalidEscape => f.write_str("invalid escape"),
300             ErrorCode::InvalidNumber => f.write_str("invalid number"),
301             ErrorCode::NumberOutOfRange => f.write_str("number out of range"),
302             ErrorCode::InvalidUnicodeCodePoint => f.write_str("invalid unicode code point"),
303             ErrorCode::ControlCharacterWhileParsingString => {
304                 f.write_str("control character (\\u0000-\\u001F) found while parsing a string")
305             }
306             ErrorCode::KeyMustBeAString => f.write_str("key must be a string"),
307             ErrorCode::LoneLeadingSurrogateInHexEscape => {
308                 f.write_str("lone leading surrogate in hex escape")
309             }
310             ErrorCode::TrailingComma => f.write_str("trailing comma"),
311             ErrorCode::TrailingCharacters => f.write_str("trailing characters"),
312             ErrorCode::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"),
313             ErrorCode::RecursionLimitExceeded => f.write_str("recursion limit exceeded"),
314         }
315     }
316 }
317 
318 impl serde::de::StdError for Error {
319     #[cfg(feature = "std")]
source(&self) -> Option<&(dyn error::Error + 'static)>320     fn source(&self) -> Option<&(dyn error::Error + 'static)> {
321         match &self.err.code {
322             ErrorCode::Io(err) => err.source(),
323             _ => None,
324         }
325     }
326 }
327 
328 impl Display for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result329     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
330         Display::fmt(&*self.err, f)
331     }
332 }
333 
334 impl Display for ErrorImpl {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result335     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
336         if self.line == 0 {
337             Display::fmt(&self.code, f)
338         } else {
339             write!(
340                 f,
341                 "{} at line {} column {}",
342                 self.code, self.line, self.column
343             )
344         }
345     }
346 }
347 
348 // Remove two layers of verbosity from the debug representation. Humans often
349 // end up seeing this representation because it is what unwrap() shows.
350 impl Debug for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result351     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
352         write!(
353             f,
354             "Error({:?}, line: {}, column: {})",
355             self.err.code.to_string(),
356             self.err.line,
357             self.err.column
358         )
359     }
360 }
361 
362 impl de::Error for Error {
363     #[cold]
custom<T: Display>(msg: T) -> Error364     fn custom<T: Display>(msg: T) -> Error {
365         make_error(msg.to_string())
366     }
367 
368     #[cold]
invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self369     fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
370         if let de::Unexpected::Unit = unexp {
371             Error::custom(format_args!("invalid type: null, expected {}", exp))
372         } else {
373             Error::custom(format_args!("invalid type: {}, expected {}", unexp, exp))
374         }
375     }
376 }
377 
378 impl ser::Error for Error {
379     #[cold]
custom<T: Display>(msg: T) -> Error380     fn custom<T: Display>(msg: T) -> Error {
381         make_error(msg.to_string())
382     }
383 }
384 
385 // Parse our own error message that looks like "{} at line {} column {}" to work
386 // around erased-serde round-tripping the error through de::Error::custom.
make_error(mut msg: String) -> Error387 fn make_error(mut msg: String) -> Error {
388     let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0));
389     Error {
390         err: Box::new(ErrorImpl {
391             code: ErrorCode::Message(msg.into_boxed_str()),
392             line,
393             column,
394         }),
395     }
396 }
397 
/// Splits a trailing `" at line {line} column {column}"` suffix off `msg`.
///
/// The last occurrence of `" at line "` is taken as the start of the suffix,
/// and the suffix must run to the very end of `msg`. On success the suffix is
/// removed from `msg` and the parsed `(line, column)` pair is returned.
///
/// Returns `None`, leaving `msg` untouched, when the suffix is absent or
/// malformed, or when either number does not fit in a `usize`.
fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> {
    const LINE_MARKER: &str = " at line ";
    const COLUMN_MARKER: &str = " column ";

    let start_of_suffix = msg.rfind(LINE_MARKER)?;

    // Find start and end of line number.
    let start_of_line = start_of_suffix + LINE_MARKER.len();
    let mut end_of_line = start_of_line;
    while starts_with_digit(&msg[end_of_line..]) {
        end_of_line += 1;
    }

    if !msg[end_of_line..].starts_with(COLUMN_MARKER) {
        return None;
    }

    // Find start and end of column number.
    let start_of_column = end_of_line + COLUMN_MARKER.len();
    let mut end_of_column = start_of_column;
    while starts_with_digit(&msg[end_of_column..]) {
        end_of_column += 1;
    }

    // The column number has to be the last thing in the message.
    if end_of_column < msg.len() {
        return None;
    }

    // Parse numbers. An empty digit run and a value too large for `usize`
    // both fail here, which rejects the suffix as a whole.
    let line = usize::from_str(&msg[start_of_line..end_of_line]).ok()?;
    let column = usize::from_str(&msg[start_of_column..end_of_column]).ok()?;

    msg.truncate(start_of_suffix);
    Some((line, column))
}

/// Returns true when the first byte of `slice` is an ASCII digit (`0`-`9`).
///
/// An empty slice yields false.
fn starts_with_digit(slice: &str) -> bool {
    slice.as_bytes().first().map_or(false, u8::is_ascii_digit)
}
446