1 //! When serializing or deserializing JSON goes wrong.
2 
3 use crate::io;
4 use alloc::boxed::Box;
5 use alloc::string::{String, ToString};
6 use core::fmt::{self, Debug, Display};
7 use core::result;
8 use core::str::FromStr;
9 use serde::{de, ser};
10 #[cfg(feature = "std")]
11 use std::error;
12 #[cfg(feature = "std")]
13 use std::io::ErrorKind;
14 
/// This type represents all possible errors that can occur when serializing or
/// deserializing JSON data.
///
/// Use [`Error::classify`] to find out which broad kind of failure occurred,
/// and [`Error::line`] / [`Error::column`] to find out where in the input it
/// was detected.
pub struct Error {
    /// This `Box` allows us to keep the size of `Error` as small as possible. A
    /// larger `Error` type was substantially slower due to all the functions
    /// that pass around `Result<T, Error>`.
    err: Box<ErrorImpl>,
}
23 
/// Alias for a `Result` with the error type `serde_json_lenient::Error`.
///
/// The success type `T` is chosen by the caller; the error type is always
/// this module's [`Error`].
pub type Result<T> = result::Result<T, Error>;
26 
27 impl Error {
28     /// One-based line number at which the error was detected.
29     ///
30     /// Characters in the first line of the input (before the first newline
31     /// character) are in line 1.
line(&self) -> usize32     pub fn line(&self) -> usize {
33         self.err.line
34     }
35 
36     /// One-based column number at which the error was detected.
37     ///
38     /// The first character in the input and any characters immediately
39     /// following a newline character are in column 1.
40     ///
41     /// Note that errors may occur in column 0, for example if a read from an
42     /// I/O stream fails immediately following a previously read newline
43     /// character.
column(&self) -> usize44     pub fn column(&self) -> usize {
45         self.err.column
46     }
47 
48     /// Categorizes the cause of this error.
49     ///
50     /// - `Category::Io` - failure to read or write bytes on an I/O stream
51     /// - `Category::Syntax` - input that is not syntactically valid JSON
52     /// - `Category::Data` - input data that is semantically incorrect
53     /// - `Category::Eof` - unexpected end of the input data
classify(&self) -> Category54     pub fn classify(&self) -> Category {
55         match self.err.code {
56             ErrorCode::Message(_) => Category::Data,
57             ErrorCode::Io(_) => Category::Io,
58             ErrorCode::EofWhileParsingBlockComment
59             | ErrorCode::EofWhileParsingList
60             | ErrorCode::EofWhileParsingObject
61             | ErrorCode::EofWhileParsingString
62             | ErrorCode::EofWhileParsingValue => Category::Eof,
63             ErrorCode::ExpectedColon
64             | ErrorCode::ExpectedCommentSlashOrStar
65             | ErrorCode::ExpectedListCommaOrEnd
66             | ErrorCode::ExpectedObjectCommaOrEnd
67             | ErrorCode::ExpectedSomeIdent
68             | ErrorCode::ExpectedSomeValue
69             | ErrorCode::ExpectedDoubleQuote
70             | ErrorCode::InvalidEscape
71             | ErrorCode::InvalidNumber
72             | ErrorCode::NumberOutOfRange
73             | ErrorCode::InvalidUnicodeCodePoint
74             | ErrorCode::ControlCharacterWhileParsingString
75             | ErrorCode::KeyMustBeAString
76             | ErrorCode::ExpectedNumericKey
77             | ErrorCode::FloatKeyMustBeFinite
78             | ErrorCode::LoneLeadingSurrogateInHexEscape
79             | ErrorCode::TrailingComma
80             | ErrorCode::TrailingCharacters
81             | ErrorCode::UnexpectedEndOfHexEscape
82             | ErrorCode::RecursionLimitExceeded => Category::Syntax,
83         }
84     }
85 
86     /// Returns true if this error was caused by a failure to read or write
87     /// bytes on an I/O stream.
is_io(&self) -> bool88     pub fn is_io(&self) -> bool {
89         self.classify() == Category::Io
90     }
91 
92     /// Returns true if this error was caused by input that was not
93     /// syntactically valid JSON.
is_syntax(&self) -> bool94     pub fn is_syntax(&self) -> bool {
95         self.classify() == Category::Syntax
96     }
97 
98     /// Returns true if this error was caused by input data that was
99     /// semantically incorrect.
100     ///
101     /// For example, JSON containing a number is semantically incorrect when the
102     /// type being deserialized into holds a String.
is_data(&self) -> bool103     pub fn is_data(&self) -> bool {
104         self.classify() == Category::Data
105     }
106 
107     /// Returns true if this error was caused by prematurely reaching the end of
108     /// the input data.
109     ///
110     /// Callers that process streaming input may be interested in retrying the
111     /// deserialization once more data is available.
is_eof(&self) -> bool112     pub fn is_eof(&self) -> bool {
113         self.classify() == Category::Eof
114     }
115 
116     /// The kind reported by the underlying standard library I/O error, if this
117     /// error was caused by a failure to read or write bytes on an I/O stream.
118     ///
119     /// # Example
120     ///
121     /// ```
122     /// use serde_json_lenient::Value;
123     /// use std::io::{self, ErrorKind, Read};
124     /// use std::process;
125     ///
126     /// struct ReaderThatWillTimeOut<'a>(&'a [u8]);
127     ///
128     /// impl<'a> Read for ReaderThatWillTimeOut<'a> {
129     ///     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
130     ///         if self.0.is_empty() {
131     ///             Err(io::Error::new(ErrorKind::TimedOut, "timed out"))
132     ///         } else {
133     ///             self.0.read(buf)
134     ///         }
135     ///     }
136     /// }
137     ///
138     /// fn main() {
139     ///     let reader = ReaderThatWillTimeOut(br#" {"k": "#);
140     ///
141     ///     let _: Value = match serde_json_lenient::from_reader(reader) {
142     ///         Ok(value) => value,
143     ///         Err(error) => {
144     ///             if error.io_error_kind() == Some(ErrorKind::TimedOut) {
145     ///                 // Maybe this application needs to retry certain kinds of errors.
146     ///
147     ///                 # return;
148     ///             } else {
149     ///                 eprintln!("error: {}", error);
150     ///                 process::exit(1);
151     ///             }
152     ///         }
153     ///     };
154     /// }
155     /// ```
156     #[cfg(feature = "std")]
io_error_kind(&self) -> Option<ErrorKind>157     pub fn io_error_kind(&self) -> Option<ErrorKind> {
158         if let ErrorCode::Io(io_error) = &self.err.code {
159             Some(io_error.kind())
160         } else {
161             None
162         }
163     }
164 }
165 
/// Categorizes the cause of a `serde_json_lenient::Error`.
///
/// This is the value returned by [`Error::classify`]; the `Error::is_io`,
/// `Error::is_syntax`, `Error::is_data` and `Error::is_eof` helpers each
/// compare against one of these variants.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Category {
    /// The error was caused by a failure to read or write bytes on an I/O
    /// stream.
    Io,

    /// The error was caused by input that was not syntactically valid JSON.
    Syntax,

    /// The error was caused by input data that was semantically incorrect.
    ///
    /// For example, JSON containing a number is semantically incorrect when the
    /// type being deserialized into holds a String.
    Data,

    /// The error was caused by prematurely reaching the end of the input data.
    ///
    /// Callers that process streaming input may be interested in retrying the
    /// deserialization once more data is available.
    Eof,
}
188 
#[cfg(feature = "std")]
#[allow(clippy::fallible_impl_from)]
impl From<Error> for io::Error {
    /// Turns a `serde_json_lenient::Error` into an `io::Error`.
    ///
    /// An error that wraps an I/O error yields that original I/O error.
    /// JSON syntax and data errors become `InvalidData` I/O errors, and EOF
    /// errors become `UnexpectedEof` I/O errors.
    ///
    /// ```
    /// use std::io;
    ///
    /// enum MyError {
    ///     Io(io::Error),
    ///     Json(serde_json_lenient::Error),
    /// }
    ///
    /// impl From<serde_json_lenient::Error> for MyError {
    ///     fn from(err: serde_json_lenient::Error) -> MyError {
    ///         use serde_json_lenient::error::Category;
    ///         match err.classify() {
    ///             Category::Io => {
    ///                 MyError::Io(err.into())
    ///             }
    ///             Category::Syntax | Category::Data | Category::Eof => {
    ///                 MyError::Json(err)
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    fn from(j: Error) -> Self {
        let kind = match j.classify() {
            Category::Io => {
                // `classify` reports `Io` only for `ErrorCode::Io`, so the
                // wrapped I/O error can be handed back unchanged.
                return match j.err.code {
                    ErrorCode::Io(err) => err,
                    _ => unreachable!(),
                };
            }
            Category::Syntax | Category::Data => ErrorKind::InvalidData,
            Category::Eof => ErrorKind::UnexpectedEof,
        };

        // Keep the JSON error as the payload of the new I/O error.
        io::Error::new(kind, j)
    }
}
231 
/// Boxed payload of an [`Error`]: what went wrong and where it was detected.
struct ErrorImpl {
    /// The specific failure that occurred.
    code: ErrorCode,
    /// Line number at which the error was detected. A value of 0 means no
    /// position is attached; the `Display` impl then prints only the code.
    line: usize,
    /// Column number at which the error was detected.
    column: usize,
}
237 
/// The specific reason an [`Error`] was raised.
///
/// `Error::classify` groups these variants into the public [`Category`]
/// buckets, and the `Display` impl supplies the human-readable text for each.
pub(crate) enum ErrorCode {
    /// Catchall for free-form error messages, such as those created through
    /// the `custom` constructors of `de::Error` and `ser::Error`.
    ///
    /// `Error::classify` reports this variant as `Category::Data`.
    Message(Box<str>),

    /// Some I/O error occurred while serializing or deserializing.
    Io(io::Error),

    /// Saw an opening `'/*'` without a closing `'*/'`.
    EofWhileParsingBlockComment,

    /// EOF while parsing a list.
    EofWhileParsingList,

    /// EOF while parsing an object.
    EofWhileParsingObject,

    /// EOF while parsing a string.
    EofWhileParsingString,

    /// EOF while parsing a JSON value.
    EofWhileParsingValue,

    /// Expected this character to be a `':'`.
    ExpectedColon,

    /// Saw a `'/'` while parsing whitespace, so expected it to be
    /// followed by either `'/'` or `'*'`.
    ExpectedCommentSlashOrStar,

    /// Expected this character to be either a `','` or a `']'`.
    ExpectedListCommaOrEnd,

    /// Expected this character to be either a `','` or a `'}'`.
    ExpectedObjectCommaOrEnd,

    /// Expected to parse either a `true`, `false`, or a `null`.
    ExpectedSomeIdent,

    /// Expected this character to start a JSON value.
    ExpectedSomeValue,

    /// Expected this character to be a `"`.
    ExpectedDoubleQuote,

    /// Invalid hex escape code.
    InvalidEscape,

    /// Invalid number.
    InvalidNumber,

    /// Number is bigger than the maximum value of its type.
    NumberOutOfRange,

    /// Invalid unicode code point.
    InvalidUnicodeCodePoint,

    /// Control character found while parsing a string.
    ControlCharacterWhileParsingString,

    /// Object key is not a string.
    KeyMustBeAString,

    /// Contents of key were supposed to be a number.
    ExpectedNumericKey,

    /// Object key is a non-finite float value.
    FloatKeyMustBeFinite,

    /// Lone leading surrogate in hex escape.
    LoneLeadingSurrogateInHexEscape,

    /// JSON has a comma after the last value in an array or map.
    TrailingComma,

    /// JSON has non-whitespace trailing characters after the value.
    TrailingCharacters,

    /// Unexpected end of hex escape.
    UnexpectedEndOfHexEscape,

    /// Encountered nesting of JSON maps and arrays more than 128 layers deep.
    RecursionLimitExceeded,
}
321 
322 impl Error {
323     #[cold]
syntax(code: ErrorCode, line: usize, column: usize) -> Self324     pub(crate) fn syntax(code: ErrorCode, line: usize, column: usize) -> Self {
325         Error {
326             err: Box::new(ErrorImpl { code, line, column }),
327         }
328     }
329 
330     // Not public API. Should be pub(crate).
331     //
332     // Update `eager_json` crate when this function changes.
333     #[doc(hidden)]
334     #[cold]
io(error: io::Error) -> Self335     pub fn io(error: io::Error) -> Self {
336         Error {
337             err: Box::new(ErrorImpl {
338                 code: ErrorCode::Io(error),
339                 line: 0,
340                 column: 0,
341             }),
342         }
343     }
344 
345     #[cold]
fix_position<F>(self, f: F) -> Self where F: FnOnce(ErrorCode) -> Error,346     pub(crate) fn fix_position<F>(self, f: F) -> Self
347     where
348         F: FnOnce(ErrorCode) -> Error,
349     {
350         if self.err.line == 0 {
351             f(self.err.code)
352         } else {
353             self
354         }
355     }
356 }
357 
358 impl Display for ErrorCode {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result359     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
360         match self {
361             ErrorCode::Message(msg) => f.write_str(msg),
362             ErrorCode::Io(err) => Display::fmt(err, f),
363             ErrorCode::EofWhileParsingBlockComment => {
364                 f.write_str("EOF while parsing a block comment")
365             }
366             ErrorCode::EofWhileParsingList => f.write_str("EOF while parsing a list"),
367             ErrorCode::EofWhileParsingObject => f.write_str("EOF while parsing an object"),
368             ErrorCode::EofWhileParsingString => f.write_str("EOF while parsing a string"),
369             ErrorCode::EofWhileParsingValue => f.write_str("EOF while parsing a value"),
370             ErrorCode::ExpectedColon => f.write_str("expected `:`"),
371             ErrorCode::ExpectedCommentSlashOrStar => f.write_str("expected `/` or `*` after `/`"),
372             ErrorCode::ExpectedListCommaOrEnd => f.write_str("expected `,` or `]`"),
373             ErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"),
374             ErrorCode::ExpectedSomeIdent => f.write_str("expected ident"),
375             ErrorCode::ExpectedSomeValue => f.write_str("expected value"),
376             ErrorCode::ExpectedDoubleQuote => f.write_str("expected `\"`"),
377             ErrorCode::InvalidEscape => f.write_str("invalid escape"),
378             ErrorCode::InvalidNumber => f.write_str("invalid number"),
379             ErrorCode::NumberOutOfRange => f.write_str("number out of range"),
380             ErrorCode::InvalidUnicodeCodePoint => f.write_str("invalid unicode code point"),
381             ErrorCode::ControlCharacterWhileParsingString => {
382                 f.write_str("control character (\\u0000-\\u001F) found while parsing a string")
383             }
384             ErrorCode::KeyMustBeAString => f.write_str("key must be a string"),
385             ErrorCode::ExpectedNumericKey => {
386                 f.write_str("invalid value: expected key to be a number in quotes")
387             }
388             ErrorCode::FloatKeyMustBeFinite => {
389                 f.write_str("float key must be finite (got NaN or +/-inf)")
390             }
391             ErrorCode::LoneLeadingSurrogateInHexEscape => {
392                 f.write_str("lone leading surrogate in hex escape")
393             }
394             ErrorCode::TrailingComma => f.write_str("trailing comma"),
395             ErrorCode::TrailingCharacters => f.write_str("trailing characters"),
396             ErrorCode::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"),
397             ErrorCode::RecursionLimitExceeded => f.write_str("recursion limit exceeded"),
398         }
399     }
400 }
401 
402 impl serde::de::StdError for Error {
403     #[cfg(feature = "std")]
source(&self) -> Option<&(dyn error::Error + 'static)>404     fn source(&self) -> Option<&(dyn error::Error + 'static)> {
405         match &self.err.code {
406             ErrorCode::Io(err) => err.source(),
407             _ => None,
408         }
409     }
410 }
411 
412 impl Display for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result413     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
414         Display::fmt(&*self.err, f)
415     }
416 }
417 
418 impl Display for ErrorImpl {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result419     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
420         if self.line == 0 {
421             Display::fmt(&self.code, f)
422         } else {
423             write!(
424                 f,
425                 "{} at line {} column {}",
426                 self.code, self.line, self.column
427             )
428         }
429     }
430 }
431 
432 // Remove two layers of verbosity from the debug representation. Humans often
433 // end up seeing this representation because it is what unwrap() shows.
434 impl Debug for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result435     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
436         write!(
437             f,
438             "Error({:?}, line: {}, column: {})",
439             self.err.code.to_string(),
440             self.err.line,
441             self.err.column
442         )
443     }
444 }
445 
446 impl de::Error for Error {
447     #[cold]
custom<T: Display>(msg: T) -> Error448     fn custom<T: Display>(msg: T) -> Error {
449         make_error(msg.to_string())
450     }
451 
452     #[cold]
invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self453     fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
454         Error::custom(format_args!(
455             "invalid type: {}, expected {}",
456             JsonUnexpected(unexp),
457             exp,
458         ))
459     }
460 
461     #[cold]
invalid_value(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self462     fn invalid_value(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self {
463         Error::custom(format_args!(
464             "invalid value: {}, expected {}",
465             JsonUnexpected(unexp),
466             exp,
467         ))
468     }
469 }
470 
471 impl ser::Error for Error {
472     #[cold]
custom<T: Display>(msg: T) -> Error473     fn custom<T: Display>(msg: T) -> Error {
474         make_error(msg.to_string())
475     }
476 }
477 
/// Wrapper around `de::Unexpected` whose `Display` impl prints the unit value
/// as `null` and floats via `ryu`, deferring to `de::Unexpected`'s own
/// `Display` for everything else.
struct JsonUnexpected<'a>(de::Unexpected<'a>);
479 
480 impl<'a> Display for JsonUnexpected<'a> {
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result481     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
482         match self.0 {
483             de::Unexpected::Unit => formatter.write_str("null"),
484             de::Unexpected::Float(value) => write!(
485                 formatter,
486                 "floating point `{}`",
487                 ryu::Buffer::new().format(value),
488             ),
489             unexp => Display::fmt(&unexp, formatter),
490         }
491     }
492 }
493 
494 // Parse our own error message that looks like "{} at line {} column {}" to work
495 // around erased-serde round-tripping the error through de::Error::custom.
make_error(mut msg: String) -> Error496 fn make_error(mut msg: String) -> Error {
497     let (line, column) = parse_line_col(&mut msg).unwrap_or((0, 0));
498     Error {
499         err: Box::new(ErrorImpl {
500             code: ErrorCode::Message(msg.into_boxed_str()),
501             line,
502             column,
503         }),
504     }
505 }
506 
/// Recognizes a trailing ` at line <digits> column <digits>` suffix on `msg`.
///
/// On success the suffix is removed from `msg` and the parsed
/// `(line, column)` pair is returned. If the last ` at line ` occurrence is
/// not followed by exactly that shape up to the end of the string, or either
/// number does not parse as a `usize`, `msg` is left untouched and `None` is
/// returned.
fn parse_line_col(msg: &mut String) -> Option<(usize, usize)> {
    const LINE_MARKER: &str = " at line ";
    const COLUMN_MARKER: &str = " column ";

    // Length in bytes of the run of ASCII digits at the front of `text`.
    let digit_run = |text: &str| text.bytes().take_while(u8::is_ascii_digit).count();

    let suffix_at = msg.rfind(LINE_MARKER)?;

    // Byte range of the line number.
    let line_from = suffix_at + LINE_MARKER.len();
    let line_to = line_from + digit_run(&msg[line_from..]);

    if !msg[line_to..].starts_with(COLUMN_MARKER) {
        return None;
    }

    // Byte range of the column number.
    let column_from = line_to + COLUMN_MARKER.len();
    let column_to = column_from + digit_run(&msg[column_from..]);

    // The column digits must run all the way to the end of the message.
    if column_to < msg.len() {
        return None;
    }

    // An empty or overflowing digit run fails to parse and yields `None`.
    let line = msg[line_from..line_to].parse::<usize>().ok()?;
    let column = msg[column_from..column_to].parse::<usize>().ok()?;

    msg.truncate(suffix_at);
    Some((line, column))
}
548 
/// Returns `true` when the first byte of `slice` is an ASCII digit
/// (`0`-`9`), and `false` for an empty slice or any other leading byte.
fn starts_with_digit(slice: &str) -> bool {
    slice
        .bytes()
        .next()
        .map_or(false, |leading| leading.is_ascii_digit())
}
555