1 use crate::error::{Error, ErrorCode, Result};
2 use alloc::vec::Vec;
3 use core::char;
4 use core::cmp;
5 use core::ops::Deref;
6 use core::str;
7 
8 #[cfg(feature = "std")]
9 use crate::io;
10 #[cfg(feature = "std")]
11 use crate::iter::LineColIterator;
12 
13 #[cfg(feature = "raw_value")]
14 use crate::raw::BorrowedRawDeserializer;
15 #[cfg(all(feature = "raw_value", feature = "std"))]
16 use crate::raw::OwnedRawDeserializer;
17 #[cfg(all(feature = "raw_value", feature = "std"))]
18 use alloc::string::String;
19 #[cfg(feature = "raw_value")]
20 use serde::de::Visitor;
21 
22 /// Trait used by the deserializer for iterating over input. This is manually
23 /// "specialized" for iterating over &[u8]. Once feature(specialization) is
24 /// stable we can use actual specialization.
25 ///
26 /// This trait is sealed and cannot be implemented for types outside of
27 /// `serde_json_lenient`.
28 pub trait Read<'de>: private::Sealed {
29     #[doc(hidden)]
next(&mut self) -> Result<Option<u8>>30     fn next(&mut self) -> Result<Option<u8>>;
31     #[doc(hidden)]
peek(&mut self) -> Result<Option<u8>>32     fn peek(&mut self) -> Result<Option<u8>>;
33 
34     /// Only valid after a call to peek(). Discards the peeked byte.
35     #[doc(hidden)]
discard(&mut self)36     fn discard(&mut self);
37 
38     /// Position of the most recent call to next().
39     ///
40     /// The most recent call was probably next() and not peek(), but this method
41     /// should try to return a sensible result if the most recent call was
42     /// actually peek() because we don't always know.
43     ///
44     /// Only called in case of an error, so performance is not important.
45     #[doc(hidden)]
position(&self) -> Position46     fn position(&self) -> Position;
47 
48     /// Position of the most recent call to peek().
49     ///
50     /// The most recent call was probably peek() and not next(), but this method
51     /// should try to return a sensible result if the most recent call was
52     /// actually next() because we don't always know.
53     ///
54     /// Only called in case of an error, so performance is not important.
55     #[doc(hidden)]
peek_position(&self) -> Position56     fn peek_position(&self) -> Position;
57 
58     /// Offset from the beginning of the input to the next byte that would be
59     /// returned by next() or peek().
60     #[doc(hidden)]
byte_offset(&self) -> usize61     fn byte_offset(&self) -> usize;
62 
63     /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
64     /// string until the next quotation mark using the given scratch space if
65     /// necessary. The scratch space is initially empty.
66     #[doc(hidden)]
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>67     fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;
68 
69     /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
70     /// string until the next quotation mark using the given scratch space if
71     /// necessary. The scratch space is initially empty.
72     ///
73     /// This function returns the raw bytes in the string with escape sequences
74     /// expanded but without performing unicode validation.
75     #[doc(hidden)]
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'de, 's, [u8]>>76     fn parse_str_raw<'s>(
77         &'s mut self,
78         scratch: &'s mut Vec<u8>,
79     ) -> Result<Reference<'de, 's, [u8]>>;
80 
81     /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
82     /// string until the next quotation mark but discards the data.
83     #[doc(hidden)]
ignore_str(&mut self) -> Result<()>84     fn ignore_str(&mut self) -> Result<()>;
85 
86     /// Assumes the previous byte was a hex escape sequence ('\u') in a string.
87     /// Parses next hexadecimal sequence.
88     #[doc(hidden)]
decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>89     fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>;
90 
91     /// Switch raw buffering mode on.
92     ///
93     /// This is used when deserializing `RawValue`.
94     #[cfg(feature = "raw_value")]
95     #[doc(hidden)]
begin_raw_buffering(&mut self)96     fn begin_raw_buffering(&mut self);
97 
98     /// Switch raw buffering mode off and provides the raw buffered data to the
99     /// given visitor.
100     #[cfg(feature = "raw_value")]
101     #[doc(hidden)]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'de>102     fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
103     where
104         V: Visitor<'de>;
105 
106     /// Whether we should replace invalid unicode characters with \u{fffd}.
replace_invalid_unicode(&self) -> bool107     fn replace_invalid_unicode(&self) -> bool;
108 
109     /// Allow \v escapes
allow_v_escapes(&self) -> bool110     fn allow_v_escapes(&self) -> bool;
111 
112     /// Allow \x escapes
allow_x_escapes(&self) -> bool113     fn allow_x_escapes(&self) -> bool;
114 
115     /// Whether StreamDeserializer::next needs to check the failed flag. True
116     /// for IoRead, false for StrRead and SliceRead which can track failure by
117     /// truncating their input slice to avoid the extra check on every next
118     /// call.
119     #[doc(hidden)]
120     const should_early_return_if_failed: bool;
121 
122     /// Mark a persistent failure of StreamDeserializer, either by setting the
123     /// flag or by truncating the input data.
124     #[doc(hidden)]
set_failed(&mut self, failed: &mut bool)125     fn set_failed(&mut self, failed: &mut bool);
126 }
127 
/// Line/column location within the input, as returned by `Read::position`
/// and `Read::peek_position`.
///
/// `SliceRead` counts lines starting at 1 and resets the column to 0 after
/// each `\n`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    /// Line number.
    pub line: usize,
    /// Column within `line`.
    pub column: usize,
}
132 
/// A reference to parsed string data that either points straight into the
/// input (`Borrowed`, lifetime `'b`) or into a temporary copy such as the
/// scratch buffer (`Copied`, lifetime `'c`).
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// Data referenced directly from the input.
    Borrowed(&'b T),
    /// Data referenced from a copy.
    Copied(&'c T),
}

impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    type Target = T;

    /// Yields the referenced value regardless of which variant holds it.
    fn deref(&self) -> &Self::Target {
        match self {
            Reference::Borrowed(from_input) => *from_input,
            Reference::Copied(from_copy) => *from_copy,
        }
    }
}
154 
155 /// Trait used by parse_str_bytes to convert the resulting bytes
156 /// into a string-like thing. Depending on the original caller, this may
157 
158 /// be a &str or a &[u8].
159 trait UtfOutputStrategy<T: ?Sized> {
to_result_simple<'de, 's, R: Read<'de>>(&self, read: &R, slice: &'s [u8]) -> Result<&'s T>160     fn to_result_simple<'de, 's, R: Read<'de>>(&self, read: &R, slice: &'s [u8]) -> Result<&'s T>;
161 
to_result_direct<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], _: &'de mut Vec<u8>, ) -> Result<Reference<'s, 'de, T>>162     fn to_result_direct<'de, 's, R: Read<'de>>(
163         &self,
164         read: &R,
165         slice: &'s [u8],
166         _: &'de mut Vec<u8>,
167     ) -> Result<Reference<'s, 'de, T>> {
168         self.to_result_simple(read, slice)
169             .map(|r| Reference::Borrowed(r))
170     }
171 
to_result_from_scratch<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], ) -> Result<&'s T>172     fn to_result_from_scratch<'de, 's, R: Read<'de>>(
173         &self,
174         read: &R,
175         slice: &'s [u8],
176     ) -> Result<&'s T> {
177         self.to_result_simple(read, slice)
178     }
extend_scratch(&self, scratch: &mut Vec<u8>, slice: &[u8])179     fn extend_scratch(&self, scratch: &mut Vec<u8>, slice: &[u8]) {
180         scratch.extend(slice);
181     }
182 }
183 
convert_or_error<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str>184 fn convert_or_error<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
185     str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
186 }
187 
188 struct StrUtfOutputStrategy;
189 
190 impl UtfOutputStrategy<str> for StrUtfOutputStrategy {
to_result_simple<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], ) -> Result<&'s str>191     fn to_result_simple<'de, 's, R: Read<'de>>(
192         &self,
193         read: &R,
194         slice: &'s [u8],
195     ) -> Result<&'s str> {
196         convert_or_error(read, slice)
197     }
198 
to_result_from_scratch<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], ) -> Result<&'s str>199     fn to_result_from_scratch<'de, 's, R: Read<'de>>(
200         &self,
201         read: &R,
202         slice: &'s [u8],
203     ) -> Result<&'s str> {
204         match str::from_utf8(slice) {
205             Ok(s) => Ok(s),
206             Err(_) => error(read, ErrorCode::InvalidUnicodeCodePoint),
207         }
208     }
209 }
210 
/// Strategy producing `&str` output in which every invalid UTF-8 sequence is
/// replaced by U+FFFD instead of being reported as an error.
struct SubstitutingStrUtfOutputStrategy;

impl SubstitutingStrUtfOutputStrategy {
    /// Returns whether conversion occurred. If not, output is unchanged
    /// and the caller should just directly use the input slice.
    ///
    /// When conversion does occur, the valid parts of `input` are appended to
    /// `output` with one U+FFFD in place of each invalid sequence.
    fn convert_from_utf8_lossy(&self, output: &mut Vec<u8>, mut input: &[u8]) -> bool {
        // Entirely valid input: nothing is written at all.
        let mut failure = match core::str::from_utf8(input) {
            Ok(_) => return false,
            Err(failure) => failure,
        };

        loop {
            let (valid, after_valid) = input.split_at(failure.valid_up_to());
            output.extend_from_slice(valid);
            output.extend_from_slice("\u{fffd}".as_bytes());

            match failure.error_len() {
                // Skip the offending bytes and keep scanning.
                Some(invalid_sequence_length) => {
                    input = &after_valid[invalid_sequence_length..];
                }
                // The input ended in the middle of a sequence; the
                // replacement character already stands in for it.
                None => return true,
            }

            match core::str::from_utf8(input) {
                Ok(tail) => {
                    output.extend_from_slice(tail.as_bytes());
                    return true;
                }
                Err(next_failure) => failure = next_failure,
            }
        }
    }

    /// Reinterprets `slice` as `&str` without validating it.
    ///
    /// Callers must only pass bytes already known to be valid UTF-8.
    fn convert_unchecked<'a>(&self, slice: &'a [u8]) -> &'a str {
        // SAFETY: upheld by the callers, see the contract above.
        unsafe { str::from_utf8_unchecked(slice) }
    }
}
248 
249 impl UtfOutputStrategy<str> for SubstitutingStrUtfOutputStrategy {
to_result_simple<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], ) -> Result<&'s str>250     fn to_result_simple<'de, 's, R: Read<'de>>(
251         &self,
252         read: &R,
253         slice: &'s [u8],
254     ) -> Result<&'s str> {
255         convert_or_error(read, slice)
256     }
257 
to_result_direct<'de, 's, R: Read<'de>>( &self, _: &R, slice: &'s [u8], scratch: &'de mut Vec<u8>, ) -> Result<Reference<'s, 'de, str>>258     fn to_result_direct<'de, 's, R: Read<'de>>(
259         &self,
260         _: &R,
261         slice: &'s [u8],
262         scratch: &'de mut Vec<u8>,
263     ) -> Result<Reference<'s, 'de, str>> {
264         let r = self.convert_from_utf8_lossy(scratch, slice);
265         Ok(if r {
266             Reference::Copied(self.convert_unchecked(scratch))
267         } else {
268             Reference::Borrowed(self.convert_unchecked(slice))
269         })
270     }
271 
to_result_from_scratch<'de, 's, R: Read<'de>>( &self, _: &R, slice: &'s [u8], ) -> Result<&'s str>272     fn to_result_from_scratch<'de, 's, R: Read<'de>>(
273         &self,
274         _: &R,
275         slice: &'s [u8],
276     ) -> Result<&'s str> {
277         // We checked it on the way into the scratch buffer, so no need for further checks now
278         Ok(self.convert_unchecked(slice))
279     }
280 
extend_scratch(&self, scratch: &mut Vec<u8>, slice: &[u8])281     fn extend_scratch(&self, scratch: &mut Vec<u8>, slice: &[u8]) {
282         if !self.convert_from_utf8_lossy(scratch, slice) {
283             scratch.extend(slice);
284         }
285     }
286 }
287 
288 struct UncheckedStrUtfOutputStrategy;
289 
290 impl UtfOutputStrategy<str> for UncheckedStrUtfOutputStrategy {
to_result_simple<'de, 's, R: Read<'de>>(&self, _: &R, slice: &'s [u8]) -> Result<&'s str>291     fn to_result_simple<'de, 's, R: Read<'de>>(&self, _: &R, slice: &'s [u8]) -> Result<&'s str> {
292         // The input is assumed to be valid UTF-8 and the \u-escapes are
293         // checked along the way, so don't need to check here.
294         Ok(unsafe { str::from_utf8_unchecked(slice) })
295     }
296 }
297 
298 struct SliceUtfOutputStrategy;
299 
300 impl UtfOutputStrategy<[u8]> for SliceUtfOutputStrategy {
to_result_simple<'de, 's, R: Read<'de>>(&self, _: &R, slice: &'s [u8]) -> Result<&'s [u8]>301     fn to_result_simple<'de, 's, R: Read<'de>>(&self, _: &R, slice: &'s [u8]) -> Result<&'s [u8]> {
302         Ok(slice)
303     }
304 }
305 
/// JSON input source that reads from a std::io input stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R: io::Read> {
    /// Byte iterator over the reader; also supplies line, column and byte
    /// offset.
    iter: LineColIterator<io::Bytes<R>>,
    /// Temporary storage of peeked byte.
    ch: Option<u8>,
    /// Bytes consumed while raw buffering is switched on; `None` otherwise.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
319 
/// JSON input source that reads from a slice of bytes.
//
// This is more efficient than other iterators because peek() can be read-only
// and we can compute line/col position only if an error happens.
#[allow(clippy::struct_excessive_bools)]
pub struct SliceRead<'a> {
    /// The input being parsed.
    slice: &'a [u8],
    /// Index of the *next* byte that will be returned by next() or peek().
    index: usize,
    /// Replace invalid characters with U+FFFD.
    replace_invalid_characters: bool,
    /// Allow CR and LF characters in strings.
    allow_newlines_in_string: bool,
    /// Allow control characters other than CR/LF in strings.
    allow_control_characters_in_string: bool,
    /// Allow `\x##` in strings.
    allow_x_escapes: bool,
    /// Allow `\v` in strings.
    allow_v_escapes: bool,
    /// Value of `index` at the time raw buffering was switched on.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
337 
338 /// JSON input source that reads from a UTF-8 string.
339 //
340 // Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
341 pub struct StrRead<'a> {
342     delegate: SliceRead<'a>,
343     #[cfg(feature = "raw_value")]
344     data: &'a str,
345 }
346 
// Prevent users from implementing the Read trait.
mod private {
    /// Supertrait of `Read`. Because this module is private, code outside
    /// the crate cannot name the trait and so cannot implement `Read`.
    pub trait Sealed {}
}
351 
352 //////////////////////////////////////////////////////////////////////////////
353 
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// The reader starts with no peeked byte and with raw buffering off.
    pub fn new(reader: R) -> Self {
        let iter = LineColIterator::new(reader.bytes());
        IoRead {
            iter,
            ch: None,
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
369 
// Opts `IoRead` into the sealed `Read` trait.
#[cfg(feature = "std")]
impl<R: io::Read> private::Sealed for IoRead<R> {}
372 
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads string contents up to the closing quotation mark, accumulating
    /// the bytes (with escapes expanded by `parse_escape`) in `scratch`, then
    /// hands the accumulated bytes to `utf_strategy` for conversion.
    ///
    /// When `validate` is true, a byte flagged by `ESCAPE_ALL` that is neither
    /// `"` nor `\` is rejected with `ControlCharacterWhileParsingString`;
    /// otherwise such bytes are copied into `scratch` unchanged.
    #[allow(clippy::needless_pass_by_value)]
    fn parse_str_bytes<'s, T, S>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        utf_strategy: S,
    ) -> Result<&'s T>
    where
        T: ?Sized,
        S: UtfOutputStrategy<T>,
    {
        loop {
            let ch = tri!(next_or_eof(self));
            match ch {
                // Ordinary byte: copy it and keep going.
                plain if !ESCAPE_ALL[plain as usize] => scratch.push(plain),
                // Closing quote: the string is complete.
                b'"' => return utf_strategy.to_result_simple(self, scratch),
                b'\\' => {
                    tri!(parse_escape(self, validate, scratch));
                }
                _ if validate => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                unvalidated => scratch.push(unvalidated),
            }
        }
    }
}
412 
#[cfg(feature = "std")]
impl<'de, R> Read<'de> for IoRead<R>
where
    R: io::Read,
{
    /// Always `false` for stream input.
    fn replace_invalid_unicode(&self) -> bool {
        false
    }

    /// Always `false` for stream input.
    fn allow_x_escapes(&self) -> bool {
        false
    }

    /// Always `false` for stream input.
    fn allow_v_escapes(&self) -> bool {
        false
    }

    /// Returns the previously peeked byte if there is one, otherwise pulls
    /// the next byte from the underlying iterator. While raw buffering is on,
    /// the returned byte is also recorded in the raw buffer.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        let ch = match self.ch.take() {
            Some(peeked) => peeked,
            None => match self.iter.next() {
                Some(Ok(fresh)) => fresh,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };
        #[cfg(feature = "raw_value")]
        {
            if let Some(buf) = &mut self.raw_buffer {
                buf.push(ch);
            }
        }
        Ok(Some(ch))
    }

    /// Returns the next byte without consuming it, caching it in `self.ch`
    /// so that a following `next()` or `discard()` sees the same byte.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        if let Some(cached) = self.ch {
            return Ok(Some(cached));
        }
        match self.iter.next() {
            Some(Ok(fresh)) => {
                self.ch = Some(fresh);
                Ok(self.ch)
            }
            Some(Err(err)) => Err(Error::io(err)),
            None => Ok(None),
        }
    }

    /// Drops the peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the peeked byte, recording it in the raw buffer first when raw
    /// buffering is on.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        let peeked = match self.ch.take() {
            Some(peeked) => peeked,
            None => return,
        };
        if let Some(buf) = &mut self.raw_buffer {
            buf.push(peeked);
        }
    }

    /// Line and column as tracked by the underlying iterator.
    fn position(&self) -> Position {
        let line = self.iter.line();
        let column = self.iter.col();
        Position { line, column }
    }

    /// Same as `position()`.
    fn peek_position(&self) -> Position {
        // The LineColIterator updates its position during peek() so it has the
        // right one here.
        self.position()
    }

    /// Offset of the next byte to be returned; a pending peeked byte has
    /// already been pulled from the iterator, so it is subtracted again.
    fn byte_offset(&self) -> usize {
        let consumed = self.iter.byte_offset();
        if self.ch.is_some() {
            consumed - 1
        } else {
            consumed
        }
    }

    /// Parses a validated string; the result always lives in `scratch`.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        let parsed = self.parse_str_bytes(scratch, true, StrUtfOutputStrategy);
        parsed.map(Reference::Copied)
    }

    /// Parses a string as raw bytes without validation; the result always
    /// lives in `scratch`.
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        let parsed = self.parse_str_bytes(scratch, false, SliceUtfOutputStrategy);
        parsed.map(Reference::Copied)
    }

    /// Skips over string contents up to and including the closing quotation
    /// mark, without storing anything.
    fn ignore_str(&mut self) -> Result<()> {
        loop {
            match tri!(next_or_eof(self)) {
                // Ordinary byte: nothing to do.
                plain if !ESCAPE_ALL[plain as usize] => {}
                b'"' => return Ok(()),
                b'\\' => {
                    tri!(ignore_escape(self));
                }
                _ => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
            }
        }
    }

    /// Reads `num_digits` hexadecimal digits and folds them into one value,
    /// most significant digit first.
    fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16> {
        let mut n = 0;
        for _ in 0..num_digits {
            let digit = tri!(next_or_eof(self));
            n = match decode_hex_val(digit) {
                Some(val) => (n << 4) + val,
                None => return error(self, ErrorCode::InvalidEscape),
            };
        }
        Ok(n)
    }

    /// Starts recording consumed bytes into a fresh raw buffer.
    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Stops recording and passes the recorded bytes, as an owned string, to
    /// `visitor`.
    ///
    /// Panics if raw buffering was not switched on beforehand.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let bytes = self.raw_buffer.take().unwrap();
        match String::from_utf8(bytes) {
            Ok(raw) => visitor.visit_map(OwnedRawDeserializer {
                raw_value: Some(raw),
            }),
            Err(_) => error(self, ErrorCode::InvalidUnicodeCodePoint),
        }
    }

    const should_early_return_if_failed: bool = true;

    /// Records the failure in the caller's flag.
    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
582 
583 //////////////////////////////////////////////////////////////////////////////
584 
585 impl<'a> SliceRead<'a> {
586     /// Create a JSON input source to read from a slice of bytes.
587     ///
588     /// The options are as follows:
589     /// - `replace_invalid_characters` - replace invalid characters with U+FFFD.
590     /// - `allow_newlines_in_string` - allow CR and LF characters in strings.
591     /// - `allow_control_characters_in_string` - allow control characters other than CR/LF in
592     ///    strings.
593     /// - `allow_v_escapes` - allow `\v` in strings.
594     /// - `allow_x_escapes` - allow `\x##` in strings.
595     #[allow(clippy::fn_params_excessive_bools)]
new( slice: &'a [u8], replace_invalid_characters: bool, allow_newlines_in_string: bool, allow_control_characters_in_string: bool, allow_v_escapes: bool, allow_x_escapes: bool, ) -> Self596     pub fn new(
597         slice: &'a [u8],
598         replace_invalid_characters: bool,
599         allow_newlines_in_string: bool,
600         allow_control_characters_in_string: bool,
601         allow_v_escapes: bool,
602         allow_x_escapes: bool,
603     ) -> Self {
604         SliceRead {
605             slice,
606             index: 0,
607             replace_invalid_characters,
608             allow_newlines_in_string,
609             allow_control_characters_in_string,
610             allow_v_escapes,
611             allow_x_escapes,
612             #[cfg(feature = "raw_value")]
613             raw_buffering_start_index: 0,
614         }
615     }
616 
617     /// Find the appropriate escaping table for the current set of options.
escapes(&self) -> &[bool; 256]618     fn escapes(&self) -> &[bool; 256] {
619         match (
620             self.allow_newlines_in_string,
621             self.allow_control_characters_in_string,
622         ) {
623             (false, false) => &ESCAPE_ALL,
624             (true, false) => &ESCAPE_NL_OK,
625             (false, true) => &ESCAPE_CONTROL_OK,
626             (true, true) => &ESCAPE_CONTROL_NL_OK,
627         }
628     }
629 
position_of_index(&self, i: usize) -> Position630     fn position_of_index(&self, i: usize) -> Position {
631         let mut position = Position { line: 1, column: 0 };
632         for ch in &self.slice[..i] {
633             match *ch {
634                 b'\n' => {
635                     position.line += 1;
636                     position.column = 0;
637                 }
638                 _ => {
639                     position.column += 1;
640                 }
641             }
642         }
643         position
644     }
645 
646     /// The big optimization here over IoRead is that if the string contains no
647     /// backslash escape sequences, the returned &str is a slice of the raw JSON
648     /// data so we avoid copying into the scratch space.
649     #[allow(clippy::needless_pass_by_value)]
parse_str_bytes<'s, T, S>( &'s mut self, scratch: &'s mut Vec<u8>, validate: bool, utf_strategy: S, ) -> Result<Reference<'a, 's, T>> where T: ?Sized + 's, S: UtfOutputStrategy<T>,650     fn parse_str_bytes<'s, T, S>(
651         &'s mut self,
652         scratch: &'s mut Vec<u8>,
653         validate: bool,
654         utf_strategy: S,
655     ) -> Result<Reference<'a, 's, T>>
656     where
657         T: ?Sized + 's,
658         S: UtfOutputStrategy<T>,
659     {
660         // Index of the first byte not yet copied into the scratch space.
661         let mut start = self.index;
662 
663         loop {
664             while self.index < self.slice.len() && !self.escapes()[self.slice[self.index] as usize]
665             {
666                 self.index += 1;
667             }
668             if self.index == self.slice.len() {
669                 return error(self, ErrorCode::EofWhileParsingString);
670             }
671             match self.slice[self.index] {
672                 b'"' => {
673                     if scratch.is_empty() {
674                         // Fast path: return a slice of the raw JSON without any
675                         // copying.
676                         let borrowed = &self.slice[start..self.index];
677                         self.index += 1;
678                         return utf_strategy.to_result_direct(self, borrowed, scratch);
679                     } else {
680                         utf_strategy.extend_scratch(scratch, &self.slice[start..self.index]);
681                         self.index += 1;
682                         return utf_strategy
683                             .to_result_from_scratch(self, scratch)
684                             .map(|r| Reference::Copied(r));
685                     }
686                 }
687                 b'\\' => {
688                     utf_strategy.extend_scratch(scratch, &self.slice[start..self.index]);
689                     self.index += 1;
690                     tri!(parse_escape(self, validate, scratch));
691                     start = self.index;
692                 }
693                 _ => {
694                     self.index += 1;
695                     if validate {
696                         return error(self, ErrorCode::ControlCharacterWhileParsingString);
697                     }
698                 }
699             }
700         }
701     }
702 }
703 
704 impl<'a> private::Sealed for SliceRead<'a> {}
705 
706 impl<'a> Read<'a> for SliceRead<'a> {
replace_invalid_unicode(&self) -> bool707     fn replace_invalid_unicode(&self) -> bool {
708         self.replace_invalid_characters
709     }
710 
allow_x_escapes(&self) -> bool711     fn allow_x_escapes(&self) -> bool {
712         self.allow_x_escapes
713     }
714 
allow_v_escapes(&self) -> bool715     fn allow_v_escapes(&self) -> bool {
716         self.allow_v_escapes
717     }
718 
719     #[inline]
next(&mut self) -> Result<Option<u8>>720     fn next(&mut self) -> Result<Option<u8>> {
721         // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
722         // is about 10% slower.
723         Ok(if self.index < self.slice.len() {
724             let ch = self.slice[self.index];
725             self.index += 1;
726             Some(ch)
727         } else {
728             None
729         })
730     }
731 
732     #[inline]
peek(&mut self) -> Result<Option<u8>>733     fn peek(&mut self) -> Result<Option<u8>> {
734         // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
735         // for some reason.
736         Ok(if self.index < self.slice.len() {
737             Some(self.slice[self.index])
738         } else {
739             None
740         })
741     }
742 
743     #[inline]
discard(&mut self)744     fn discard(&mut self) {
745         self.index += 1;
746     }
747 
position(&self) -> Position748     fn position(&self) -> Position {
749         self.position_of_index(self.index)
750     }
751 
peek_position(&self) -> Position752     fn peek_position(&self) -> Position {
753         // Cap it at slice.len() just in case the most recent call was next()
754         // and it returned the last byte.
755         self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
756     }
757 
byte_offset(&self) -> usize758     fn byte_offset(&self) -> usize {
759         self.index
760     }
761 
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>>762     fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
763         if self.replace_invalid_characters {
764             self.parse_str_bytes(scratch, true, SubstitutingStrUtfOutputStrategy)
765         } else {
766             self.parse_str_bytes(scratch, true, StrUtfOutputStrategy)
767         }
768     }
769 
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'a, 's, [u8]>>770     fn parse_str_raw<'s>(
771         &'s mut self,
772         scratch: &'s mut Vec<u8>,
773     ) -> Result<Reference<'a, 's, [u8]>> {
774         self.parse_str_bytes(scratch, false, SliceUtfOutputStrategy)
775     }
776 
ignore_str(&mut self) -> Result<()>777     fn ignore_str(&mut self) -> Result<()> {
778         loop {
779             while self.index < self.slice.len() && !self.escapes()[self.slice[self.index] as usize]
780             {
781                 self.index += 1;
782             }
783             if self.index == self.slice.len() {
784                 return error(self, ErrorCode::EofWhileParsingString);
785             }
786             match self.slice[self.index] {
787                 b'"' => {
788                     self.index += 1;
789                     return Ok(());
790                 }
791                 b'\\' => {
792                     self.index += 1;
793                     tri!(ignore_escape(self));
794                 }
795                 _ => {
796                     return error(self, ErrorCode::ControlCharacterWhileParsingString);
797                 }
798             }
799         }
800     }
801 
decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>802     fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16> {
803         if self.index + num_digits > self.slice.len() {
804             self.index = self.slice.len();
805             return error(self, ErrorCode::EofWhileParsingString);
806         }
807 
808         let mut n = 0;
809         for _ in 0..num_digits {
810             let ch = decode_hex_val(self.slice[self.index]);
811             self.index += 1;
812             match ch {
813                 None => return error(self, ErrorCode::InvalidEscape),
814                 Some(val) => {
815                     n = (n << 4) + val;
816                 }
817             }
818         }
819         Ok(n)
820     }
821 
822     #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)823     fn begin_raw_buffering(&mut self) {
824         self.raw_buffering_start_index = self.index;
825     }
826 
827     #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'a>,828     fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
829     where
830         V: Visitor<'a>,
831     {
832         let raw = &self.slice[self.raw_buffering_start_index..self.index];
833         let raw = match str::from_utf8(raw) {
834             Ok(raw) => raw,
835             Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
836         };
837         visitor.visit_map(BorrowedRawDeserializer {
838             raw_value: Some(raw),
839         })
840     }
841 
    // This reader opts out of early return on failure: the flag is `false`,
    // and `set_failed` below never touches the `_failed` argument.
    const should_early_return_if_failed: bool = false;
843 
844     #[inline]
845     #[cold]
set_failed(&mut self, _failed: &mut bool)846     fn set_failed(&mut self, _failed: &mut bool) {
847         self.slice = &self.slice[..self.index];
848     }
849 }
850 
851 //////////////////////////////////////////////////////////////////////////////
852 
853 impl<'a> StrRead<'a> {
854     /// Create a JSON input source to read from a UTF-8 string.
new(s: &'a str) -> Self855     pub fn new(s: &'a str) -> Self {
856         StrRead {
857             delegate: SliceRead::new(s.as_bytes(), false, false, false, false, false),
858             #[cfg(feature = "raw_value")]
859             data: s,
860         }
861     }
862 }
863 
// `Read` has `private::Sealed` as a supertrait, so `StrRead` must implement the
// marker before it can implement `Read`.
impl<'a> private::Sealed for StrRead<'a> {}
865 
866 impl<'a> Read<'a> for StrRead<'a> {
replace_invalid_unicode(&self) -> bool867     fn replace_invalid_unicode(&self) -> bool {
868         false
869     }
870 
allow_x_escapes(&self) -> bool871     fn allow_x_escapes(&self) -> bool {
872         false
873     }
874 
allow_v_escapes(&self) -> bool875     fn allow_v_escapes(&self) -> bool {
876         false
877     }
878 
879     #[inline]
next(&mut self) -> Result<Option<u8>>880     fn next(&mut self) -> Result<Option<u8>> {
881         self.delegate.next()
882     }
883 
884     #[inline]
peek(&mut self) -> Result<Option<u8>>885     fn peek(&mut self) -> Result<Option<u8>> {
886         self.delegate.peek()
887     }
888 
889     #[inline]
discard(&mut self)890     fn discard(&mut self) {
891         self.delegate.discard();
892     }
893 
position(&self) -> Position894     fn position(&self) -> Position {
895         self.delegate.position()
896     }
897 
peek_position(&self) -> Position898     fn peek_position(&self) -> Position {
899         self.delegate.peek_position()
900     }
901 
byte_offset(&self) -> usize902     fn byte_offset(&self) -> usize {
903         self.delegate.byte_offset()
904     }
905 
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>>906     fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
907         self.delegate
908             .parse_str_bytes(scratch, true, UncheckedStrUtfOutputStrategy)
909     }
910 
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'a, 's, [u8]>>911     fn parse_str_raw<'s>(
912         &'s mut self,
913         scratch: &'s mut Vec<u8>,
914     ) -> Result<Reference<'a, 's, [u8]>> {
915         self.delegate.parse_str_raw(scratch)
916     }
917 
ignore_str(&mut self) -> Result<()>918     fn ignore_str(&mut self) -> Result<()> {
919         self.delegate.ignore_str()
920     }
921 
decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>922     fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16> {
923         self.delegate.decode_hex_escape(num_digits)
924     }
925 
926     #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)927     fn begin_raw_buffering(&mut self) {
928         self.delegate.begin_raw_buffering();
929     }
930 
931     #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'a>,932     fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
933     where
934         V: Visitor<'a>,
935     {
936         let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
937         visitor.visit_map(BorrowedRawDeserializer {
938             raw_value: Some(raw),
939         })
940     }
941 
942     const should_early_return_if_failed: bool = false;
943 
944     #[inline]
945     #[cold]
set_failed(&mut self, failed: &mut bool)946     fn set_failed(&mut self, failed: &mut bool) {
947         self.delegate.set_failed(failed);
948     }
949 }
950 
951 //////////////////////////////////////////////////////////////////////////////
952 
// `Read` has `private::Sealed` as a supertrait, so a mutable reference to any
// reader must implement the marker before it can implement `Read`.
impl<'a, 'de, R> private::Sealed for &'a mut R where R: Read<'de> {}
954 
955 impl<'a, 'de, R> Read<'de> for &'a mut R
956 where
957     R: Read<'de>,
958 {
next(&mut self) -> Result<Option<u8>>959     fn next(&mut self) -> Result<Option<u8>> {
960         R::next(self)
961     }
962 
peek(&mut self) -> Result<Option<u8>>963     fn peek(&mut self) -> Result<Option<u8>> {
964         R::peek(self)
965     }
966 
discard(&mut self)967     fn discard(&mut self) {
968         R::discard(self);
969     }
970 
position(&self) -> Position971     fn position(&self) -> Position {
972         R::position(self)
973     }
974 
peek_position(&self) -> Position975     fn peek_position(&self) -> Position {
976         R::peek_position(self)
977     }
978 
byte_offset(&self) -> usize979     fn byte_offset(&self) -> usize {
980         R::byte_offset(self)
981     }
982 
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>983     fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
984         R::parse_str(self, scratch)
985     }
986 
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'de, 's, [u8]>>987     fn parse_str_raw<'s>(
988         &'s mut self,
989         scratch: &'s mut Vec<u8>,
990     ) -> Result<Reference<'de, 's, [u8]>> {
991         R::parse_str_raw(self, scratch)
992     }
993 
ignore_str(&mut self) -> Result<()>994     fn ignore_str(&mut self) -> Result<()> {
995         R::ignore_str(self)
996     }
997 
decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>998     fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16> {
999         R::decode_hex_escape(self, num_digits)
1000     }
1001 
1002     #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)1003     fn begin_raw_buffering(&mut self) {
1004         R::begin_raw_buffering(self);
1005     }
1006 
1007     #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'de>,1008     fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
1009     where
1010         V: Visitor<'de>,
1011     {
1012         R::end_raw_buffering(self, visitor)
1013     }
1014 
1015     const should_early_return_if_failed: bool = R::should_early_return_if_failed;
1016 
set_failed(&mut self, failed: &mut bool)1017     fn set_failed(&mut self, failed: &mut bool) {
1018         R::set_failed(self, failed);
1019     }
1020 
replace_invalid_unicode(&self) -> bool1021     fn replace_invalid_unicode(&self) -> bool {
1022         R::replace_invalid_unicode(self)
1023     }
1024 
allow_x_escapes(&self) -> bool1025     fn allow_x_escapes(&self) -> bool {
1026         R::allow_x_escapes(self)
1027     }
1028 
allow_v_escapes(&self) -> bool1029     fn allow_v_escapes(&self) -> bool {
1030         R::allow_v_escapes(self)
1031     }
1032 }
1033 
1034 //////////////////////////////////////////////////////////////////////////////
1035 
/// Marker for whether StreamDeserializer can implement FusedIterator.
///
/// The trait has no methods; it is sealed through its `private::Sealed`
/// supertrait.
pub trait Fused: private::Sealed {}
// The slice-backed and str-backed readers both carry the marker.
impl<'a> Fused for SliceRead<'a> {}
impl<'a> Fused for StrRead<'a> {}
1040 
// Precomputed escape tables, one per combination of the two flags taken by
// `get_escapes(allow_newlines_in_string, allow_control_characters_in_string)`.

// CR/LF and every other control character are flagged.
const ESCAPE_ALL: [bool; 256] = get_escapes(false, false);
// CR/LF are flagged; the other control characters are not.
const ESCAPE_CONTROL_OK: [bool; 256] = get_escapes(false, true);
// CR/LF are not flagged; the other control characters are.
const ESCAPE_NL_OK: [bool; 256] = get_escapes(true, false);
// Neither CR/LF nor the other control characters are flagged.
const ESCAPE_CONTROL_NL_OK: [bool; 256] = get_escapes(true, true);
1045 
// Builds the lookup table of bytes that must be escaped. A value of true at
// index i means that byte i requires an escape sequence in the input.
//
// The quote (\x22) and backslash (\x5C) are always flagged. CR and LF are
// flagged unless `allow_newlines_in_string` is set, and the remaining control
// characters \x00..=\x1F are flagged unless
// `allow_control_characters_in_string` is set. Every other byte is allowed
// unescaped.
const fn get_escapes(
    allow_newlines_in_string: bool,
    allow_control_characters_in_string: bool,
) -> [bool; 256] {
    // Start from "nothing needs escaping" and flag the exceptions.
    let mut table = [false; 256];

    // Control characters \x00..=\x1F share a single setting...
    let mut byte = 0_usize;
    while byte < 0x20 {
        table[byte] = !allow_control_characters_in_string;
        byte += 1;
    }

    // ...except LF and CR, which are governed by their own flag.
    table[b'\n' as usize] = !allow_newlines_in_string;
    table[b'\r' as usize] = !allow_newlines_in_string;

    // The string terminator and the escape introducer are always flagged.
    table[b'"' as usize] = true;
    table[b'\\' as usize] = true;

    table
}
1078 
next_or_eof<'de, R>(read: &mut R) -> Result<u8> where R: ?Sized + Read<'de>,1079 fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
1080 where
1081     R: ?Sized + Read<'de>,
1082 {
1083     match tri!(read.next()) {
1084         Some(b) => Ok(b),
1085         None => error(read, ErrorCode::EofWhileParsingString),
1086     }
1087 }
1088 
peek_or_eof<'de, R>(read: &mut R) -> Result<u8> where R: ?Sized + Read<'de>,1089 fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
1090 where
1091     R: ?Sized + Read<'de>,
1092 {
1093     match tri!(read.peek()) {
1094         Some(b) => Ok(b),
1095         None => error(read, ErrorCode::EofWhileParsingString),
1096     }
1097 }
1098 
error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T> where R: ?Sized + Read<'de>,1099 fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
1100 where
1101     R: ?Sized + Read<'de>,
1102 {
1103     let position = read.position();
1104     Err(Error::syntax(reason, position.line, position.column))
1105 }
1106 
1107 /// Parses a JSON escape sequence and appends it into the scratch space. Assumes
1108 /// the previous byte read was a backslash.
parse_escape<'de, R: Read<'de>>( read: &mut R, validate: bool, scratch: &mut Vec<u8>, ) -> Result<()>1109 fn parse_escape<'de, R: Read<'de>>(
1110     read: &mut R,
1111     validate: bool,
1112     scratch: &mut Vec<u8>,
1113 ) -> Result<()> {
1114     let ch = tri!(next_or_eof(read));
1115 
1116     // In the event of an error, if replacing invalid unicode, just return REPLACEMENT CHARACTER.
1117     // Otherwise, discard the peeked byte representing the error if necessary and fall back to
1118     // error().
1119     let mut error_or_replace = |read: &mut R, need_discard, reason| {
1120         if read.replace_invalid_unicode() {
1121             scratch.extend("\u{fffd}".as_bytes());
1122             Ok(())
1123         } else {
1124             if need_discard {
1125                 read.discard();
1126             }
1127             error(read, reason)
1128         }
1129     };
1130 
1131     match ch {
1132         b'"' => scratch.push(b'"'),
1133         b'\\' => scratch.push(b'\\'),
1134         b'/' => scratch.push(b'/'),
1135         b'b' => scratch.push(b'\x08'),
1136         b'f' => scratch.push(b'\x0c'),
1137         b'n' => scratch.push(b'\n'),
1138         b'r' => scratch.push(b'\r'),
1139         b't' => scratch.push(b'\t'),
1140         b'v' if read.allow_v_escapes() => scratch.push(b'\x0b'),
1141         b'x' if read.allow_x_escapes() => {
1142             let c: u32 = tri!(read.decode_hex_escape(2)).into();
1143             let c = match char::from_u32(c) {
1144                 Some(c) => c,
1145                 None => {
1146                     return error_or_replace(read, false, ErrorCode::InvalidUnicodeCodePoint);
1147                 }
1148             };
1149             scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
1150         }
1151         b'u' => {
1152             fn encode_surrogate(scratch: &mut Vec<u8>, n: u16) {
1153                 scratch.extend_from_slice(&[
1154                     (n >> 12 & 0b0000_1111) as u8 | 0b1110_0000,
1155                     (n >> 6 & 0b0011_1111) as u8 | 0b1000_0000,
1156                     (n & 0b0011_1111) as u8 | 0b1000_0000,
1157                 ]);
1158             }
1159 
1160             let c = match tri!(read.decode_hex_escape(4)) {
1161                 n @ 0xDC00..=0xDFFF => {
1162                     return if validate {
1163                         error_or_replace(read, false, ErrorCode::LoneLeadingSurrogateInHexEscape)
1164                     } else {
1165                         encode_surrogate(scratch, n);
1166                         Ok(())
1167                     };
1168                 }
1169 
1170                 // Non-BMP characters are encoded as a sequence of two hex
1171                 // escapes, representing UTF-16 surrogates. If deserializing a
1172                 // utf-8 string the surrogates are required to be paired,
1173                 // whereas deserializing a byte string accepts lone surrogates.
1174                 n1 @ 0xD800..=0xDBFF => {
1175                     if tri!(peek_or_eof(read)) == b'\\' {
1176                         read.discard();
1177                     } else {
1178                         return if validate {
1179                             error_or_replace(read, true, ErrorCode::UnexpectedEndOfHexEscape)
1180                         } else {
1181                             encode_surrogate(scratch, n1);
1182                             Ok(())
1183                         };
1184                     }
1185 
1186                     if tri!(peek_or_eof(read)) == b'u' {
1187                         read.discard();
1188                     } else {
1189                         return if validate {
1190                             error_or_replace(read, true, ErrorCode::UnexpectedEndOfHexEscape)
1191                         } else {
1192                             encode_surrogate(scratch, n1);
1193                             // The \ prior to this byte started an escape sequence,
1194                             // so we need to parse that now. This recursive call
1195                             // does not blow the stack on malicious input because
1196                             // the escape is not \u, so it will be handled by one
1197                             // of the easy nonrecursive cases.
1198                             parse_escape(read, validate, scratch)
1199                         };
1200                     }
1201 
1202                     let n2 = tri!(read.decode_hex_escape(4));
1203 
1204                     if n2 < 0xDC00 || n2 > 0xDFFF {
1205                         return error_or_replace(
1206                             read,
1207                             false,
1208                             ErrorCode::LoneLeadingSurrogateInHexEscape,
1209                         );
1210                     }
1211 
1212                     let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
1213 
1214                     match char::from_u32(n) {
1215                         Some(c) => c,
1216                         None => {
1217                             return error_or_replace(
1218                                 read,
1219                                 false,
1220                                 ErrorCode::InvalidUnicodeCodePoint,
1221                             );
1222                         }
1223                     }
1224                 }
1225 
1226                 // Every u16 outside of the surrogate ranges above is guaranteed
1227                 // to be a legal char.
1228                 n => char::from_u32(n as u32).unwrap(),
1229             };
1230 
1231             scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
1232         }
1233         _ => {
1234             return error(read, ErrorCode::InvalidEscape);
1235         }
1236     }
1237 
1238     Ok(())
1239 }
1240 
1241 /// Parses a JSON escape sequence and discards the value. Assumes the previous
1242 /// byte read was a backslash.
ignore_escape<'de, R>(read: &mut R) -> Result<()> where R: ?Sized + Read<'de>,1243 fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
1244 where
1245     R: ?Sized + Read<'de>,
1246 {
1247     let ch = tri!(next_or_eof(read));
1248 
1249     match ch {
1250         b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' | b'v' => {}
1251         b'u' => {
1252             // At this point we don't care if the codepoint is valid. We just
1253             // want to consume it. We don't actually know what is valid or not
1254             // at this point, because that depends on if this string will
1255             // ultimately be parsed into a string or a byte buffer in the "real"
1256             // parse.
1257 
1258             tri!(read.decode_hex_escape(4));
1259         }
1260         b'x' => {
1261             let c: u32 = tri!(read.decode_hex_escape(2)).into();
1262             match char::from_u32(c) {
1263                 Some(_) => {}
1264                 None => {
1265                     return error(read, ErrorCode::InvalidUnicodeCodePoint);
1266                 }
1267             };
1268         }
1269         _ => {
1270             return error(read, ErrorCode::InvalidEscape);
1271         }
1272     }
1273 
1274     Ok(())
1275 }
1276 
// Maps each byte to the value of the hexadecimal digit it spells (0..=15), or
// to 255 when the byte is not a hex digit. Both upper- and lower-case letters
// are recognized.
static HEX: [u8; 256] = {
    const NOT_HEX: u8 = 255;
    let mut table = [NOT_HEX; 256];

    // '0'..='9' map to 0..=9.
    let mut digit = 0_u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }

    // 'A'..='F' and 'a'..='f' map to 10..=15.
    let mut offset = 0_u8;
    while offset < 6 {
        table[(b'A' + offset) as usize] = 10 + offset;
        table[(b'a' + offset) as usize] = 10 + offset;
        offset += 1;
    }

    table
};
1299 
decode_hex_val(val: u8) -> Option<u16>1300 fn decode_hex_val(val: u8) -> Option<u16> {
1301     let n = HEX[val as usize] as u16;
1302     if n == 255 {
1303         None
1304     } else {
1305         Some(n)
1306     }
1307 }
1308