1 use crate::error::{Error, ErrorCode, Result};
2 use alloc::vec::Vec;
3 use core::char;
4 use core::cmp;
5 use core::ops::Deref;
6 use core::str;
7
8 #[cfg(feature = "std")]
9 use crate::io;
10 #[cfg(feature = "std")]
11 use crate::iter::LineColIterator;
12
13 #[cfg(feature = "raw_value")]
14 use crate::raw::BorrowedRawDeserializer;
15 #[cfg(all(feature = "raw_value", feature = "std"))]
16 use crate::raw::OwnedRawDeserializer;
17 #[cfg(all(feature = "raw_value", feature = "std"))]
18 use alloc::string::String;
19 #[cfg(feature = "raw_value")]
20 use serde::de::Visitor;
21
22 /// Trait used by the deserializer for iterating over input. This is manually
23 /// "specialized" for iterating over &[u8]. Once feature(specialization) is
24 /// stable we can use actual specialization.
25 ///
26 /// This trait is sealed and cannot be implemented for types outside of
27 /// `serde_json_lenient`.
28 pub trait Read<'de>: private::Sealed {
29 #[doc(hidden)]
next(&mut self) -> Result<Option<u8>>30 fn next(&mut self) -> Result<Option<u8>>;
31 #[doc(hidden)]
peek(&mut self) -> Result<Option<u8>>32 fn peek(&mut self) -> Result<Option<u8>>;
33
34 /// Only valid after a call to peek(). Discards the peeked byte.
35 #[doc(hidden)]
discard(&mut self)36 fn discard(&mut self);
37
38 /// Position of the most recent call to next().
39 ///
40 /// The most recent call was probably next() and not peek(), but this method
41 /// should try to return a sensible result if the most recent call was
42 /// actually peek() because we don't always know.
43 ///
44 /// Only called in case of an error, so performance is not important.
45 #[doc(hidden)]
position(&self) -> Position46 fn position(&self) -> Position;
47
48 /// Position of the most recent call to peek().
49 ///
50 /// The most recent call was probably peek() and not next(), but this method
51 /// should try to return a sensible result if the most recent call was
52 /// actually next() because we don't always know.
53 ///
54 /// Only called in case of an error, so performance is not important.
55 #[doc(hidden)]
peek_position(&self) -> Position56 fn peek_position(&self) -> Position;
57
58 /// Offset from the beginning of the input to the next byte that would be
59 /// returned by next() or peek().
60 #[doc(hidden)]
byte_offset(&self) -> usize61 fn byte_offset(&self) -> usize;
62
63 /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
64 /// string until the next quotation mark using the given scratch space if
65 /// necessary. The scratch space is initially empty.
66 #[doc(hidden)]
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>67 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;
68
69 /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
70 /// string until the next quotation mark using the given scratch space if
71 /// necessary. The scratch space is initially empty.
72 ///
73 /// This function returns the raw bytes in the string with escape sequences
74 /// expanded but without performing unicode validation.
75 #[doc(hidden)]
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'de, 's, [u8]>>76 fn parse_str_raw<'s>(
77 &'s mut self,
78 scratch: &'s mut Vec<u8>,
79 ) -> Result<Reference<'de, 's, [u8]>>;
80
81 /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
82 /// string until the next quotation mark but discards the data.
83 #[doc(hidden)]
ignore_str(&mut self) -> Result<()>84 fn ignore_str(&mut self) -> Result<()>;
85
86 /// Assumes the previous byte was a hex escape sequence ('\u') in a string.
87 /// Parses next hexadecimal sequence.
88 #[doc(hidden)]
decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>89 fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>;
90
91 /// Switch raw buffering mode on.
92 ///
93 /// This is used when deserializing `RawValue`.
94 #[cfg(feature = "raw_value")]
95 #[doc(hidden)]
begin_raw_buffering(&mut self)96 fn begin_raw_buffering(&mut self);
97
98 /// Switch raw buffering mode off and provides the raw buffered data to the
99 /// given visitor.
100 #[cfg(feature = "raw_value")]
101 #[doc(hidden)]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'de>102 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
103 where
104 V: Visitor<'de>;
105
106 /// Whether we should replace invalid unicode characters with \u{fffd}.
replace_invalid_unicode(&self) -> bool107 fn replace_invalid_unicode(&self) -> bool;
108
109 /// Allow \v escapes
allow_v_escapes(&self) -> bool110 fn allow_v_escapes(&self) -> bool;
111
112 /// Allow \x escapes
allow_x_escapes(&self) -> bool113 fn allow_x_escapes(&self) -> bool;
114
115 /// Whether StreamDeserializer::next needs to check the failed flag. True
116 /// for IoRead, false for StrRead and SliceRead which can track failure by
117 /// truncating their input slice to avoid the extra check on every next
118 /// call.
119 #[doc(hidden)]
120 const should_early_return_if_failed: bool;
121
122 /// Mark a persistent failure of StreamDeserializer, either by setting the
123 /// flag or by truncating the input data.
124 #[doc(hidden)]
set_failed(&mut self, failed: &mut bool)125 fn set_failed(&mut self, failed: &mut bool);
126 }
127
/// A line/column location in the input, used when reporting errors.
pub struct Position {
    /// Line number. `SliceRead::position_of_index` starts counting at 1.
    pub line: usize,
    /// Column within the line. `SliceRead::position_of_index` counts bytes
    /// since the last `\n`, starting from 0.
    pub column: usize,
}
132
/// A parsed string (or byte string) that is either borrowed straight from the
/// input or points at data that had to be copied elsewhere first.
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// Data that lives as long as the input (`'b`).
    Borrowed(&'b T),
    /// Data that lives only as long as the buffer it was copied into (`'c`).
    Copied(&'c T),
}

impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    type Target = T;

    /// Returns the referenced value regardless of which variant holds it.
    fn deref(&self) -> &Self::Target {
        match *self {
            Reference::Borrowed(b) => b,
            Reference::Copied(c) => c,
        }
    }
}
154
155 /// Trait used by parse_str_bytes to convert the resulting bytes
156 /// into a string-like thing. Depending on the original caller, this may
157
158 /// be a &str or a &[u8].
159 trait UtfOutputStrategy<T: ?Sized> {
to_result_simple<'de, 's, R: Read<'de>>(&self, read: &R, slice: &'s [u8]) -> Result<&'s T>160 fn to_result_simple<'de, 's, R: Read<'de>>(&self, read: &R, slice: &'s [u8]) -> Result<&'s T>;
161
to_result_direct<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], _: &'de mut Vec<u8>, ) -> Result<Reference<'s, 'de, T>>162 fn to_result_direct<'de, 's, R: Read<'de>>(
163 &self,
164 read: &R,
165 slice: &'s [u8],
166 _: &'de mut Vec<u8>,
167 ) -> Result<Reference<'s, 'de, T>> {
168 self.to_result_simple(read, slice)
169 .map(|r| Reference::Borrowed(r))
170 }
171
to_result_from_scratch<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], ) -> Result<&'s T>172 fn to_result_from_scratch<'de, 's, R: Read<'de>>(
173 &self,
174 read: &R,
175 slice: &'s [u8],
176 ) -> Result<&'s T> {
177 self.to_result_simple(read, slice)
178 }
extend_scratch(&self, scratch: &mut Vec<u8>, slice: &[u8])179 fn extend_scratch(&self, scratch: &mut Vec<u8>, slice: &[u8]) {
180 scratch.extend(slice);
181 }
182 }
183
convert_or_error<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str>184 fn convert_or_error<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
185 str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
186 }
187
188 struct StrUtfOutputStrategy;
189
190 impl UtfOutputStrategy<str> for StrUtfOutputStrategy {
to_result_simple<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], ) -> Result<&'s str>191 fn to_result_simple<'de, 's, R: Read<'de>>(
192 &self,
193 read: &R,
194 slice: &'s [u8],
195 ) -> Result<&'s str> {
196 convert_or_error(read, slice)
197 }
198
to_result_from_scratch<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], ) -> Result<&'s str>199 fn to_result_from_scratch<'de, 's, R: Read<'de>>(
200 &self,
201 read: &R,
202 slice: &'s [u8],
203 ) -> Result<&'s str> {
204 match str::from_utf8(slice) {
205 Ok(s) => Ok(s),
206 Err(_) => error(read, ErrorCode::InvalidUnicodeCodePoint),
207 }
208 }
209 }
210
/// Strategy producing `&str` that replaces invalid UTF-8 sequences with
/// U+FFFD instead of failing.
struct SubstitutingStrUtfOutputStrategy;

impl SubstitutingStrUtfOutputStrategy {
    /// Returns whether conversion occurred. If not, output is unchanged
    /// and the caller should just directly use the input slice.
    ///
    /// When conversion does occur, the valid portions of `input` are appended
    /// to `output` with one U+FFFD appended for each invalid sequence.
    fn convert_from_utf8_lossy(&self, output: &mut Vec<u8>, mut input: &[u8]) -> bool {
        let mut first = true;
        loop {
            match str::from_utf8(input) {
                Ok(valid) => {
                    if first {
                        // The whole input was valid; nothing was written.
                        return false;
                    }
                    output.extend_from_slice(valid.as_bytes());
                    break;
                }
                Err(error) => {
                    let (valid, after_valid) = input.split_at(error.valid_up_to());
                    output.extend_from_slice(valid);
                    output.extend_from_slice("\u{fffd}".as_bytes());

                    if let Some(invalid_sequence_length) = error.error_len() {
                        // Skip the offending bytes and keep scanning.
                        input = &after_valid[invalid_sequence_length..];
                    } else {
                        // `error_len()` is `None` when the input ended in
                        // the middle of a sequence, so nothing follows it.
                        break;
                    }
                }
            }
            first = false;
        }
        true
    }

    /// Reinterprets `slice` as `&str` without validation.
    ///
    /// The caller must guarantee that `slice` is valid UTF-8.
    fn convert_unchecked<'a>(&self, slice: &'a [u8]) -> &'a str {
        // SAFETY: not checked here; relies on every caller passing bytes that
        // were already validated or produced by `convert_from_utf8_lossy`.
        // NOTE(review): confirm that holds for every call site.
        unsafe { str::from_utf8_unchecked(slice) }
    }
}
248
249 impl UtfOutputStrategy<str> for SubstitutingStrUtfOutputStrategy {
to_result_simple<'de, 's, R: Read<'de>>( &self, read: &R, slice: &'s [u8], ) -> Result<&'s str>250 fn to_result_simple<'de, 's, R: Read<'de>>(
251 &self,
252 read: &R,
253 slice: &'s [u8],
254 ) -> Result<&'s str> {
255 convert_or_error(read, slice)
256 }
257
to_result_direct<'de, 's, R: Read<'de>>( &self, _: &R, slice: &'s [u8], scratch: &'de mut Vec<u8>, ) -> Result<Reference<'s, 'de, str>>258 fn to_result_direct<'de, 's, R: Read<'de>>(
259 &self,
260 _: &R,
261 slice: &'s [u8],
262 scratch: &'de mut Vec<u8>,
263 ) -> Result<Reference<'s, 'de, str>> {
264 let r = self.convert_from_utf8_lossy(scratch, slice);
265 Ok(if r {
266 Reference::Copied(self.convert_unchecked(scratch))
267 } else {
268 Reference::Borrowed(self.convert_unchecked(slice))
269 })
270 }
271
to_result_from_scratch<'de, 's, R: Read<'de>>( &self, _: &R, slice: &'s [u8], ) -> Result<&'s str>272 fn to_result_from_scratch<'de, 's, R: Read<'de>>(
273 &self,
274 _: &R,
275 slice: &'s [u8],
276 ) -> Result<&'s str> {
277 // We checked it on the way into the scratch buffer, so no need for further checks now
278 Ok(self.convert_unchecked(slice))
279 }
280
extend_scratch(&self, scratch: &mut Vec<u8>, slice: &[u8])281 fn extend_scratch(&self, scratch: &mut Vec<u8>, slice: &[u8]) {
282 if !self.convert_from_utf8_lossy(scratch, slice) {
283 scratch.extend(slice);
284 }
285 }
286 }
287
288 struct UncheckedStrUtfOutputStrategy;
289
290 impl UtfOutputStrategy<str> for UncheckedStrUtfOutputStrategy {
to_result_simple<'de, 's, R: Read<'de>>(&self, _: &R, slice: &'s [u8]) -> Result<&'s str>291 fn to_result_simple<'de, 's, R: Read<'de>>(&self, _: &R, slice: &'s [u8]) -> Result<&'s str> {
292 // The input is assumed to be valid UTF-8 and the \u-escapes are
293 // checked along the way, so don't need to check here.
294 Ok(unsafe { str::from_utf8_unchecked(slice) })
295 }
296 }
297
298 struct SliceUtfOutputStrategy;
299
300 impl UtfOutputStrategy<[u8]> for SliceUtfOutputStrategy {
to_result_simple<'de, 's, R: Read<'de>>(&self, _: &R, slice: &'s [u8]) -> Result<&'s [u8]>301 fn to_result_simple<'de, 's, R: Read<'de>>(&self, _: &R, slice: &'s [u8]) -> Result<&'s [u8]> {
302 Ok(slice)
303 }
304 }
305
/// JSON input source that reads from a std::io input stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R>
where
    R: io::Read,
{
    /// Byte iterator over the reader that also tracks line and column.
    iter: LineColIterator<io::Bytes<R>>,
    /// Temporary storage of peeked byte.
    ch: Option<u8>,
    /// Bytes consumed since raw buffering was switched on; `None` while raw
    /// buffering is off.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
319
/// JSON input source that reads from a slice of bytes.
//
// This is more efficient than other iterators because peek() can be read-only
// and we can compute line/col position only if an error happens.
#[allow(clippy::struct_excessive_bools)]
pub struct SliceRead<'a> {
    slice: &'a [u8],
    /// Index of the *next* byte that will be returned by next() or peek().
    index: usize,
    /// Replace invalid characters with U+FFFD.
    replace_invalid_characters: bool,
    /// Allow CR and LF characters in strings.
    allow_newlines_in_string: bool,
    /// Allow control characters other than CR/LF in strings.
    allow_control_characters_in_string: bool,
    /// Allow `\x##` in strings.
    allow_x_escapes: bool,
    /// Allow `\v` in strings.
    allow_v_escapes: bool,
    /// Value of `index` at the time raw buffering was switched on.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
337
338 /// JSON input source that reads from a UTF-8 string.
339 //
340 // Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
341 pub struct StrRead<'a> {
342 delegate: SliceRead<'a>,
343 #[cfg(feature = "raw_value")]
344 data: &'a str,
345 }
346
// Prevent users from implementing the Read trait.
mod private {
    /// Supertrait marker; traits that require it can only be implemented by
    /// code that is able to name this module.
    pub trait Sealed {}
}
351
352 //////////////////////////////////////////////////////////////////////////////
353
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// The reader is consumed one byte at a time through `reader.bytes()`.
    pub fn new(reader: R) -> Self {
        IoRead {
            iter: LineColIterator::new(reader.bytes()),
            // Nothing has been peeked yet.
            ch: None,
            // Raw buffering starts switched off.
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
369
// Allows `IoRead` to implement the sealed `Read` trait.
#[cfg(feature = "std")]
impl<R> private::Sealed for IoRead<R> where R: io::Read {}
372
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads string contents up to the closing quotation mark, accumulating
    /// the decoded bytes in `scratch`, then hands them to `utf_strategy`.
    ///
    /// When `validate` is true, a byte flagged by `ESCAPE_ALL` other than `"`
    /// or `\` yields `ErrorCode::ControlCharacterWhileParsingString`;
    /// otherwise such bytes are copied through unchanged.
    #[allow(clippy::needless_pass_by_value)]
    fn parse_str_bytes<'s, T, S>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        utf_strategy: S,
    ) -> Result<&'s T>
    where
        T: ?Sized,
        S: UtfOutputStrategy<T>,
    {
        loop {
            let ch = tri!(next_or_eof(self));
            if !ESCAPE_ALL[ch as usize] {
                // Ordinary byte: copy it verbatim.
                scratch.push(ch);
                continue;
            }
            match ch {
                b'"' => {
                    // End of string: everything is in the scratch buffer.
                    return utf_strategy.to_result_simple(self, scratch);
                }
                b'\\' => {
                    tri!(parse_escape(self, validate, scratch));
                }
                _ => {
                    if validate {
                        return error(self, ErrorCode::ControlCharacterWhileParsingString);
                    }
                    scratch.push(ch);
                }
            }
        }
    }
}
412
#[cfg(feature = "std")]
impl<'de, R> Read<'de> for IoRead<R>
where
    R: io::Read,
{
    /// `IoRead` never substitutes invalid unicode.
    fn replace_invalid_unicode(&self) -> bool {
        false
    }

    /// `IoRead` never accepts `\x` escapes.
    fn allow_x_escapes(&self) -> bool {
        false
    }

    /// `IoRead` never accepts `\v` escapes.
    fn allow_v_escapes(&self) -> bool {
        false
    }

    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        // Use the byte stashed by an earlier peek() if there is one,
        // otherwise pull a fresh byte from the underlying iterator.
        let ch = match self.ch.take() {
            Some(ch) => ch,
            None => match self.iter.next() {
                Some(Ok(ch)) => ch,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };
        // Every consumed byte is recorded while raw buffering is on.
        #[cfg(feature = "raw_value")]
        {
            if let Some(buf) = &mut self.raw_buffer {
                buf.push(ch);
            }
        }
        Ok(Some(ch))
    }

    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        match self.ch {
            Some(ch) => Ok(Some(ch)),
            None => match self.iter.next() {
                Some(Err(err)) => Err(Error::io(err)),
                Some(Ok(ch)) => {
                    // Remember the byte so the following next()/discard()
                    // sees it without touching the iterator again.
                    self.ch = Some(ch);
                    Ok(self.ch)
                }
                None => Ok(None),
            },
        }
    }

    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        // A discarded byte still counts as consumed, so it goes into the raw
        // buffer when buffering is on.
        if let Some(ch) = self.ch.take() {
            if let Some(buf) = &mut self.raw_buffer {
                buf.push(ch);
            }
        }
    }

    fn position(&self) -> Position {
        Position {
            line: self.iter.line(),
            column: self.iter.col(),
        }
    }

    fn peek_position(&self) -> Position {
        // The LineColIterator updates its position during peek() so it has the
        // right one here.
        self.position()
    }

    fn byte_offset(&self) -> usize {
        match self.ch {
            // A peeked byte was already pulled from the iterator but has not
            // been consumed yet, so it must not be counted.
            Some(_) => self.iter.byte_offset() - 1,
            None => self.iter.byte_offset(),
        }
    }

    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        // Data read from a stream always lives in the scratch buffer.
        self.parse_str_bytes(scratch, true, StrUtfOutputStrategy)
            .map(Reference::Copied)
    }

    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        self.parse_str_bytes(scratch, false, SliceUtfOutputStrategy)
            .map(Reference::Copied)
    }

    fn ignore_str(&mut self) -> Result<()> {
        loop {
            let ch = tri!(next_or_eof(self));
            if !ESCAPE_ALL[ch as usize] {
                continue;
            }
            match ch {
                b'"' => {
                    return Ok(());
                }
                b'\\' => {
                    tri!(ignore_escape(self));
                }
                _ => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
            }
        }
    }

    fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16> {
        let mut n = 0;
        for _ in 0..num_digits {
            match decode_hex_val(tri!(next_or_eof(self))) {
                None => return error(self, ErrorCode::InvalidEscape),
                Some(val) => {
                    // Shift in one hex digit at a time.
                    n = (n << 4) + val;
                }
            }
        }
        Ok(n)
    }

    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        // Panics if raw buffering is not currently on, i.e. no matching
        // begin_raw_buffering() call preceded this one.
        let raw = self.raw_buffer.take().unwrap();
        let raw = match String::from_utf8(raw) {
            Ok(raw) => raw,
            Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
        };
        visitor.visit_map(OwnedRawDeserializer {
            raw_value: Some(raw),
        })
    }

    const should_early_return_if_failed: bool = true;

    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
582
583 //////////////////////////////////////////////////////////////////////////////
584
585 impl<'a> SliceRead<'a> {
586 /// Create a JSON input source to read from a slice of bytes.
587 ///
588 /// The options are as follows:
589 /// - `replace_invalid_characters` - replace invalid characters with U+FFFD.
590 /// - `allow_newlines_in_string` - allow CR and LF characters in strings.
591 /// - `allow_control_characters_in_string` - allow control characters other than CR/LF in
592 /// strings.
593 /// - `allow_v_escapes` - allow `\v` in strings.
594 /// - `allow_x_escapes` - allow `\x##` in strings.
595 #[allow(clippy::fn_params_excessive_bools)]
new( slice: &'a [u8], replace_invalid_characters: bool, allow_newlines_in_string: bool, allow_control_characters_in_string: bool, allow_v_escapes: bool, allow_x_escapes: bool, ) -> Self596 pub fn new(
597 slice: &'a [u8],
598 replace_invalid_characters: bool,
599 allow_newlines_in_string: bool,
600 allow_control_characters_in_string: bool,
601 allow_v_escapes: bool,
602 allow_x_escapes: bool,
603 ) -> Self {
604 SliceRead {
605 slice,
606 index: 0,
607 replace_invalid_characters,
608 allow_newlines_in_string,
609 allow_control_characters_in_string,
610 allow_v_escapes,
611 allow_x_escapes,
612 #[cfg(feature = "raw_value")]
613 raw_buffering_start_index: 0,
614 }
615 }
616
617 /// Find the appropriate escaping table for the current set of options.
escapes(&self) -> &[bool; 256]618 fn escapes(&self) -> &[bool; 256] {
619 match (
620 self.allow_newlines_in_string,
621 self.allow_control_characters_in_string,
622 ) {
623 (false, false) => &ESCAPE_ALL,
624 (true, false) => &ESCAPE_NL_OK,
625 (false, true) => &ESCAPE_CONTROL_OK,
626 (true, true) => &ESCAPE_CONTROL_NL_OK,
627 }
628 }
629
position_of_index(&self, i: usize) -> Position630 fn position_of_index(&self, i: usize) -> Position {
631 let mut position = Position { line: 1, column: 0 };
632 for ch in &self.slice[..i] {
633 match *ch {
634 b'\n' => {
635 position.line += 1;
636 position.column = 0;
637 }
638 _ => {
639 position.column += 1;
640 }
641 }
642 }
643 position
644 }
645
646 /// The big optimization here over IoRead is that if the string contains no
647 /// backslash escape sequences, the returned &str is a slice of the raw JSON
648 /// data so we avoid copying into the scratch space.
649 #[allow(clippy::needless_pass_by_value)]
parse_str_bytes<'s, T, S>( &'s mut self, scratch: &'s mut Vec<u8>, validate: bool, utf_strategy: S, ) -> Result<Reference<'a, 's, T>> where T: ?Sized + 's, S: UtfOutputStrategy<T>,650 fn parse_str_bytes<'s, T, S>(
651 &'s mut self,
652 scratch: &'s mut Vec<u8>,
653 validate: bool,
654 utf_strategy: S,
655 ) -> Result<Reference<'a, 's, T>>
656 where
657 T: ?Sized + 's,
658 S: UtfOutputStrategy<T>,
659 {
660 // Index of the first byte not yet copied into the scratch space.
661 let mut start = self.index;
662
663 loop {
664 while self.index < self.slice.len() && !self.escapes()[self.slice[self.index] as usize]
665 {
666 self.index += 1;
667 }
668 if self.index == self.slice.len() {
669 return error(self, ErrorCode::EofWhileParsingString);
670 }
671 match self.slice[self.index] {
672 b'"' => {
673 if scratch.is_empty() {
674 // Fast path: return a slice of the raw JSON without any
675 // copying.
676 let borrowed = &self.slice[start..self.index];
677 self.index += 1;
678 return utf_strategy.to_result_direct(self, borrowed, scratch);
679 } else {
680 utf_strategy.extend_scratch(scratch, &self.slice[start..self.index]);
681 self.index += 1;
682 return utf_strategy
683 .to_result_from_scratch(self, scratch)
684 .map(|r| Reference::Copied(r));
685 }
686 }
687 b'\\' => {
688 utf_strategy.extend_scratch(scratch, &self.slice[start..self.index]);
689 self.index += 1;
690 tri!(parse_escape(self, validate, scratch));
691 start = self.index;
692 }
693 _ => {
694 self.index += 1;
695 if validate {
696 return error(self, ErrorCode::ControlCharacterWhileParsingString);
697 }
698 }
699 }
700 }
701 }
702 }
703
704 impl<'a> private::Sealed for SliceRead<'a> {}
705
706 impl<'a> Read<'a> for SliceRead<'a> {
replace_invalid_unicode(&self) -> bool707 fn replace_invalid_unicode(&self) -> bool {
708 self.replace_invalid_characters
709 }
710
allow_x_escapes(&self) -> bool711 fn allow_x_escapes(&self) -> bool {
712 self.allow_x_escapes
713 }
714
allow_v_escapes(&self) -> bool715 fn allow_v_escapes(&self) -> bool {
716 self.allow_v_escapes
717 }
718
719 #[inline]
next(&mut self) -> Result<Option<u8>>720 fn next(&mut self) -> Result<Option<u8>> {
721 // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
722 // is about 10% slower.
723 Ok(if self.index < self.slice.len() {
724 let ch = self.slice[self.index];
725 self.index += 1;
726 Some(ch)
727 } else {
728 None
729 })
730 }
731
732 #[inline]
peek(&mut self) -> Result<Option<u8>>733 fn peek(&mut self) -> Result<Option<u8>> {
734 // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
735 // for some reason.
736 Ok(if self.index < self.slice.len() {
737 Some(self.slice[self.index])
738 } else {
739 None
740 })
741 }
742
743 #[inline]
discard(&mut self)744 fn discard(&mut self) {
745 self.index += 1;
746 }
747
position(&self) -> Position748 fn position(&self) -> Position {
749 self.position_of_index(self.index)
750 }
751
peek_position(&self) -> Position752 fn peek_position(&self) -> Position {
753 // Cap it at slice.len() just in case the most recent call was next()
754 // and it returned the last byte.
755 self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
756 }
757
byte_offset(&self) -> usize758 fn byte_offset(&self) -> usize {
759 self.index
760 }
761
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>>762 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
763 if self.replace_invalid_characters {
764 self.parse_str_bytes(scratch, true, SubstitutingStrUtfOutputStrategy)
765 } else {
766 self.parse_str_bytes(scratch, true, StrUtfOutputStrategy)
767 }
768 }
769
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'a, 's, [u8]>>770 fn parse_str_raw<'s>(
771 &'s mut self,
772 scratch: &'s mut Vec<u8>,
773 ) -> Result<Reference<'a, 's, [u8]>> {
774 self.parse_str_bytes(scratch, false, SliceUtfOutputStrategy)
775 }
776
ignore_str(&mut self) -> Result<()>777 fn ignore_str(&mut self) -> Result<()> {
778 loop {
779 while self.index < self.slice.len() && !self.escapes()[self.slice[self.index] as usize]
780 {
781 self.index += 1;
782 }
783 if self.index == self.slice.len() {
784 return error(self, ErrorCode::EofWhileParsingString);
785 }
786 match self.slice[self.index] {
787 b'"' => {
788 self.index += 1;
789 return Ok(());
790 }
791 b'\\' => {
792 self.index += 1;
793 tri!(ignore_escape(self));
794 }
795 _ => {
796 return error(self, ErrorCode::ControlCharacterWhileParsingString);
797 }
798 }
799 }
800 }
801
decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>802 fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16> {
803 if self.index + num_digits > self.slice.len() {
804 self.index = self.slice.len();
805 return error(self, ErrorCode::EofWhileParsingString);
806 }
807
808 let mut n = 0;
809 for _ in 0..num_digits {
810 let ch = decode_hex_val(self.slice[self.index]);
811 self.index += 1;
812 match ch {
813 None => return error(self, ErrorCode::InvalidEscape),
814 Some(val) => {
815 n = (n << 4) + val;
816 }
817 }
818 }
819 Ok(n)
820 }
821
822 #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)823 fn begin_raw_buffering(&mut self) {
824 self.raw_buffering_start_index = self.index;
825 }
826
827 #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'a>,828 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
829 where
830 V: Visitor<'a>,
831 {
832 let raw = &self.slice[self.raw_buffering_start_index..self.index];
833 let raw = match str::from_utf8(raw) {
834 Ok(raw) => raw,
835 Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
836 };
837 visitor.visit_map(BorrowedRawDeserializer {
838 raw_value: Some(raw),
839 })
840 }
841
842 const should_early_return_if_failed: bool = false;
843
844 #[inline]
845 #[cold]
set_failed(&mut self, _failed: &mut bool)846 fn set_failed(&mut self, _failed: &mut bool) {
847 self.slice = &self.slice[..self.index];
848 }
849 }
850
851 //////////////////////////////////////////////////////////////////////////////
852
853 impl<'a> StrRead<'a> {
854 /// Create a JSON input source to read from a UTF-8 string.
new(s: &'a str) -> Self855 pub fn new(s: &'a str) -> Self {
856 StrRead {
857 delegate: SliceRead::new(s.as_bytes(), false, false, false, false, false),
858 #[cfg(feature = "raw_value")]
859 data: s,
860 }
861 }
862 }
863
864 impl<'a> private::Sealed for StrRead<'a> {}
865
866 impl<'a> Read<'a> for StrRead<'a> {
replace_invalid_unicode(&self) -> bool867 fn replace_invalid_unicode(&self) -> bool {
868 false
869 }
870
allow_x_escapes(&self) -> bool871 fn allow_x_escapes(&self) -> bool {
872 false
873 }
874
allow_v_escapes(&self) -> bool875 fn allow_v_escapes(&self) -> bool {
876 false
877 }
878
879 #[inline]
next(&mut self) -> Result<Option<u8>>880 fn next(&mut self) -> Result<Option<u8>> {
881 self.delegate.next()
882 }
883
884 #[inline]
peek(&mut self) -> Result<Option<u8>>885 fn peek(&mut self) -> Result<Option<u8>> {
886 self.delegate.peek()
887 }
888
889 #[inline]
discard(&mut self)890 fn discard(&mut self) {
891 self.delegate.discard();
892 }
893
position(&self) -> Position894 fn position(&self) -> Position {
895 self.delegate.position()
896 }
897
peek_position(&self) -> Position898 fn peek_position(&self) -> Position {
899 self.delegate.peek_position()
900 }
901
byte_offset(&self) -> usize902 fn byte_offset(&self) -> usize {
903 self.delegate.byte_offset()
904 }
905
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>>906 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
907 self.delegate
908 .parse_str_bytes(scratch, true, UncheckedStrUtfOutputStrategy)
909 }
910
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'a, 's, [u8]>>911 fn parse_str_raw<'s>(
912 &'s mut self,
913 scratch: &'s mut Vec<u8>,
914 ) -> Result<Reference<'a, 's, [u8]>> {
915 self.delegate.parse_str_raw(scratch)
916 }
917
ignore_str(&mut self) -> Result<()>918 fn ignore_str(&mut self) -> Result<()> {
919 self.delegate.ignore_str()
920 }
921
decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>922 fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16> {
923 self.delegate.decode_hex_escape(num_digits)
924 }
925
926 #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)927 fn begin_raw_buffering(&mut self) {
928 self.delegate.begin_raw_buffering();
929 }
930
931 #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'a>,932 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
933 where
934 V: Visitor<'a>,
935 {
936 let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
937 visitor.visit_map(BorrowedRawDeserializer {
938 raw_value: Some(raw),
939 })
940 }
941
942 const should_early_return_if_failed: bool = false;
943
944 #[inline]
945 #[cold]
set_failed(&mut self, failed: &mut bool)946 fn set_failed(&mut self, failed: &mut bool) {
947 self.delegate.set_failed(failed);
948 }
949 }
950
951 //////////////////////////////////////////////////////////////////////////////
952
953 impl<'a, 'de, R> private::Sealed for &'a mut R where R: Read<'de> {}
954
955 impl<'a, 'de, R> Read<'de> for &'a mut R
956 where
957 R: Read<'de>,
958 {
next(&mut self) -> Result<Option<u8>>959 fn next(&mut self) -> Result<Option<u8>> {
960 R::next(self)
961 }
962
peek(&mut self) -> Result<Option<u8>>963 fn peek(&mut self) -> Result<Option<u8>> {
964 R::peek(self)
965 }
966
discard(&mut self)967 fn discard(&mut self) {
968 R::discard(self);
969 }
970
position(&self) -> Position971 fn position(&self) -> Position {
972 R::position(self)
973 }
974
peek_position(&self) -> Position975 fn peek_position(&self) -> Position {
976 R::peek_position(self)
977 }
978
byte_offset(&self) -> usize979 fn byte_offset(&self) -> usize {
980 R::byte_offset(self)
981 }
982
parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>983 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
984 R::parse_str(self, scratch)
985 }
986
parse_str_raw<'s>( &'s mut self, scratch: &'s mut Vec<u8>, ) -> Result<Reference<'de, 's, [u8]>>987 fn parse_str_raw<'s>(
988 &'s mut self,
989 scratch: &'s mut Vec<u8>,
990 ) -> Result<Reference<'de, 's, [u8]>> {
991 R::parse_str_raw(self, scratch)
992 }
993
ignore_str(&mut self) -> Result<()>994 fn ignore_str(&mut self) -> Result<()> {
995 R::ignore_str(self)
996 }
997
decode_hex_escape(&mut self, num_digits: usize) -> Result<u16>998 fn decode_hex_escape(&mut self, num_digits: usize) -> Result<u16> {
999 R::decode_hex_escape(self, num_digits)
1000 }
1001
1002 #[cfg(feature = "raw_value")]
begin_raw_buffering(&mut self)1003 fn begin_raw_buffering(&mut self) {
1004 R::begin_raw_buffering(self);
1005 }
1006
1007 #[cfg(feature = "raw_value")]
end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> where V: Visitor<'de>,1008 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
1009 where
1010 V: Visitor<'de>,
1011 {
1012 R::end_raw_buffering(self, visitor)
1013 }
1014
1015 const should_early_return_if_failed: bool = R::should_early_return_if_failed;
1016
set_failed(&mut self, failed: &mut bool)1017 fn set_failed(&mut self, failed: &mut bool) {
1018 R::set_failed(self, failed);
1019 }
1020
replace_invalid_unicode(&self) -> bool1021 fn replace_invalid_unicode(&self) -> bool {
1022 R::replace_invalid_unicode(self)
1023 }
1024
allow_x_escapes(&self) -> bool1025 fn allow_x_escapes(&self) -> bool {
1026 R::allow_x_escapes(self)
1027 }
1028
allow_v_escapes(&self) -> bool1029 fn allow_v_escapes(&self) -> bool {
1030 R::allow_v_escapes(self)
1031 }
1032 }
1033
1034 //////////////////////////////////////////////////////////////////////////////
1035
1036 /// Marker for whether StreamDeserializer can implement FusedIterator.
1037 pub trait Fused: private::Sealed {}
1038 impl<'a> Fused for SliceRead<'a> {}
1039 impl<'a> Fused for StrRead<'a> {}
1040
// Precomputed escape tables, one per combination of the two `get_escapes`
// flags (allow raw newlines, allow other raw control characters).

/// Neither raw CR/LF nor any other raw control character is allowed.
const ESCAPE_ALL: [bool; 256] = get_escapes(false, false);
/// Raw control characters are allowed, but raw CR/LF are not.
const ESCAPE_CONTROL_OK: [bool; 256] = get_escapes(false, true);
/// Raw CR/LF are allowed, but other raw control characters are not.
const ESCAPE_NL_OK: [bool; 256] = get_escapes(true, false);
/// Both raw CR/LF and other raw control characters are allowed.
const ESCAPE_CONTROL_NL_OK: [bool; 256] = get_escapes(true, true);
1045
// Builds the lookup table of bytes that may not appear unescaped inside a
// string: entry `i` is `true` when byte `i` requires an escape sequence in the
// input.
//
// The quote (0x22) and the backslash (0x5C) always require one. CR and LF
// require one unless `allow_newlines_in_string` is set; every other byte in
// 0x00..=0x1F requires one unless `allow_control_characters_in_string` is set.
// All remaining bytes are allowed unescaped.
const fn get_escapes(
    allow_newlines_in_string: bool,
    allow_control_characters_in_string: bool,
) -> [bool; 256] {
    let newline_needs_escape = !allow_newlines_in_string;
    let control_needs_escape = !allow_control_characters_in_string;

    let mut table = [false; 256];

    // Start by treating the whole control range uniformly...
    let mut byte = 0x00;
    while byte < 0x20 {
        table[byte] = control_needs_escape;
        byte += 1;
    }

    // ...then give LF and CR their own, independently configurable, setting.
    table[b'\n' as usize] = newline_needs_escape;
    table[b'\r' as usize] = newline_needs_escape;

    // Quote and backslash are never allowed unescaped.
    table[b'"' as usize] = true;
    table[b'\\' as usize] = true;

    table
}
1078
next_or_eof<'de, R>(read: &mut R) -> Result<u8> where R: ?Sized + Read<'de>,1079 fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
1080 where
1081 R: ?Sized + Read<'de>,
1082 {
1083 match tri!(read.next()) {
1084 Some(b) => Ok(b),
1085 None => error(read, ErrorCode::EofWhileParsingString),
1086 }
1087 }
1088
peek_or_eof<'de, R>(read: &mut R) -> Result<u8> where R: ?Sized + Read<'de>,1089 fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
1090 where
1091 R: ?Sized + Read<'de>,
1092 {
1093 match tri!(read.peek()) {
1094 Some(b) => Ok(b),
1095 None => error(read, ErrorCode::EofWhileParsingString),
1096 }
1097 }
1098
error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T> where R: ?Sized + Read<'de>,1099 fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
1100 where
1101 R: ?Sized + Read<'de>,
1102 {
1103 let position = read.position();
1104 Err(Error::syntax(reason, position.line, position.column))
1105 }
1106
1107 /// Parses a JSON escape sequence and appends it into the scratch space. Assumes
1108 /// the previous byte read was a backslash.
parse_escape<'de, R: Read<'de>>( read: &mut R, validate: bool, scratch: &mut Vec<u8>, ) -> Result<()>1109 fn parse_escape<'de, R: Read<'de>>(
1110 read: &mut R,
1111 validate: bool,
1112 scratch: &mut Vec<u8>,
1113 ) -> Result<()> {
1114 let ch = tri!(next_or_eof(read));
1115
1116 // In the event of an error, if replacing invalid unicode, just return REPLACEMENT CHARACTER.
1117 // Otherwise, discard the peeked byte representing the error if necessary and fall back to
1118 // error().
1119 let mut error_or_replace = |read: &mut R, need_discard, reason| {
1120 if read.replace_invalid_unicode() {
1121 scratch.extend("\u{fffd}".as_bytes());
1122 Ok(())
1123 } else {
1124 if need_discard {
1125 read.discard();
1126 }
1127 error(read, reason)
1128 }
1129 };
1130
1131 match ch {
1132 b'"' => scratch.push(b'"'),
1133 b'\\' => scratch.push(b'\\'),
1134 b'/' => scratch.push(b'/'),
1135 b'b' => scratch.push(b'\x08'),
1136 b'f' => scratch.push(b'\x0c'),
1137 b'n' => scratch.push(b'\n'),
1138 b'r' => scratch.push(b'\r'),
1139 b't' => scratch.push(b'\t'),
1140 b'v' if read.allow_v_escapes() => scratch.push(b'\x0b'),
1141 b'x' if read.allow_x_escapes() => {
1142 let c: u32 = tri!(read.decode_hex_escape(2)).into();
1143 let c = match char::from_u32(c) {
1144 Some(c) => c,
1145 None => {
1146 return error_or_replace(read, false, ErrorCode::InvalidUnicodeCodePoint);
1147 }
1148 };
1149 scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
1150 }
1151 b'u' => {
1152 fn encode_surrogate(scratch: &mut Vec<u8>, n: u16) {
1153 scratch.extend_from_slice(&[
1154 (n >> 12 & 0b0000_1111) as u8 | 0b1110_0000,
1155 (n >> 6 & 0b0011_1111) as u8 | 0b1000_0000,
1156 (n & 0b0011_1111) as u8 | 0b1000_0000,
1157 ]);
1158 }
1159
1160 let c = match tri!(read.decode_hex_escape(4)) {
1161 n @ 0xDC00..=0xDFFF => {
1162 return if validate {
1163 error_or_replace(read, false, ErrorCode::LoneLeadingSurrogateInHexEscape)
1164 } else {
1165 encode_surrogate(scratch, n);
1166 Ok(())
1167 };
1168 }
1169
1170 // Non-BMP characters are encoded as a sequence of two hex
1171 // escapes, representing UTF-16 surrogates. If deserializing a
1172 // utf-8 string the surrogates are required to be paired,
1173 // whereas deserializing a byte string accepts lone surrogates.
1174 n1 @ 0xD800..=0xDBFF => {
1175 if tri!(peek_or_eof(read)) == b'\\' {
1176 read.discard();
1177 } else {
1178 return if validate {
1179 error_or_replace(read, true, ErrorCode::UnexpectedEndOfHexEscape)
1180 } else {
1181 encode_surrogate(scratch, n1);
1182 Ok(())
1183 };
1184 }
1185
1186 if tri!(peek_or_eof(read)) == b'u' {
1187 read.discard();
1188 } else {
1189 return if validate {
1190 error_or_replace(read, true, ErrorCode::UnexpectedEndOfHexEscape)
1191 } else {
1192 encode_surrogate(scratch, n1);
1193 // The \ prior to this byte started an escape sequence,
1194 // so we need to parse that now. This recursive call
1195 // does not blow the stack on malicious input because
1196 // the escape is not \u, so it will be handled by one
1197 // of the easy nonrecursive cases.
1198 parse_escape(read, validate, scratch)
1199 };
1200 }
1201
1202 let n2 = tri!(read.decode_hex_escape(4));
1203
1204 if n2 < 0xDC00 || n2 > 0xDFFF {
1205 return error_or_replace(
1206 read,
1207 false,
1208 ErrorCode::LoneLeadingSurrogateInHexEscape,
1209 );
1210 }
1211
1212 let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
1213
1214 match char::from_u32(n) {
1215 Some(c) => c,
1216 None => {
1217 return error_or_replace(
1218 read,
1219 false,
1220 ErrorCode::InvalidUnicodeCodePoint,
1221 );
1222 }
1223 }
1224 }
1225
1226 // Every u16 outside of the surrogate ranges above is guaranteed
1227 // to be a legal char.
1228 n => char::from_u32(n as u32).unwrap(),
1229 };
1230
1231 scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
1232 }
1233 _ => {
1234 return error(read, ErrorCode::InvalidEscape);
1235 }
1236 }
1237
1238 Ok(())
1239 }
1240
1241 /// Parses a JSON escape sequence and discards the value. Assumes the previous
1242 /// byte read was a backslash.
ignore_escape<'de, R>(read: &mut R) -> Result<()> where R: ?Sized + Read<'de>,1243 fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
1244 where
1245 R: ?Sized + Read<'de>,
1246 {
1247 let ch = tri!(next_or_eof(read));
1248
1249 match ch {
1250 b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' | b'v' => {}
1251 b'u' => {
1252 // At this point we don't care if the codepoint is valid. We just
1253 // want to consume it. We don't actually know what is valid or not
1254 // at this point, because that depends on if this string will
1255 // ultimately be parsed into a string or a byte buffer in the "real"
1256 // parse.
1257
1258 tri!(read.decode_hex_escape(4));
1259 }
1260 b'x' => {
1261 let c: u32 = tri!(read.decode_hex_escape(2)).into();
1262 match char::from_u32(c) {
1263 Some(_) => {}
1264 None => {
1265 return error(read, ErrorCode::InvalidUnicodeCodePoint);
1266 }
1267 };
1268 }
1269 _ => {
1270 return error(read, ErrorCode::InvalidEscape);
1271 }
1272 }
1273
1274 Ok(())
1275 }
1276
// Maps a byte to the value of the hexadecimal digit it spells (`0`-`9`,
// `A`-`F`, `a`-`f`), or to 255 when the byte is not a hex digit.
static HEX: [u8; 256] = {
    const NOT_HEX: u8 = 255;

    let mut table = [NOT_HEX; 256];

    // '0'..='9' map to 0..=9.
    let mut digit = 0_u8;
    while digit < 10 {
        table[(b'0' + digit) as usize] = digit;
        digit += 1;
    }

    // 'A'..='F' and 'a'..='f' both map to 10..=15.
    let mut offset = 0_u8;
    while offset < 6 {
        table[(b'A' + offset) as usize] = 10 + offset;
        table[(b'a' + offset) as usize] = 10 + offset;
        offset += 1;
    }

    table
};
1299
decode_hex_val(val: u8) -> Option<u16>1300 fn decode_hex_val(val: u8) -> Option<u16> {
1301 let n = HEX[val as usize] as u16;
1302 if n == 255 {
1303 None
1304 } else {
1305 Some(n)
1306 }
1307 }
1308