1 use super::*;
2 
3 use ciborium_io::Read;
4 
5 use core::marker::PhantomData;
6 
/// Turns the raw bytes of an incoming segment into typed items
pub trait Parser: Default {
    /// The (possibly unsized) item produced by a successful parse
    type Item: ?Sized;

    /// The error reported when the input cannot be parsed
    type Error;

    /// Parses one chunk of input and returns the item found in it
    ///
    /// The returned item borrows from `bytes`.
    ///
    /// A parser **may hold back bytes** from an earlier call; how many is
    /// reported by [`Parser::saved`]. On the next call those bytes are written
    /// over the start of `bytes` before any parsing takes place, so callers
    /// must lay out the slice accordingly:
    ///
    /// 1. `bytes` should be longer than the number of saved bytes.
    /// 2. New input should start only *after* a prefix of
    ///    [`Parser::saved`] bytes; whatever is in that prefix is overwritten.
    ///
    /// Following both rules lets the parser splice its saved bytes in front
    /// of the new input without allocating.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Reports how many bytes the parser is currently holding back
    ///
    /// The default implementation returns `0`, which suits parsers that never
    /// retain input between calls.
    fn saved(&self) -> usize {
        0
    }
}
39 
40 /// A bytes parser
41 ///
42 /// No actual processing is performed and the input bytes are directly
43 /// returned. This implies that this parser never saves any bytes internally.
44 #[derive(Default)]
45 pub struct Bytes(());
46 
47 impl Parser for Bytes {
48     type Item = [u8];
49     type Error = core::convert::Infallible;
50 
parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error>51     fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
52         Ok(bytes)
53     }
54 }
55 
56 /// A text parser
57 ///
58 /// This parser converts the input bytes to a `str`. This parser preserves
59 /// trailing invalid UTF-8 sequences in the case that chunking fell in the
60 /// middle of a valid UTF-8 character.
61 #[derive(Default)]
62 pub struct Text {
63     stored: usize,
64     buffer: [u8; 3],
65 }
66 
67 impl Parser for Text {
68     type Item = str;
69     type Error = core::str::Utf8Error;
70 
parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error>71     fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
72         // If we cannot advance, return nothing.
73         if bytes.len() <= self.stored {
74             return Ok("");
75         }
76 
77         // Copy previously invalid data into place.
78         bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
79 
80         Ok(match core::str::from_utf8(bytes) {
81             Ok(s) => {
82                 self.stored = 0;
83                 s
84             }
85             Err(e) => {
86                 let valid_len = e.valid_up_to();
87                 let invalid_len = bytes.len() - valid_len;
88 
89                 // If the size of the invalid UTF-8 is large enough to hold
90                 // all valid UTF-8 characters, we have a syntax error.
91                 if invalid_len > self.buffer.len() {
92                     return Err(e);
93                 }
94 
95                 // Otherwise, store the invalid bytes for the next read cycle.
96                 self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
97                 self.stored = invalid_len;
98 
99                 // Decode the valid part of the string.
100                 core::str::from_utf8(&bytes[..valid_len]).unwrap()
101             }
102         })
103     }
104 
saved(&self) -> usize105     fn saved(&self) -> usize {
106         self.stored
107     }
108 }
109 
110 /// A CBOR segment
111 ///
112 /// This type represents a single bytes or text segment on the wire. It can be
113 /// read out in parsed chunks based on the size of the input scratch buffer.
114 pub struct Segment<'r, R: Read, P: Parser> {
115     reader: &'r mut Decoder<R>,
116     unread: usize,
117     offset: usize,
118     parser: P,
119 }
120 
121 impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
122     /// Gets the number of unprocessed bytes
123     #[inline]
left(&self) -> usize124     pub fn left(&self) -> usize {
125         self.unread + self.parser.saved()
126     }
127 
128     /// Gets the next parsed chunk within the segment
129     ///
130     /// Returns `Ok(None)` when all chunks have been read.
131     #[inline]
pull<'a>( &mut self, buffer: &'a mut [u8], ) -> Result<Option<&'a P::Item>, Error<R::Error>>132     pub fn pull<'a>(
133         &mut self,
134         buffer: &'a mut [u8],
135     ) -> Result<Option<&'a P::Item>, Error<R::Error>> {
136         use core::cmp::min;
137 
138         let prev = self.parser.saved();
139         match self.unread {
140             0 if prev == 0 => return Ok(None),
141             0 => return Err(Error::Syntax(self.offset)),
142             _ => (),
143         }
144 
145         // Determine how many bytes to read.
146         let size = min(buffer.len(), prev + self.unread);
147         let full = &mut buffer[..size];
148         let next = &mut full[min(size, prev)..];
149 
150         // Read additional bytes.
151         self.reader.read_exact(next)?;
152         self.unread -= next.len();
153 
154         self.parser
155             .parse(full)
156             .or(Err(Error::Syntax(self.offset)))
157             .map(Some)
158     }
159 }
160 
161 /// A sequence of CBOR segments
162 ///
163 /// CBOR allows for bytes or text items to be segmented. This type represents
164 /// the state of that segmented input stream.
165 pub struct Segments<'r, R: Read, P: Parser> {
166     reader: &'r mut Decoder<R>,
167     finish: bool,
168     nested: usize,
169     parser: PhantomData<P>,
170     unwrap: fn(Header) -> Result<Option<usize>, ()>,
171 }
172 
173 impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
174     #[inline]
new( decoder: &'r mut Decoder<R>, unwrap: fn(Header) -> Result<Option<usize>, ()>, ) -> Self175     pub(crate) fn new(
176         decoder: &'r mut Decoder<R>,
177         unwrap: fn(Header) -> Result<Option<usize>, ()>,
178     ) -> Self {
179         Self {
180             reader: decoder,
181             finish: false,
182             nested: 0,
183             parser: PhantomData,
184             unwrap,
185         }
186     }
187 
188     /// Gets the next segment in the stream
189     ///
190     /// Returns `Ok(None)` at the conclusion of the stream.
191     #[inline]
pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>>192     pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
193         while !self.finish {
194             let offset = self.reader.offset();
195             match self.reader.pull()? {
196                 Header::Break if self.nested == 1 => return Ok(None),
197                 Header::Break if self.nested > 1 => self.nested -= 1,
198                 header => match (self.unwrap)(header) {
199                     Err(..) => return Err(Error::Syntax(offset)),
200                     Ok(None) => self.nested += 1,
201                     Ok(Some(len)) => {
202                         self.finish = self.nested == 0;
203                         return Ok(Some(Segment {
204                             reader: self.reader,
205                             unread: len,
206                             offset,
207                             parser: P::default(),
208                         }));
209                     }
210                 },
211             }
212         }
213 
214         Ok(None)
215     }
216 }
217