1 use super::*; 2 3 use ciborium_io::Read; 4 5 use core::marker::PhantomData; 6 7 /// A parser for incoming segments 8 pub trait Parser: Default { 9 /// The type of item that is parsed 10 type Item: ?Sized; 11 12 /// The parsing error that may occur 13 type Error; 14 15 /// The main parsing function 16 /// 17 /// This function processes the incoming bytes and returns the item. 18 /// 19 /// One important detail that **MUST NOT** be overlooked is that the 20 /// parser may save data from a previous parsing attempt. The number of 21 /// bytes saved is indicated by the `Parser::saved()` function. The saved 22 /// bytes will be copied into the beginning of the `bytes` array before 23 /// processing. Therefore, two requirements should be met. 24 /// 25 /// First, the incoming byte slice should be larger than the saved bytes. 26 /// 27 /// Second, the incoming byte slice should contain new bytes only after 28 /// the saved byte prefix. 29 /// 30 /// If both criteria are met, this allows the parser to prepend its saved 31 /// bytes without any additional allocation. parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>32 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>; 33 34 /// Indicates the number of saved bytes in the parser saved(&self) -> usize35 fn saved(&self) -> usize { 36 0 37 } 38 } 39 40 /// A bytes parser 41 /// 42 /// No actual processing is performed and the input bytes are directly 43 /// returned. This implies that this parser never saves any bytes internally. 44 #[derive(Default)] 45 pub struct Bytes(()); 46 47 impl Parser for Bytes { 48 type Item = [u8]; 49 type Error = core::convert::Infallible; 50 parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error>51 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> { 52 Ok(bytes) 53 } 54 } 55 56 /// A text parser 57 /// 58 /// This parser converts the input bytes to a `str`. This parser preserves 59 /// trailing invalid UTF-8 sequences in the case that chunking fell in the 60 /// middle of a valid UTF-8 character. 61 #[derive(Default)] 62 pub struct Text { 63 stored: usize, 64 buffer: [u8; 3], 65 } 66 67 impl Parser for Text { 68 type Item = str; 69 type Error = core::str::Utf8Error; 70 parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error>71 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> { 72 // If we cannot advance, return nothing. 73 if bytes.len() <= self.stored { 74 return Ok(""); 75 } 76 77 // Copy previously invalid data into place. 78 bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]); 79 80 Ok(match core::str::from_utf8(bytes) { 81 Ok(s) => { 82 self.stored = 0; 83 s 84 } 85 Err(e) => { 86 let valid_len = e.valid_up_to(); 87 let invalid_len = bytes.len() - valid_len; 88 89 // If the size of the invalid UTF-8 is large enough to hold 90 // all valid UTF-8 characters, we have a syntax error. 91 if invalid_len > self.buffer.len() { 92 return Err(e); 93 } 94 95 // Otherwise, store the invalid bytes for the next read cycle. 96 self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]); 97 self.stored = invalid_len; 98 99 // Decode the valid part of the string. 100 core::str::from_utf8(&bytes[..valid_len]).unwrap() 101 } 102 }) 103 } 104 saved(&self) -> usize105 fn saved(&self) -> usize { 106 self.stored 107 } 108 } 109 110 /// A CBOR segment 111 /// 112 /// This type represents a single bytes or text segment on the wire. It can be 113 /// read out in parsed chunks based on the size of the input scratch buffer. 114 pub struct Segment<'r, R: Read, P: Parser> { 115 reader: &'r mut Decoder<R>, 116 unread: usize, 117 offset: usize, 118 parser: P, 119 } 120 121 impl<'r, R: Read, P: Parser> Segment<'r, R, P> { 122 /// Gets the number of unprocessed bytes 123 #[inline] left(&self) -> usize124 pub fn left(&self) -> usize { 125 self.unread + self.parser.saved() 126 } 127 128 /// Gets the next parsed chunk within the segment 129 /// 130 /// Returns `Ok(None)` when all chunks have been read. 131 #[inline] pull<'a>( &mut self, buffer: &'a mut [u8], ) -> Result<Option<&'a P::Item>, Error<R::Error>>132 pub fn pull<'a>( 133 &mut self, 134 buffer: &'a mut [u8], 135 ) -> Result<Option<&'a P::Item>, Error<R::Error>> { 136 use core::cmp::min; 137 138 let prev = self.parser.saved(); 139 match self.unread { 140 0 if prev == 0 => return Ok(None), 141 0 => return Err(Error::Syntax(self.offset)), 142 _ => (), 143 } 144 145 // Determine how many bytes to read. 146 let size = min(buffer.len(), prev + self.unread); 147 let full = &mut buffer[..size]; 148 let next = &mut full[min(size, prev)..]; 149 150 // Read additional bytes. 151 self.reader.read_exact(next)?; 152 self.unread -= next.len(); 153 154 self.parser 155 .parse(full) 156 .or(Err(Error::Syntax(self.offset))) 157 .map(Some) 158 } 159 } 160 161 /// A sequence of CBOR segments 162 /// 163 /// CBOR allows for bytes or text items to be segmented. This type represents 164 /// the state of that segmented input stream. 165 pub struct Segments<'r, R: Read, P: Parser> { 166 reader: &'r mut Decoder<R>, 167 finish: bool, 168 nested: usize, 169 parser: PhantomData<P>, 170 unwrap: fn(Header) -> Result<Option<usize>, ()>, 171 } 172 173 impl<'r, R: Read, P: Parser> Segments<'r, R, P> { 174 #[inline] new( decoder: &'r mut Decoder<R>, unwrap: fn(Header) -> Result<Option<usize>, ()>, ) -> Self175 pub(crate) fn new( 176 decoder: &'r mut Decoder<R>, 177 unwrap: fn(Header) -> Result<Option<usize>, ()>, 178 ) -> Self { 179 Self { 180 reader: decoder, 181 finish: false, 182 nested: 0, 183 parser: PhantomData, 184 unwrap, 185 } 186 } 187 188 /// Gets the next segment in the stream 189 /// 190 /// Returns `Ok(None)` at the conclusion of the stream. 191 #[inline] pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>>192 pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> { 193 while !self.finish { 194 let offset = self.reader.offset(); 195 match self.reader.pull()? { 196 Header::Break if self.nested == 1 => return Ok(None), 197 Header::Break if self.nested > 1 => self.nested -= 1, 198 header => match (self.unwrap)(header) { 199 Err(..) => return Err(Error::Syntax(offset)), 200 Ok(None) => self.nested += 1, 201 Ok(Some(len)) => { 202 self.finish = self.nested == 0; 203 return Ok(Some(Segment { 204 reader: self.reader, 205 unread: len, 206 offset, 207 parser: P::default(), 208 })); 209 } 210 }, 211 } 212 } 213 214 Ok(None) 215 } 216 } 217