1 // Copyright 2024 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //! Parsing logic for V1 data elements, header + contents 16 17 use crate::extended::{de_requires_extended_bit, de_type::DeType, deserialize, DeLength}; 18 use array_view::ArrayView; 19 use nom::{branch, bytes, combinator, error, number, sequence}; 20 use np_hkdf::v1_salt; 21 22 #[cfg(test)] 23 mod tests; 24 25 /// A deserialized data element in a section. 26 /// 27 /// The DE has been processed to the point of exposing a DE type and its contents as a `&[u8]`, but 28 /// no DE-type-specific processing has been performed. 
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct DataElement<'adv> {
    // Position of this DE within its containing section (see `offset()`).
    offset: v1_salt::DataElementOffset,
    // Type code taken from the DE header.
    de_type: DeType,
    // Raw DE payload, borrowed from the advertisement bytes.
    contents: &'adv [u8],
}

impl<'adv> DataElement<'adv> {
    /// Builds a `DataElement` directly from its already-parsed parts.
    ///
    /// No validation is performed here; the fields are stored as given.
    pub(crate) fn new(
        offset: v1_salt::DataElementOffset,
        de_type: DeType,
        contents: &'adv [u8],
    ) -> Self {
        Self { offset, de_type, contents }
    }
}

impl DeHeader {
    /// Parses a single DE header from the front of `input`.
    ///
    /// Two encodings are tried in order: the 1-byte form, then the extended (multi-byte) form.
    /// On success, returns the unconsumed input together with the header, which records the
    /// exact bytes consumed, the decoded type code and the declared contents length.
    /// If neither form matches, the error from `branch::alt` is returned.
    pub(crate) fn parse(input: &[u8]) -> nom::IResult<&[u8], DeHeader> {
        // 1-byte header: 0b0LLLTTTT
        // (hi bit clear, 3 length bits, 4 type bits)
        let parse_single_byte_de_header =
            combinator::map_opt::<&[u8], _, DeHeader, error::Error<&[u8]>, _, _>(
                // `consumed` also yields the raw byte(s) matched so they can be kept in the header
                combinator::consumed(combinator::map_res(
                    combinator::verify(number::complete::u8, |&b| !deserialize::hi_bit_set(b)),
                    |b| {
                        // L bits
                        let len = (b >> 4) & 0x07;
                        // T bits
                        let de_type = ((b & 0x0F) as u32).into();

                        // A failed length conversion is turned into a parse error by `map_res`
                        len.try_into().map(|l| (l, de_type))
                    },
                )),
                |(header_bytes, (len, de_type))| {
                    // NOTE(review): presumably `try_from_slice` only rejects slices longer than
                    // the `ArrayView` capacity, which cannot happen for 1 byte - confirm.
                    ArrayView::try_from_slice(header_bytes).map(|header_bytes| DeHeader {
                        header_bytes,
                        contents_len: len,
                        de_type,
                    })
                },
            );

        // multi-byte headers: 0b1LLLLLLL (0b1TTTTTTT)* 0b0TTTTTTT
        // leading 1 in first byte = multibyte format
        // leading 1 in subsequent bytes = there is at least 1 more type byte
        // leading 0 = this is the last header byte
        // 7-bit length (values 0-127). The encoding itself does not bound the number of type
        // bytes, but the parser below accepts at most 5 of them (enough for a 32-bit type).

        // It's conceivable to have non-canonical extended type sequences where 1 or more leading
        // bytes don't have any bits set (other than the marker hi bit), thereby contributing nothing
        // to the final value.
        // To prevent that, we require that either there be only 1 type byte, or that the first of the
        // multiple type bytes must have a value bit set. It's OK to have no value bits in subsequent
        // type bytes.

        let parse_ext_de_header = combinator::verify(
            combinator::map_opt(
                // `consumed` pairs the raw header bytes with the parsed (length, type bytes)
                combinator::consumed(sequence::pair(
                    // length byte w/ leading 1
                    combinator::map_res(
                        combinator::verify(number::complete::u8::<&[u8], _>, |&b| {
                            deserialize::hi_bit_set(b)
                        }),
                        // snag the lower 7 bits
                        |b| (b & 0x7F).try_into(),
                    ),
                    branch::alt((
                        // 1 type byte case
                        combinator::recognize(
                            // 0-hi-bit type code byte
                            combinator::verify(number::complete::u8, |&b| {
                                !deserialize::hi_bit_set(b)
                            }),
                        ),
                        // multiple type byte case: leading type byte must have at least 1 value bit
                        combinator::recognize(sequence::tuple((
                            // hi bit and at least 1 value bit, otherwise it would be non-canonical
                            combinator::verify(number::complete::u8, |&b| {
                                deserialize::hi_bit_set(b) && (b & 0x7F != 0)
                            }),
                            // 0-3 1-hi-bit type code bytes with any bit pattern. Max is 3 since two 7
                            // bit type chunks are processed before and after this, for a total of 5,
                            // and that's as many 7-bit chunks as are needed to support a 32-bit type.
                            bytes::complete::take_while_m_n(0, 3, deserialize::hi_bit_set),
                            // final 0-hi-bit type code byte
                            combinator::verify(number::complete::u8, |&b| {
                                !deserialize::hi_bit_set(b)
                            }),
                        ))),
                    )),
                )),
                |(header_bytes, (len, type_bytes))| {
                    // snag the low 7 bits of each type byte and accumulate
                    // The accumulator is a u64 so that up to 5 chunks (35 bits) can be gathered
                    // before the result is range-checked against u32 below.
                    type_bytes
                        .iter()
                        .try_fold(0_u64, |accum, b| {
                            accum.checked_shl(7).map(|n| n + ((b & 0x7F) as u64))
                        })
                        // reject type codes that do not fit in 32 bits
                        .and_then(|type_code| u32::try_from(type_code).ok())
                        .and_then(|type_code| {
                            ArrayView::try_from_slice(header_bytes).map(|header_bytes| DeHeader {
                                header_bytes,
                                contents_len: len,
                                de_type: type_code.into(),
                            })
                        })
                },
            ),
            |header| {
                // verify that the length and type code actually require use of the extended bit
                de_requires_extended_bit(header.de_type.as_u32(), header.contents_len.len)
            },
        );

        // Try the compact form first; fall back to the extended form.
        branch::alt((parse_single_byte_de_header, parse_ext_de_header))(input)
    }
}

impl<'adv> DataElement<'adv> {
    /// The offset of the DE in its containing Section.
    ///
    /// Used with the section salt to derive per-DE salt.
    pub fn offset(&self) -> v1_salt::DataElementOffset {
        self.offset
    }
    /// The type of the DE
    pub fn de_type(&self) -> DeType {
        self.de_type
    }
    /// The contents of the DE
    pub fn contents(&self) -> &'adv [u8] {
        self.contents
    }
}

/// An iterator that parses the given data elements iteratively. In environments where memory is
/// not severely constrained, it is usually safer to collect this into `Result<Vec<DataElement>>`
/// so the validity of the whole advertisement can be checked before proceeding with further
/// processing.
168 #[derive(Debug)] 169 pub struct DataElementParsingIterator<'adv> { 170 input: &'adv [u8], 171 // The index of the data element this is currently at 172 offset: u8, 173 } 174 175 impl<'adv> DataElementParsingIterator<'adv> { new(input: &'adv [u8]) -> Self176 pub(crate) fn new(input: &'adv [u8]) -> Self { 177 Self { input, offset: 0 } 178 } 179 } 180 181 impl<'adv> Iterator for DataElementParsingIterator<'adv> { 182 type Item = Result<DataElement<'adv>, DataElementParseError>; 183 next(&mut self) -> Option<Self::Item>184 fn next(&mut self) -> Option<Self::Item> { 185 match ProtoDataElement::parse(self.input) { 186 Ok((rem, pde)) => { 187 self.input = rem; 188 let current_offset = self.offset; 189 self.offset = if let Some(offset) = self.offset.checked_add(1) { 190 offset 191 } else { 192 return Some(Err(DataElementParseError::TooManyDataElements)); 193 }; 194 Some(Ok(pde.into_data_element(v1_salt::DataElementOffset::from(current_offset)))) 195 } 196 Err(nom::Err::Failure(e)) => Some(Err(DataElementParseError::NomError(e.code))), 197 Err(nom::Err::Incomplete(_)) => { 198 panic!("Should always complete since we are parsing using the `nom::complete` APIs") 199 } 200 Err(nom::Err::Error(_)) => { 201 // nom `Error` is recoverable, it usually means we should move on the parsing the 202 // next section. There is nothing after data elements within a section, so we just 203 // check that there is no remaining data. 204 if !self.input.is_empty() { 205 return Some(Err(DataElementParseError::UnexpectedDataAfterEnd)); 206 } 207 None 208 } 209 } 210 } 211 } 212 213 /// The error that may arise while parsing data elements. 214 #[derive(Debug, PartialEq, Eq)] 215 pub enum DataElementParseError { 216 /// Unexpected data found after the end of the data elements portion. 
This means either the 217 /// parser was fed with additional data (it should only be given the bytes within a section, 218 /// not the whole advertisement), or the length field in the header of the data element is 219 /// malformed. 220 UnexpectedDataAfterEnd, 221 /// There are too many data elements in the advertisement. The maximum number supported by the 222 /// current parsing logic is 255. 223 TooManyDataElements, 224 /// A parse error is returned during nom. 225 NomError(error::ErrorKind), 226 } 227 228 /// Deserialize-specific version of a DE header that incorporates the header length. 229 /// This is needed for encrypted identities that need to construct a slice of everything in the 230 /// section following the identity DE header. 231 #[derive(Debug, PartialEq, Eq, Clone)] 232 pub(crate) struct DeHeader { 233 /// The original bytes of the header, at most 6 bytes long (1 byte len, 5 bytes type) 234 pub(crate) header_bytes: ArrayView<u8, 6>, 235 pub(crate) de_type: DeType, 236 pub(crate) contents_len: DeLength, 237 } 238 239 /// An intermediate stage in parsing a [DataElement] that lacks `offset`. 
240 #[derive(Debug, PartialEq, Eq)] 241 pub struct ProtoDataElement<'d> { 242 header: DeHeader, 243 /// `len()` must equal `header.contents_len` 244 contents: &'d [u8], 245 } 246 247 impl<'d> ProtoDataElement<'d> { parse(input: &[u8]) -> nom::IResult<&[u8], ProtoDataElement>248 pub(crate) fn parse(input: &[u8]) -> nom::IResult<&[u8], ProtoDataElement> { 249 let (remaining, header) = DeHeader::parse(input)?; 250 let len = header.contents_len; 251 combinator::map(bytes::complete::take(len.as_u8()), move |slice| { 252 let header_clone = header.clone(); 253 ProtoDataElement { header: header_clone, contents: slice } 254 })(remaining) 255 } 256 into_data_element(self, offset: v1_salt::DataElementOffset) -> DataElement<'d>257 fn into_data_element(self, offset: v1_salt::DataElementOffset) -> DataElement<'d> { 258 DataElement::new(offset, self.header.de_type, self.contents) 259 } 260 } 261