1 // Copyright 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 //! Parsing logic for V1 data elements, header + contents
16 
17 use crate::extended::{de_requires_extended_bit, de_type::DeType, deserialize, DeLength};
18 use array_view::ArrayView;
19 use nom::{branch, bytes, combinator, error, number, sequence};
20 use np_hkdf::v1_salt;
21 
22 #[cfg(test)]
23 mod tests;
24 
/// A deserialized data element in a section.
///
/// The DE has been processed to the point of exposing a DE type and its contents as a `&[u8]`, but
/// no DE-type-specific processing has been performed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct DataElement<'adv> {
    /// Offset of this DE in its containing section.
    offset: v1_salt::DataElementOffset,
    /// Type of the DE.
    de_type: DeType,
    /// Raw, uninterpreted contents of the DE, borrowed for `'adv`.
    contents: &'adv [u8],
}
35 
36 impl<'adv> DataElement<'adv> {
new( offset: v1_salt::DataElementOffset, de_type: DeType, contents: &'adv [u8], ) -> Self37     pub(crate) fn new(
38         offset: v1_salt::DataElementOffset,
39         de_type: DeType,
40         contents: &'adv [u8],
41     ) -> Self {
42         Self { offset, de_type, contents }
43     }
44 }
45 
impl DeHeader {
    /// Parses a single DE header from the front of `input`.
    ///
    /// Two encodings are accepted, tried in this order:
    /// - a 1-byte header `0b0LLLTTTT` (3-bit length, 4-bit type), and
    /// - an extended header: one length byte with the hi bit set followed by 1 to 5 type bytes.
    ///
    /// On success, returns the unconsumed remainder of `input` along with the parsed header,
    /// which retains the exact header bytes that were consumed. If neither encoding matches,
    /// the nom error of the last alternative tried is returned.
    pub(crate) fn parse(input: &[u8]) -> nom::IResult<&[u8], DeHeader> {
        // 1-byte header: 0b0LLLTTTT
        let parse_single_byte_de_header =
            combinator::map_opt::<&[u8], _, DeHeader, error::Error<&[u8]>, _, _>(
                // `consumed` also yields the raw header byte(s) so they can be kept in the header
                combinator::consumed(combinator::map_res(
                    // only bytes without the hi bit are single-byte headers
                    combinator::verify(number::complete::u8, |&b| !deserialize::hi_bit_set(b)),
                    |b| {
                        // L bits
                        let len = (b >> 4) & 0x07;
                        // T bits
                        let de_type = ((b & 0x0F) as u32).into();

                        len.try_into().map(|l| (l, de_type))
                    },
                )),
                |(header_bytes, (len, de_type))| {
                    ArrayView::try_from_slice(header_bytes).map(|header_bytes| DeHeader {
                        header_bytes,
                        contents_len: len,
                        de_type,
                    })
                },
            );

        // multi-byte headers: 0b1LLLLLLL (0b1TTTTTTT)* 0b0TTTTTTT
        // leading 1 in first byte = multibyte format
        // leading 1 in subsequent bytes = at least 1 more type byte follows
        // leading 0 = this is the last header byte
        // The length field is 7 bits wide. The format itself does not bound the number of type
        // bytes, but this parser accepts at most 5 of them and requires the accumulated type code
        // to fit in a `u32`.

        // It's conceivable to have non-canonical extended type sequences where 1 or more leading
        // bytes don't have any bits set (other than the marker hi bit), thereby contributing nothing
        // to the final value.
        // To prevent that, we require that either there be only 1 type byte, or that the first of the
        // multiple type bytes must have a value bit set. It's OK to have no value bits in subsequent
        // type bytes.

        let parse_ext_de_header = combinator::verify(
            combinator::map_opt(
                // `consumed` also yields the raw header bytes (length byte + type bytes)
                combinator::consumed(sequence::pair(
                    // length byte w/ leading 1
                    combinator::map_res(
                        combinator::verify(number::complete::u8::<&[u8], _>, |&b| {
                            deserialize::hi_bit_set(b)
                        }),
                        // snag the lower 7 bits
                        |b| (b & 0x7F).try_into(),
                    ),
                    branch::alt((
                        // 1 type byte case
                        combinator::recognize(
                            // 0-hi-bit type code byte
                            combinator::verify(number::complete::u8, |&b| {
                                !deserialize::hi_bit_set(b)
                            }),
                        ),
                        // multiple type byte case: leading type byte must have at least 1 value bit
                        combinator::recognize(sequence::tuple((
                            // hi bit and at least 1 value bit, otherwise it would be non-canonical
                            combinator::verify(number::complete::u8, |&b| {
                                deserialize::hi_bit_set(b) && (b & 0x7F != 0)
                            }),
                            // 0-3 1-hi-bit type code bytes with any bit pattern. Max is 3 since two 7
                            // bit type chunks are processed before and after this, for a total of 5,
                            // and that's as many 7-bit chunks as are needed to support a 32-bit type.
                            bytes::complete::take_while_m_n(0, 3, deserialize::hi_bit_set),
                            // final 0-hi-bit type code byte
                            combinator::verify(number::complete::u8, |&b| {
                                !deserialize::hi_bit_set(b)
                            }),
                        ))),
                    )),
                )),
                |(header_bytes, (len, type_bytes))| {
                    // snag the low 7 bits of each type byte and accumulate
                    // (at most 5 type bytes = 35 bits get here, so the u64 accumulator has room;
                    // anything wider than 32 bits is rejected by the `u32::try_from` below)
                    type_bytes
                        .iter()
                        .try_fold(0_u64, |accum, b| {
                            accum.checked_shl(7).map(|n| n + ((b & 0x7F) as u64))
                        })
                        .and_then(|type_code| u32::try_from(type_code).ok())
                        .and_then(|type_code| {
                            ArrayView::try_from_slice(header_bytes).map(|header_bytes| DeHeader {
                                header_bytes,
                                contents_len: len,
                                de_type: type_code.into(),
                            })
                        })
                },
            ),
            |header| {
                // verify that the length and type code actually require use of the extended bit
                de_requires_extended_bit(header.de_type.as_u32(), header.contents_len.len)
            },
        );

        // try the compact form first, then fall back to the extended form
        branch::alt((parse_single_byte_de_header, parse_ext_de_header))(input)
    }
}
146 
147 impl<'adv> DataElement<'adv> {
148     /// The offset of the DE in its containing Section.
149     ///
150     /// Used with the section salt to derive per-DE salt.
offset(&self) -> v1_salt::DataElementOffset151     pub fn offset(&self) -> v1_salt::DataElementOffset {
152         self.offset
153     }
154     /// The type of the DE
de_type(&self) -> DeType155     pub fn de_type(&self) -> DeType {
156         self.de_type
157     }
158     /// The contents of the DE
contents(&self) -> &'adv [u8]159     pub fn contents(&self) -> &'adv [u8] {
160         self.contents
161     }
162 }
163 
/// An iterator that parses the given data elements iteratively. In environments where memory is
/// not severely constrained, it is usually safer to collect this into `Result<Vec<DataElement>>`
/// so the validity of the whole advertisement can be checked before proceeding with further
/// processing.
#[derive(Debug)]
pub struct DataElementParsingIterator<'adv> {
    // The bytes that have not been parsed yet
    input: &'adv [u8],
    // The index of the data element this is currently at
    offset: u8,
}
174 
175 impl<'adv> DataElementParsingIterator<'adv> {
new(input: &'adv [u8]) -> Self176     pub(crate) fn new(input: &'adv [u8]) -> Self {
177         Self { input, offset: 0 }
178     }
179 }
180 
181 impl<'adv> Iterator for DataElementParsingIterator<'adv> {
182     type Item = Result<DataElement<'adv>, DataElementParseError>;
183 
next(&mut self) -> Option<Self::Item>184     fn next(&mut self) -> Option<Self::Item> {
185         match ProtoDataElement::parse(self.input) {
186             Ok((rem, pde)) => {
187                 self.input = rem;
188                 let current_offset = self.offset;
189                 self.offset = if let Some(offset) = self.offset.checked_add(1) {
190                     offset
191                 } else {
192                     return Some(Err(DataElementParseError::TooManyDataElements));
193                 };
194                 Some(Ok(pde.into_data_element(v1_salt::DataElementOffset::from(current_offset))))
195             }
196             Err(nom::Err::Failure(e)) => Some(Err(DataElementParseError::NomError(e.code))),
197             Err(nom::Err::Incomplete(_)) => {
198                 panic!("Should always complete since we are parsing using the `nom::complete` APIs")
199             }
200             Err(nom::Err::Error(_)) => {
201                 // nom `Error` is recoverable, it usually means we should move on the parsing the
202                 // next section. There is nothing after data elements within a section, so we just
203                 // check that there is no remaining data.
204                 if !self.input.is_empty() {
205                     return Some(Err(DataElementParseError::UnexpectedDataAfterEnd));
206                 }
207                 None
208             }
209         }
210     }
211 }
212 
/// The error that may arise while parsing data elements.
#[derive(Debug, PartialEq, Eq)]
pub enum DataElementParseError {
    /// Unexpected data found after the end of the data elements portion. This means either the
    /// parser was fed with additional data (it should only be given the bytes within a section,
    /// not the whole advertisement), or the length field in the header of the data element is
    /// malformed.
    UnexpectedDataAfterEnd,
    /// There are too many data elements in the advertisement. The maximum number supported by the
    /// current parsing logic is 255.
    TooManyDataElements,
    /// nom reported an unrecoverable failure while parsing; carries the nom error kind.
    NomError(error::ErrorKind),
}
227 
/// Deserialize-specific version of a DE header that incorporates the header length.
/// This is needed for encrypted identities that need to construct a slice of everything in the
/// section following the identity DE header.
#[derive(Debug, PartialEq, Eq, Clone)]
pub(crate) struct DeHeader {
    /// The original bytes of the header, at most 6 bytes long (1 byte len, 5 bytes type)
    pub(crate) header_bytes: ArrayView<u8, 6>,
    /// The DE type decoded from the header's type bits
    pub(crate) de_type: DeType,
    /// The length of the DE contents, decoded from the header's length bits
    pub(crate) contents_len: DeLength,
}
238 
/// An intermediate stage in parsing a [DataElement] that lacks `offset`.
#[derive(Debug, PartialEq, Eq)]
pub struct ProtoDataElement<'d> {
    /// The parsed header that precedes `contents`
    header: DeHeader,
    /// `len()` must equal `header.contents_len`
    contents: &'d [u8],
}
246 
247 impl<'d> ProtoDataElement<'d> {
parse(input: &[u8]) -> nom::IResult<&[u8], ProtoDataElement>248     pub(crate) fn parse(input: &[u8]) -> nom::IResult<&[u8], ProtoDataElement> {
249         let (remaining, header) = DeHeader::parse(input)?;
250         let len = header.contents_len;
251         combinator::map(bytes::complete::take(len.as_u8()), move |slice| {
252             let header_clone = header.clone();
253             ProtoDataElement { header: header_clone, contents: slice }
254         })(remaining)
255     }
256 
into_data_element(self, offset: v1_salt::DataElementOffset) -> DataElement<'d>257     fn into_data_element(self, offset: v1_salt::DataElementOffset) -> DataElement<'d> {
258         DataElement::new(offset, self.header.de_type, self.contents)
259     }
260 }
261