//! The block decompression algorithm.

use core::convert::TryInto;

use crate::block::DecompressError;
use crate::block::MINMATCH;
use crate::sink::Sink;
use crate::sink::SliceSink;
use alloc::vec;
use alloc::vec::Vec;

/// Read an integer.
///
/// In LZ4, an integer is encoded using a variable number of bytes: each byte is read and added
/// to the sum, and any byte with value 255 (0xFF) means another byte follows. The first byte
/// below 0xFF is added to the sum and terminates the sequence.
///
/// # Example
///
/// ```notest
///     255, 255, 255, 4, 2, 3, 4, 6, 7
/// ```
///
/// is decoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
/// 4 is the first non-0xFF byte.
#[inline]
fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressError> {
    // We start at zero and count upwards.
    let mut n: u32 = 0;
    // If this byte takes value 255 (the maximum value it can take), another byte is read
    // and added to the sum. This repeats until a byte lower than 255 is read.
    loop {
        // Read the next byte and add it to the counting variable.
        let extra: u8 = *input
            .get(*input_pos)
            .ok_or(DecompressError::ExpectedAnotherByte)?;
        *input_pos += 1;
        n += extra as u32;

        // We continue if we got 255, break otherwise.
        if extra != 0xFF {
            break;
        }
    }

    Ok(n)
}
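
// A small illustrative check of the LSIC decoding described above; it mirrors the doc example
// (test added for illustration).
#[test]
fn check_read_integer() {
    let mut pos = 0;
    // 255 + 255 + 255 + 4 = 769; the byte after the first non-0xFF byte is not consumed.
    assert_eq!(read_integer(&[255, 255, 255, 4, 2], &mut pos).unwrap(), 769);
    assert_eq!(pos, 4);
    // A single non-0xFF byte terminates immediately.
    let mut pos = 0;
    assert_eq!(read_integer(&[42], &mut pos).unwrap(), 42);
    assert_eq!(pos, 1);
}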

/// Read a little-endian 16-bit integer from the input stream.
#[inline]
fn read_u16(input: &[u8], input_pos: &mut usize) -> Result<u16, DecompressError> {
    let bytes = input
        .get(*input_pos..*input_pos + 2)
        .ok_or(DecompressError::ExpectedAnotherByte)?;
    *input_pos += 2;
    Ok(u16::from_le_bytes(bytes.try_into().unwrap()))
}
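
// Illustrative check of the little-endian read and the cursor advance (test added for
// illustration).
#[test]
fn check_read_u16() {
    let mut pos = 1;
    assert_eq!(read_u16(&[0xFF, 0x34, 0x12], &mut pos).unwrap(), 0x1234);
    assert_eq!(pos, 3);
}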

const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111;
const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000;

#[test]
fn check_token() {
    assert!(!does_token_fit(15));
    assert!(does_token_fit(14));
    assert!(does_token_fit(114));
    assert!(!does_token_fit(0b11110000));
    assert!(does_token_fit(0b10110000));
}

/// The token consists of two parts: the literal length (upper 4 bits) and the match length
/// (lower 4 bits). If the literal length and the match length are both below 15, no additional
/// length bytes need to be read, so all the metadata fits in the token.
#[inline]
fn does_token_fit(token: u8) -> bool {
    !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
        || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline(always)] // (always) necessary to get the best performance in non-LTO builds
pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>(
    input: &[u8],
    output: &mut S,
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    let mut input_pos = 0;
    let initial_output_pos = output.pos();

    let safe_input_pos = input
        .len()
        .saturating_sub(16 /* literal copy */ + 2 /* u16 match offset */);
    let mut safe_output_pos = output
        .capacity()
        .saturating_sub(16 /* literal copy */ + 18 /* match copy */);

    if USE_DICT {
        // In the dictionary case the output pointer is moved by the match length in the
        // dictionary. This may be up to 17 bytes without exiting the loop. So we need to ensure
        // that we have at least an additional 17 bytes of space left in the output buffer in
        // the fast loop.
        safe_output_pos = safe_output_pos.saturating_sub(17);
    }

    // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer
    // is empty.
    loop {
        // Read the token. The token is the first byte of a sequence. It contains two 4-bit
        // "fields": the upper nibble holds the literal length and the lower nibble the match
        // length of the back reference.
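        // For example, token 0x4A encodes a literal length of 0x4 = 4 and a match length of
        // 0xA + MINMATCH = 14.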
        let token = *input
            .get(input_pos)
            .ok_or(DecompressError::ExpectedAnotherByte)?;
        input_pos += 1;

        // Check for the hot loop.
        // In most cases the metadata fits in a single 1-byte token (statistically) and we are
        // at a safe distance to the end. This enables some optimized handling.
        //
        // Ideally we would check the safe output position via `output.pos() <= safe_output_pos`,
        // but that doesn't work when `safe_output_pos` is 0 due to the saturating_sub. So we use
        // `<` instead of `<=`, which covers that case.
        if does_token_fit(token) && input_pos <= safe_input_pos && output.pos() < safe_output_pos {
            let literal_length = (token >> 4) as usize;

            // Casting to [u8; 16] doesn't seem to make a difference vs &[u8] (same assembly).
            let input: &[u8; 16] = input[input_pos..input_pos + 16].try_into().unwrap();

            // Copy the literal.
            // The literal is at most 14 bytes, and the safe-distance check above ensures that
            // we are far enough away from the end to safely copy 16 bytes.
            output.extend_from_slice_wild(input, literal_length);
            input_pos += literal_length;

            // The match offset follows the literal in the 16-byte window; clone the position,
            // as we don't want to mutate `literal_length`.
            let offset = read_u16(input, &mut literal_length.clone())? as usize;
            input_pos += 2;

            let mut match_length = MINMATCH + (token & 0xF) as usize;

            if USE_DICT && offset > output.pos() {
                let copied = copy_from_dict(output, ext_dict, offset, match_length)?;
                if copied == match_length {
                    continue;
                }
                // match crosses ext_dict and output, offset is still correct as output pos
                // increased
                match_length -= copied;
            }

            // In this branch we know that match_length is at most 18 (14 + MINMATCH).
            // But the blocks can overlap, so make sure they are at least 18 bytes apart
            // to enable an optimized copy of 18 bytes.
            let start = output.pos().saturating_sub(offset);
            if offset >= match_length {
                output.extend_from_within(start, 18, match_length);
            } else {
                output.extend_from_within_overlapping(start, match_length)
            }

            continue;
        }

        // Now, we read the literal section.
        // If the length nibble takes its maximum value of 15, additional bytes follow and are
        // added to it.
        let mut literal_length = (token >> 4) as usize;
        if literal_length != 0 {
            if literal_length == 15 {
                // The literal length took its maximal value, indicating that there are more
                // than 15 literal bytes. We read the extra integer.
                literal_length += read_integer(input, &mut input_pos)? as usize;
            }

            if literal_length > input.len() - input_pos {
                return Err(DecompressError::LiteralOutOfBounds);
            }
            #[cfg(not(feature = "unchecked-decode"))]
            if literal_length > output.capacity() - output.pos() {
                return Err(DecompressError::OutputTooSmall {
                    expected: output.pos() + literal_length,
                    actual: output.capacity(),
                });
            }
            output.extend_from_slice(&input[input_pos..input_pos + literal_length]);
            input_pos += literal_length;
        }

        // If the input stream is exhausted, we break out of the loop. This only happens at the
        // end of the stream, since otherwise a match section follows the literals.
        if input_pos >= input.len() {
            break;
        }

        let offset = read_u16(input, &mut input_pos)? as usize;
        // Obtain the initial match length. The match length is the length of the duplicate
        // segment, which will later be copied from data previously decompressed into the output
        // buffer. The initial length is derived from the second part of the token (the lower
        // nibble) we read earlier. Since a match length of less than 4 would mean negative
        // compression ratio, we start at 4 (MINMATCH).

        // The initial match length can be at most 19 (15 + MINMATCH). As with the literal
        // length, a nibble value of 15 indicates that there are more bytes to read.
        let mut match_length = MINMATCH + (token & 0xF) as usize;
        if match_length == MINMATCH + 15 {
            // The match length took its maximal value, indicating that there are more bytes.
            // We read the extra integer.
            match_length += read_integer(input, &mut input_pos)? as usize;
        }

        #[cfg(not(feature = "unchecked-decode"))]
        if output.pos() + match_length > output.capacity() {
            return Err(DecompressError::OutputTooSmall {
                expected: output.pos() + match_length,
                actual: output.capacity(),
            });
        }
        if USE_DICT && offset > output.pos() {
            let copied = copy_from_dict(output, ext_dict, offset, match_length)?;
            if copied == match_length {
                continue;
            }
            // match crosses ext_dict and output, offset is still correct as output pos
            // increased
            match_length -= copied;
        }
        // We now copy from the already decompressed buffer. This allows us to store duplicates
        // as simple references to an earlier location.
        duplicate_slice(output, offset, match_length)?;
    }
    Ok(output.pos() - initial_output_pos)
}

#[inline]
fn copy_from_dict(
    output: &mut impl Sink,
    ext_dict: &[u8],
    offset: usize,
    match_length: usize,
) -> Result<usize, DecompressError> {
    // If we're here, we know offset > output.pos().
    debug_assert!(offset > output.pos());
    let (dict_offset, did_overflow) = ext_dict.len().overflowing_sub(offset - output.pos());
    if did_overflow {
        return Err(DecompressError::OffsetOutOfBounds);
    }
    // We can't copy past ext_dict's len; the match may cross dict and output.
    let dict_match_length = match_length.min(ext_dict.len() - dict_offset);
    let ext_match = &ext_dict[dict_offset..dict_offset + dict_match_length];
    output.extend_from_slice(ext_match);
    Ok(dict_match_length)
}
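
// Illustrative check of the dictionary copy: with an empty output (pos 0), an offset of 2
// addresses the last 2 bytes of `ext_dict` (test added for illustration; it assumes the
// SliceSink behavior used throughout this file).
#[test]
fn check_copy_from_dict() {
    let mut buf = [0u8; 4];
    let mut sink = SliceSink::new(&mut buf[..], 0);
    let copied = copy_from_dict(&mut sink, b"abcd", 2, 2).unwrap();
    assert_eq!(copied, 2);
    assert_eq!(&buf[..2], b"cd");
}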

/// Extends output by self-referential copies.
#[inline(always)] // (always) necessary, otherwise the compiler fails to inline it
fn duplicate_slice(
    output: &mut impl Sink,
    offset: usize,
    match_length: usize,
) -> Result<(), DecompressError> {
    // This function assumes output will fit match_length; it might panic otherwise.
    if match_length > offset {
        duplicate_overlapping_slice(output, offset, match_length)?;
    } else {
        let (start, did_overflow) = output.pos().overflowing_sub(offset);
        if did_overflow {
            return Err(DecompressError::OffsetOutOfBounds);
        }

        match match_length {
            0..=32 if output.pos() + 32 <= output.capacity() => {
                output.extend_from_within(start, 32, match_length)
            }
            33..=64 if output.pos() + 64 <= output.capacity() => {
                output.extend_from_within(start, 64, match_length)
            }
            _ => output.extend_from_within(start, match_length, match_length),
        }
    }
    Ok(())
}
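
// Illustrative check of the non-overlapping self-referential copy path (test added for
// illustration; it assumes the SliceSink behavior used throughout this file).
#[test]
fn check_duplicate_slice() {
    let mut buf = [0u8; 8];
    let mut sink = SliceSink::new(&mut buf[..], 0);
    sink.extend_from_slice(b"abcd");
    // offset 4 with match_length 4 copies the previous 4 bytes without overlap.
    duplicate_slice(&mut sink, 4, 4).unwrap();
    assert_eq!(&buf, b"abcdabcd");
}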

/// Self-referential copy for the case where the copy source (end of output - offset) plus
/// match_length overlaps into the output being written.
#[inline]
fn duplicate_overlapping_slice(
    sink: &mut impl Sink,
    offset: usize,
    match_length: usize,
) -> Result<(), DecompressError> {
    // This function assumes output will fit match_length; it might panic otherwise.
    let (start, did_overflow) = sink.pos().overflowing_sub(offset);
    if did_overflow {
        return Err(DecompressError::OffsetOutOfBounds);
    }
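    // An offset of 1 means the match repeats the previous byte: a run-length-style fill,
    // which `extend_with_fill` handles efficiently.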
    if offset == 1 {
        let val = sink.byte_at(start);
        sink.extend_with_fill(val, match_length);
    } else {
        sink.extend_from_within_overlapping(start, match_length);
    }
    Ok(())
}
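
// Illustrative check of the overlapping copy, including the offset == 1 fill special case
// (test added for illustration; it assumes the SliceSink behavior used throughout this file).
#[test]
fn check_duplicate_overlapping() {
    let mut buf = [0u8; 6];
    let mut sink = SliceSink::new(&mut buf[..], 0);
    sink.extend_from_slice(b"a");
    // offset 1 with match_length 5 repeats the single byte five more times.
    duplicate_overlapping_slice(&mut sink, 1, 5).unwrap();
    assert_eq!(&buf, b"aaaaaa");
}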

/// Decompress all bytes of `input` into `output`.
/// `output` should be preallocated with the size of the uncompressed data.
#[inline]
pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> {
    decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"")
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline]
pub fn decompress_into_with_dict(
    input: &[u8],
    output: &mut [u8],
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size
/// in little endian. Can be used in conjunction with `compress_prepend_size`.
#[inline]
pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress(input, uncompressed_size)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the
/// uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> {
    let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size];
    let decomp_len =
        decompress_internal::<false, _>(input, &mut SliceSink::new(&mut decompressed, 0), b"")?;
    decompressed.truncate(decomp_len);
    Ok(decompressed)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size
/// in little endian. Can be used in conjunction with `compress_prepend_size_with_dict`.
#[inline]
pub fn decompress_size_prepended_with_dict(
    input: &[u8],
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress_with_dict(input, uncompressed_size, ext_dict)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the
/// uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress_with_dict(
    input: &[u8],
    min_uncompressed_size: usize,
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size];
    let decomp_len =
        decompress_internal::<true, _>(input, &mut SliceSink::new(&mut decompressed, 0), ext_dict)?;
    decompressed.truncate(decomp_len);
    Ok(decompressed)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn all_literal() {
        assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49");
    }
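
    // Hand-assembled blocks sketching the sequence layout described in `decompress_internal`:
    // token, literals, little-endian offset, then the match. Each block ends with a
    // literal-only token, as the decoder expects (tests added for illustration).
    #[test]
    fn literal_then_match() {
        // Token 0x40: 4 literals, match_length 0 + MINMATCH = 4; offset 4 re-copies "abcd".
        assert_eq!(
            decompress(&[0x40, b'a', b'b', b'c', b'd', 4, 0, 0x00], 8).unwrap(),
            b"abcdabcd"
        );
    }

    #[test]
    fn overlapping_match() {
        // Token 0x11: 1 literal, match_length 1 + MINMATCH = 5; offset 1 repeats 'a'.
        assert_eq!(decompress(&[0x11, b'a', 1, 0, 0x00], 6).unwrap(), b"aaaaaa");
    }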

    // This error test is only valid in safe-decode.
    #[cfg(feature = "safe-decode")]
    #[test]
    fn offset_oob() {
        decompress(&[0x10, b'a', 2, 0], 4).unwrap_err();
        decompress(&[0x40, b'a', 1, 0], 4).unwrap_err();
    }
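
    // A sketch of dictionary decompression: with an empty output, a match offset larger than
    // the output position reaches into `ext_dict` (test added for illustration).
    #[test]
    fn match_from_dict() {
        // Token 0x00: no literals; offset 4 addresses the whole 4-byte dictionary,
        // match_length 0 + MINMATCH = 4. A trailing literal-only token ends the block.
        assert_eq!(
            decompress_with_dict(&[0x00, 4, 0, 0x00], 4, b"abcd").unwrap(),
            b"abcd"
        );
    }

    // A sketch of the size-prepended format: a 4-byte little-endian length precedes the
    // compressed block (test added for illustration).
    #[test]
    fn size_prepended_roundtrip() {
        assert_eq!(
            decompress_size_prepended(&[3, 0, 0, 0, 0x30, b'a', b'4', b'9']).unwrap(),
            b"a49"
        );
    }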
}