1 use twox_hash::XxHash32;
2 
3 use super::Error;
4 use std::{
5     convert::TryInto,
6     fmt::Debug,
7     hash::Hasher,
8     io,
9     io::{Read, Write},
10 };
11 
// Masks for the FLG byte of the frame descriptor (bit layout is shown in the
// frame-format table further down in this file).

// Bit 1 is reserved; `FrameInfo::read` rejects headers that have it set.
const FLG_RESERVED_MASK: u8 = 0b00000010;
// Bits 7-6 carry the format version.
const FLG_VERSION_MASK: u8 = 0b11000000;
// The only version value accepted by `FrameInfo::read` (`01` in bits 7-6).
const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01000000;

// Feature flags stored in the FLG byte.
const FLG_INDEPENDENT_BLOCKS: u8 = 0b00100000;
const FLG_BLOCK_CHECKSUMS: u8 = 0b00010000;
const FLG_CONTENT_SIZE: u8 = 0b00001000;
const FLG_CONTENT_CHECKSUM: u8 = 0b00000100;
const FLG_DICTIONARY_ID: u8 = 0b00000001;

// BD byte: every bit outside of the block-size field is reserved.
const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;
// BD byte: bits 6-5-4 hold the block maximum size code.
const BD_BLOCK_SIZE_MASK: u8 = 0b01110000;
// Shift that moves the block-size code between bit 4 and bit 0.
const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;

// Highest bit of the 4-byte block size field; when set, the block data is stored uncompressed.
const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x80000000;

// Magic number that starts a regular LZ4 frame.
const LZ4F_MAGIC_NUMBER: u32 = 0x184D2204;
// Magic number that starts a legacy-format frame.
pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C2102;
// Magic numbers in this range start a skippable frame.
const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D2A50..=0x184D2A5F;

// Size in bytes of the magic number.
pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
// Smallest regular frame header: magic number (4) + FLG (1) + BD (1) + header checksum (1).
pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7;
// Largest regular frame header: the minimum plus content size (8) and dictionary ID (4).
pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19;
// Size in bytes of the block size field that precedes every data block.
pub(crate) const BLOCK_INFO_SIZE: usize = 4;
36 
/// The predefined block sizes to choose from when compressing data.
///
/// The discriminants `4..=7` are the block-size codes used in the BD byte of the
/// frame descriptor.
#[derive(Clone, Copy, PartialEq, Debug, Default)]
pub enum BlockSize {
    /// Will detect a suitable block size based on the size of the first write call.
    /// This is the default.
    #[default]
    Auto = 0,
    /// 64KB block size.
    Max64KB = 4,
    /// 256KB block size.
    Max256KB = 5,
    /// 1MB block size.
    Max1MB = 6,
    /// 4MB block size.
    Max4MB = 7,
    /// 8MB block size.
    Max8MB = 8,
}

impl BlockSize {
    /// Chooses a block size for a buffer of `buf_len` bytes.
    ///
    /// Buffers of up to 64KB get `Max64KB`, buffers of up to 256KB get `Max256KB`,
    /// and anything larger gets `Max4MB`.
    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
        if buf_len > BlockSize::Max256KB.get_size() {
            BlockSize::Max4MB
        } else if buf_len > BlockSize::Max64KB.get_size() {
            BlockSize::Max256KB
        } else {
            BlockSize::Max64KB
        }
    }

    /// Returns the block size in bytes.
    ///
    /// # Panics
    ///
    /// Panics when called on `BlockSize::Auto`, which has no fixed size.
    pub(crate) fn get_size(&self) -> usize {
        const KIB: usize = 1024;
        const MIB: usize = KIB * KIB;
        match *self {
            BlockSize::Max64KB => 64 * KIB,
            BlockSize::Max256KB => 256 * KIB,
            BlockSize::Max1MB => MIB,
            BlockSize::Max4MB => 4 * MIB,
            BlockSize::Max8MB => 8 * MIB,
            BlockSize::Auto => unreachable!(),
        }
    }
}
80 
/// The two block modes that can be set on a [`FrameInfo`].
#[derive(Clone, Copy, PartialEq, Debug, Default)]
pub enum BlockMode {
    /// Every block is compressed independently. The default.
    #[default]
    Independent,
    /// Blocks can reference data from previous blocks.
    ///
    /// Effective when the stream contains small blocks.
    Linked,
}
93 
94 // From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
95 //
96 // General Structure of LZ4 Frame format
97 // -------------------------------------
98 //
99 // | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
100 // |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
101 // | 4 bytes |  3-15 bytes   |       |       | 4 bytes | 0-4 bytes   |
102 //
103 // Frame Descriptor
104 // ----------------
105 //
106 // | FLG     | BD      | (Content Size) | (Dictionary ID) | HC      |
107 // | ------- | ------- |:--------------:|:---------------:| ------- |
108 // | 1 byte  | 1 byte  |  0 - 8 bytes   |   0 - 4 bytes   | 1 byte  |
109 //
110 // __FLG byte__
111 //
112 // |  BitNb  |  7-6  |   5   |    4     |  3   |    2     |    1     |   0  |
113 // | ------- |-------|-------|----------|------|----------|----------|------|
114 // |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
115 //
116 // __BD byte__
117 //
118 // |  BitNb  |     7    |     6-5-4     |  3-2-1-0 |
119 // | ------- | -------- | ------------- | -------- |
120 // |FieldName|*Reserved*| Block MaxSize |*Reserved*|
121 //
122 // Data Blocks
123 // -----------
124 //
125 // | Block Size |  data  | (Block Checksum) |
126 // |:----------:| ------ |:----------------:|
127 // |  4 bytes   |        |   0 - 4 bytes    |
128 //
129 #[derive(Debug, Default, Clone)]
130 /// The metadata for de/compressing with lz4 frame format.
131 pub struct FrameInfo {
132     /// If set, includes the total uncompressed size of data in the frame.
133     pub content_size: Option<u64>,
134     /// The identifier for the dictionary that must be used to correctly decode data.
135     /// The compressor and the decompressor must use exactly the same dictionary.
136     ///
137     /// Note that this is currently unsupported and for this reason it's not pub.
138     pub(crate) dict_id: Option<u32>,
139     /// The maximum uncompressed size of each data block.
140     pub block_size: BlockSize,
141     /// The block mode.
142     pub block_mode: BlockMode,
143     /// If set, includes a checksum for each data block in the frame.
144     pub block_checksums: bool,
145     /// If set, includes a content checksum to verify that the full frame contents have been
146     /// decoded correctly.
147     pub content_checksum: bool,
148     /// If set, use the legacy frame format
149     pub legacy_frame: bool,
150 }
151 
152 impl FrameInfo {
153     /// Create a new `FrameInfo`.
new() -> Self154     pub fn new() -> Self {
155         Self::default()
156     }
157 
158     /// Whether to include the total uncompressed size of data in the frame.
content_size(mut self, content_size: Option<u64>) -> Self159     pub fn content_size(mut self, content_size: Option<u64>) -> Self {
160         self.content_size = content_size;
161         self
162     }
163 
164     /// The maximum uncompressed size of each data block.
block_size(mut self, block_size: BlockSize) -> Self165     pub fn block_size(mut self, block_size: BlockSize) -> Self {
166         self.block_size = block_size;
167         self
168     }
169 
170     /// The block mode.
block_mode(mut self, block_mode: BlockMode) -> Self171     pub fn block_mode(mut self, block_mode: BlockMode) -> Self {
172         self.block_mode = block_mode;
173         self
174     }
175 
176     /// If set, includes a checksum for each data block in the frame.
block_checksums(mut self, block_checksums: bool) -> Self177     pub fn block_checksums(mut self, block_checksums: bool) -> Self {
178         self.block_checksums = block_checksums;
179         self
180     }
181 
182     /// If set, includes a content checksum to verify that the full frame contents have been
183     /// decoded correctly.
content_checksum(mut self, content_checksum: bool) -> Self184     pub fn content_checksum(mut self, content_checksum: bool) -> Self {
185         self.content_checksum = content_checksum;
186         self
187     }
188 
189     /// If set, use the legacy frame format.
legacy_frame(mut self, legacy_frame: bool) -> Self190     pub fn legacy_frame(mut self, legacy_frame: bool) -> Self {
191         self.legacy_frame = legacy_frame;
192         self
193     }
194 
read_size(input: &[u8]) -> Result<usize, Error>195     pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> {
196         let mut required = MIN_FRAME_INFO_SIZE;
197         let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap());
198         if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
199             return Ok(MAGIC_NUMBER_SIZE);
200         }
201 
202         if input.len() < required {
203             return Ok(required);
204         }
205 
206         if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
207             return Ok(8);
208         }
209         if magic_num != LZ4F_MAGIC_NUMBER {
210             return Err(Error::WrongMagicNumber);
211         }
212 
213         if input[4] & FLG_CONTENT_SIZE != 0 {
214             required += 8;
215         }
216         if input[4] & FLG_DICTIONARY_ID != 0 {
217             required += 4
218         }
219         Ok(required)
220     }
221 
write_size(&self) -> usize222     pub(crate) fn write_size(&self) -> usize {
223         let mut required = MIN_FRAME_INFO_SIZE;
224         if self.content_size.is_some() {
225             required += 8;
226         }
227         if self.dict_id.is_some() {
228             required += 4;
229         }
230         required
231     }
232 
write(&self, output: &mut [u8]) -> Result<usize, Error>233     pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> {
234         let write_size = self.write_size();
235         if output.len() < write_size {
236             return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into()));
237         }
238         let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
239         assert!(write_size <= buffer.len());
240         buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes());
241         buffer[4] = FLG_SUPPORTED_VERSION_BITS;
242         if self.block_checksums {
243             buffer[4] |= FLG_BLOCK_CHECKSUMS;
244         }
245         if self.content_checksum {
246             buffer[4] |= FLG_CONTENT_CHECKSUM;
247         }
248         if self.block_mode == BlockMode::Independent {
249             buffer[4] |= FLG_INDEPENDENT_BLOCKS;
250         }
251         buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT;
252 
253         // Optional section
254         let mut offset = 6;
255         if let Some(size) = self.content_size {
256             buffer[4] |= FLG_CONTENT_SIZE;
257             buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes());
258             offset += 8;
259         }
260         if let Some(dict_id) = self.dict_id {
261             buffer[4] |= FLG_DICTIONARY_ID;
262             buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes());
263             offset += 4;
264         }
265 
266         // Header checksum
267         let mut hasher = XxHash32::with_seed(0);
268         hasher.write(&buffer[4..offset]);
269         let header_checksum = (hasher.finish() >> 8) as u8;
270         buffer[offset] = header_checksum;
271         offset += 1;
272 
273         debug_assert_eq!(offset, write_size);
274         output[..write_size].copy_from_slice(&buffer[..write_size]);
275         Ok(write_size)
276     }
277 
read(mut input: &[u8]) -> Result<FrameInfo, Error>278     pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> {
279         let original_input = input;
280         // 4 byte Magic
281         let magic_num = {
282             let mut buffer = [0u8; 4];
283             input.read_exact(&mut buffer)?;
284             u32::from_le_bytes(buffer)
285         };
286         if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
287             return Ok(FrameInfo {
288                 block_size: BlockSize::Max8MB,
289                 legacy_frame: true,
290                 ..FrameInfo::default()
291             });
292         }
293         if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
294             let mut buffer = [0u8; 4];
295             input.read_exact(&mut buffer)?;
296             let user_data_len = u32::from_le_bytes(buffer);
297             return Err(Error::SkippableFrame(user_data_len));
298         }
299         if magic_num != LZ4F_MAGIC_NUMBER {
300             return Err(Error::WrongMagicNumber);
301         }
302 
303         // fixed size section
304         let [flg_byte, bd_byte] = {
305             let mut buffer = [0u8, 0];
306             input.read_exact(&mut buffer)?;
307             buffer
308         };
309 
310         if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS {
311             // version is always 01
312             return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK));
313         }
314 
315         if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 {
316             return Err(Error::ReservedBitsSet);
317         }
318 
319         let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 {
320             BlockMode::Independent
321         } else {
322             BlockMode::Linked
323         };
324         let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0;
325         let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0;
326 
327         let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT {
328             i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)),
329             4 => BlockSize::Max64KB,
330             5 => BlockSize::Max256KB,
331             6 => BlockSize::Max1MB,
332             7 => BlockSize::Max4MB,
333             _ => unreachable!(),
334         };
335 
336         // var len section
337         let mut content_size = None;
338         if flg_byte & FLG_CONTENT_SIZE != 0 {
339             let mut buffer = [0u8; 8];
340             input.read_exact(&mut buffer).unwrap();
341             content_size = Some(u64::from_le_bytes(buffer));
342         }
343 
344         let mut dict_id = None;
345         if flg_byte & FLG_DICTIONARY_ID != 0 {
346             let mut buffer = [0u8; 4];
347             input.read_exact(&mut buffer)?;
348             dict_id = Some(u32::from_le_bytes(buffer));
349         }
350 
351         // 1 byte header checksum
352         let expected_checksum = {
353             let mut buffer = [0u8; 1];
354             input.read_exact(&mut buffer)?;
355             buffer[0]
356         };
357         let mut hasher = XxHash32::with_seed(0);
358         hasher.write(&original_input[4..original_input.len() - input.len() - 1]);
359         let header_hash = (hasher.finish() >> 8) as u8;
360         if header_hash != expected_checksum {
361             return Err(Error::HeaderChecksumError);
362         }
363 
364         Ok(FrameInfo {
365             content_size,
366             dict_id,
367             block_size,
368             block_mode,
369             block_checksums,
370             content_checksum,
371             legacy_frame: false,
372         })
373     }
374 }
375 
376 #[derive(Debug)]
377 pub(crate) enum BlockInfo {
378     Compressed(u32),
379     Uncompressed(u32),
380     EndMark,
381 }
382 
383 impl BlockInfo {
read(mut input: &[u8]) -> Result<Self, Error>384     pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> {
385         let mut size_buffer = [0u8; 4];
386         input.read_exact(&mut size_buffer)?;
387         let size = u32::from_le_bytes(size_buffer);
388         if size == 0 {
389             Ok(BlockInfo::EndMark)
390         } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 {
391             Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT))
392         } else {
393             Ok(BlockInfo::Compressed(size))
394         }
395     }
396 
write(&self, mut output: &mut [u8]) -> Result<usize, Error>397     pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> {
398         let value = match self {
399             BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo),
400             BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len)
401                 if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 =>
402             {
403                 return Err(Error::InvalidBlockInfo)
404             }
405             BlockInfo::Compressed(len) => *len,
406             BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT,
407             BlockInfo::EndMark => 0,
408         };
409         output.write_all(&value.to_le_bytes())?;
410         Ok(4)
411     }
412 }
413