use twox_hash::XxHash32; use super::Error; use std::{ convert::TryInto, fmt::Debug, hash::Hasher, io, io::{Read, Write}, }; const FLG_RESERVED_MASK: u8 = 0b00000010; const FLG_VERSION_MASK: u8 = 0b11000000; const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01000000; const FLG_INDEPENDENT_BLOCKS: u8 = 0b00100000; const FLG_BLOCK_CHECKSUMS: u8 = 0b00010000; const FLG_CONTENT_SIZE: u8 = 0b00001000; const FLG_CONTENT_CHECKSUM: u8 = 0b00000100; const FLG_DICTIONARY_ID: u8 = 0b00000001; const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK; const BD_BLOCK_SIZE_MASK: u8 = 0b01110000; const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4; const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x80000000; const LZ4F_MAGIC_NUMBER: u32 = 0x184D2204; pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C2102; const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive = 0x184D2A50..=0x184D2A5F; pub(crate) const MAGIC_NUMBER_SIZE: usize = 4; pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7; pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19; pub(crate) const BLOCK_INFO_SIZE: usize = 4; #[derive(Clone, Copy, PartialEq, Debug)] /// Different predefines blocksizes to choose when compressing data. #[derive(Default)] pub enum BlockSize { /// Will detect optimal frame size based on the size of the first write call #[default] Auto = 0, /// The default block size. Max64KB = 4, /// 256KB block size. Max256KB = 5, /// 1MB block size. Max1MB = 6, /// 4MB block size. Max4MB = 7, /// 8MB block size. Max8MB = 8, } impl BlockSize { /// Try to find optimal size based on passed buffer length. pub(crate) fn from_buf_length(buf_len: usize) -> Self { let mut blocksize = BlockSize::Max4MB; for candidate in [BlockSize::Max256KB, BlockSize::Max64KB] { if buf_len > candidate.get_size() { return blocksize; } blocksize = candidate; } BlockSize::Max64KB } pub(crate) fn get_size(&self) -> usize { match self { BlockSize::Auto => unreachable!(), BlockSize::Max64KB => 64 * 1024, BlockSize::Max256KB => 256 * 1024, BlockSize::Max1MB => 1024 * 1024, BlockSize::Max4MB => 4 * 1024 * 1024, BlockSize::Max8MB => 8 * 1024 * 1024, } } } #[derive(Clone, Copy, PartialEq, Debug)] /// The two `BlockMode` operations that can be set on (`FrameInfo`)[FrameInfo] #[derive(Default)] pub enum BlockMode { /// Every block is compressed independently. The default. #[default] Independent, /// Blocks can reference data from previous blocks. /// /// Effective when the stream contains small blocks. Linked, } // From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md // // General Structure of LZ4 Frame format // ------------------------------------- // // | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum | // |:-------:|:-------------:| ----- | ----- | ------- | ----------- | // | 4 bytes | 3-15 bytes | | | 4 bytes | 0-4 bytes | // // Frame Descriptor // ---------------- // // | FLG | BD | (Content Size) | (Dictionary ID) | HC | // | ------- | ------- |:--------------:|:---------------:| ------- | // | 1 byte | 1 byte | 0 - 8 bytes | 0 - 4 bytes | 1 byte | // // __FLG byte__ // // | BitNb | 7-6 | 5 | 4 | 3 | 2 | 1 | 0 | // | ------- |-------|-------|----------|------|----------|----------|------| // |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID| // // __BD byte__ // // | BitNb | 7 | 6-5-4 | 3-2-1-0 | // | ------- | -------- | ------------- | -------- | // |FieldName|*Reserved*| Block MaxSize |*Reserved*| // // Data Blocks // ----------- // // | Block Size | data | (Block Checksum) | // |:----------:| ------ |:----------------:| // | 4 bytes | | 0 - 4 bytes | // #[derive(Debug, Default, Clone)] /// The metadata for de/compressing with lz4 frame format. pub struct FrameInfo { /// If set, includes the total uncompressed size of data in the frame. pub content_size: Option, /// The identifier for the dictionary that must be used to correctly decode data. /// The compressor and the decompressor must use exactly the same dictionary. /// /// Note that this is currently unsupported and for this reason it's not pub. pub(crate) dict_id: Option, /// The maximum uncompressed size of each data block. pub block_size: BlockSize, /// The block mode. pub block_mode: BlockMode, /// If set, includes a checksum for each data block in the frame. pub block_checksums: bool, /// If set, includes a content checksum to verify that the full frame contents have been /// decoded correctly. pub content_checksum: bool, /// If set, use the legacy frame format pub legacy_frame: bool, } impl FrameInfo { /// Create a new `FrameInfo`. pub fn new() -> Self { Self::default() } /// Whether to include the total uncompressed size of data in the frame. pub fn content_size(mut self, content_size: Option) -> Self { self.content_size = content_size; self } /// The maximum uncompressed size of each data block. pub fn block_size(mut self, block_size: BlockSize) -> Self { self.block_size = block_size; self } /// The block mode. pub fn block_mode(mut self, block_mode: BlockMode) -> Self { self.block_mode = block_mode; self } /// If set, includes a checksum for each data block in the frame. pub fn block_checksums(mut self, block_checksums: bool) -> Self { self.block_checksums = block_checksums; self } /// If set, includes a content checksum to verify that the full frame contents have been /// decoded correctly. pub fn content_checksum(mut self, content_checksum: bool) -> Self { self.content_checksum = content_checksum; self } /// If set, use the legacy frame format. pub fn legacy_frame(mut self, legacy_frame: bool) -> Self { self.legacy_frame = legacy_frame; self } pub(crate) fn read_size(input: &[u8]) -> Result { let mut required = MIN_FRAME_INFO_SIZE; let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap()); if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { return Ok(MAGIC_NUMBER_SIZE); } if input.len() < required { return Ok(required); } if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { return Ok(8); } if magic_num != LZ4F_MAGIC_NUMBER { return Err(Error::WrongMagicNumber); } if input[4] & FLG_CONTENT_SIZE != 0 { required += 8; } if input[4] & FLG_DICTIONARY_ID != 0 { required += 4 } Ok(required) } pub(crate) fn write_size(&self) -> usize { let mut required = MIN_FRAME_INFO_SIZE; if self.content_size.is_some() { required += 8; } if self.dict_id.is_some() { required += 4; } required } pub(crate) fn write(&self, output: &mut [u8]) -> Result { let write_size = self.write_size(); if output.len() < write_size { return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into())); } let mut buffer = [0u8; MAX_FRAME_INFO_SIZE]; assert!(write_size <= buffer.len()); buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes()); buffer[4] = FLG_SUPPORTED_VERSION_BITS; if self.block_checksums { buffer[4] |= FLG_BLOCK_CHECKSUMS; } if self.content_checksum { buffer[4] |= FLG_CONTENT_CHECKSUM; } if self.block_mode == BlockMode::Independent { buffer[4] |= FLG_INDEPENDENT_BLOCKS; } buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT; // Optional section let mut offset = 6; if let Some(size) = self.content_size { buffer[4] |= FLG_CONTENT_SIZE; buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes()); offset += 8; } if let Some(dict_id) = self.dict_id { buffer[4] |= FLG_DICTIONARY_ID; buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes()); offset += 4; } // Header checksum let mut hasher = XxHash32::with_seed(0); hasher.write(&buffer[4..offset]); let header_checksum = (hasher.finish() >> 8) as u8; buffer[offset] = header_checksum; offset += 1; debug_assert_eq!(offset, write_size); output[..write_size].copy_from_slice(&buffer[..write_size]); Ok(write_size) } pub(crate) fn read(mut input: &[u8]) -> Result { let original_input = input; // 4 byte Magic let magic_num = { let mut buffer = [0u8; 4]; input.read_exact(&mut buffer)?; u32::from_le_bytes(buffer) }; if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { return Ok(FrameInfo { block_size: BlockSize::Max8MB, legacy_frame: true, ..FrameInfo::default() }); } if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { let mut buffer = [0u8; 4]; input.read_exact(&mut buffer)?; let user_data_len = u32::from_le_bytes(buffer); return Err(Error::SkippableFrame(user_data_len)); } if magic_num != LZ4F_MAGIC_NUMBER { return Err(Error::WrongMagicNumber); } // fixed size section let [flg_byte, bd_byte] = { let mut buffer = [0u8, 0]; input.read_exact(&mut buffer)?; buffer }; if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS { // version is always 01 return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK)); } if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 { return Err(Error::ReservedBitsSet); } let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 { BlockMode::Independent } else { BlockMode::Linked }; let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0; let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0; let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT { i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)), 4 => BlockSize::Max64KB, 5 => BlockSize::Max256KB, 6 => BlockSize::Max1MB, 7 => BlockSize::Max4MB, _ => unreachable!(), }; // var len section let mut content_size = None; if flg_byte & FLG_CONTENT_SIZE != 0 { let mut buffer = [0u8; 8]; input.read_exact(&mut buffer).unwrap(); content_size = Some(u64::from_le_bytes(buffer)); } let mut dict_id = None; if flg_byte & FLG_DICTIONARY_ID != 0 { let mut buffer = [0u8; 4]; input.read_exact(&mut buffer)?; dict_id = Some(u32::from_le_bytes(buffer)); } // 1 byte header checksum let expected_checksum = { let mut buffer = [0u8; 1]; input.read_exact(&mut buffer)?; buffer[0] }; let mut hasher = XxHash32::with_seed(0); hasher.write(&original_input[4..original_input.len() - input.len() - 1]); let header_hash = (hasher.finish() >> 8) as u8; if header_hash != expected_checksum { return Err(Error::HeaderChecksumError); } Ok(FrameInfo { content_size, dict_id, block_size, block_mode, block_checksums, content_checksum, legacy_frame: false, }) } } #[derive(Debug)] pub(crate) enum BlockInfo { Compressed(u32), Uncompressed(u32), EndMark, } impl BlockInfo { pub(crate) fn read(mut input: &[u8]) -> Result { let mut size_buffer = [0u8; 4]; input.read_exact(&mut size_buffer)?; let size = u32::from_le_bytes(size_buffer); if size == 0 { Ok(BlockInfo::EndMark) } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 { Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT)) } else { Ok(BlockInfo::Compressed(size)) } } pub(crate) fn write(&self, mut output: &mut [u8]) -> Result { let value = match self { BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo), BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len) if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 => { return Err(Error::InvalidBlockInfo) } BlockInfo::Compressed(len) => *len, BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT, BlockInfo::EndMark => 0, }; output.write_all(&value.to_le_bytes())?; Ok(4) } }