1 use twox_hash::XxHash32; 2 3 use super::Error; 4 use std::{ 5 convert::TryInto, 6 fmt::Debug, 7 hash::Hasher, 8 io, 9 io::{Read, Write}, 10 }; 11 12 const FLG_RESERVED_MASK: u8 = 0b00000010; 13 const FLG_VERSION_MASK: u8 = 0b11000000; 14 const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01000000; 15 16 const FLG_INDEPENDENT_BLOCKS: u8 = 0b00100000; 17 const FLG_BLOCK_CHECKSUMS: u8 = 0b00010000; 18 const FLG_CONTENT_SIZE: u8 = 0b00001000; 19 const FLG_CONTENT_CHECKSUM: u8 = 0b00000100; 20 const FLG_DICTIONARY_ID: u8 = 0b00000001; 21 22 const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK; 23 const BD_BLOCK_SIZE_MASK: u8 = 0b01110000; 24 const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4; 25 26 const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x80000000; 27 28 const LZ4F_MAGIC_NUMBER: u32 = 0x184D2204; 29 pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C2102; 30 const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D2A50..=0x184D2A5F; 31 32 pub(crate) const MAGIC_NUMBER_SIZE: usize = 4; 33 pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7; 34 pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19; 35 pub(crate) const BLOCK_INFO_SIZE: usize = 4; 36 37 #[derive(Clone, Copy, PartialEq, Debug)] 38 /// Different predefines blocksizes to choose when compressing data. 39 #[derive(Default)] 40 pub enum BlockSize { 41 /// Will detect optimal frame size based on the size of the first write call 42 #[default] 43 Auto = 0, 44 /// The default block size. 45 Max64KB = 4, 46 /// 256KB block size. 47 Max256KB = 5, 48 /// 1MB block size. 49 Max1MB = 6, 50 /// 4MB block size. 51 Max4MB = 7, 52 /// 8MB block size. 53 Max8MB = 8, 54 } 55 56 impl BlockSize { 57 /// Try to find optimal size based on passed buffer length. from_buf_length(buf_len: usize) -> Self58 pub(crate) fn from_buf_length(buf_len: usize) -> Self { 59 let mut blocksize = BlockSize::Max4MB; 60 61 for candidate in [BlockSize::Max256KB, BlockSize::Max64KB] { 62 if buf_len > candidate.get_size() { 63 return blocksize; 64 } 65 blocksize = candidate; 66 } 67 BlockSize::Max64KB 68 } get_size(&self) -> usize69 pub(crate) fn get_size(&self) -> usize { 70 match self { 71 BlockSize::Auto => unreachable!(), 72 BlockSize::Max64KB => 64 * 1024, 73 BlockSize::Max256KB => 256 * 1024, 74 BlockSize::Max1MB => 1024 * 1024, 75 BlockSize::Max4MB => 4 * 1024 * 1024, 76 BlockSize::Max8MB => 8 * 1024 * 1024, 77 } 78 } 79 } 80 81 #[derive(Clone, Copy, PartialEq, Debug)] 82 /// The two `BlockMode` operations that can be set on (`FrameInfo`)[FrameInfo] 83 #[derive(Default)] 84 pub enum BlockMode { 85 /// Every block is compressed independently. The default. 86 #[default] 87 Independent, 88 /// Blocks can reference data from previous blocks. 89 /// 90 /// Effective when the stream contains small blocks. 91 Linked, 92 } 93 94 // From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md 95 // 96 // General Structure of LZ4 Frame format 97 // ------------------------------------- 98 // 99 // | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum | 100 // |:-------:|:-------------:| ----- | ----- | ------- | ----------- | 101 // | 4 bytes | 3-15 bytes | | | 4 bytes | 0-4 bytes | 102 // 103 // Frame Descriptor 104 // ---------------- 105 // 106 // | FLG | BD | (Content Size) | (Dictionary ID) | HC | 107 // | ------- | ------- |:--------------:|:---------------:| ------- | 108 // | 1 byte | 1 byte | 0 - 8 bytes | 0 - 4 bytes | 1 byte | 109 // 110 // __FLG byte__ 111 // 112 // | BitNb | 7-6 | 5 | 4 | 3 | 2 | 1 | 0 | 113 // | ------- |-------|-------|----------|------|----------|----------|------| 114 // |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID| 115 // 116 // __BD byte__ 117 // 118 // | BitNb | 7 | 6-5-4 | 3-2-1-0 | 119 // | ------- | -------- | ------------- | -------- | 120 // |FieldName|*Reserved*| Block MaxSize |*Reserved*| 121 // 122 // Data Blocks 123 // ----------- 124 // 125 // | Block Size | data | (Block Checksum) | 126 // |:----------:| ------ |:----------------:| 127 // | 4 bytes | | 0 - 4 bytes | 128 // 129 #[derive(Debug, Default, Clone)] 130 /// The metadata for de/compressing with lz4 frame format. 131 pub struct FrameInfo { 132 /// If set, includes the total uncompressed size of data in the frame. 133 pub content_size: Option<u64>, 134 /// The identifier for the dictionary that must be used to correctly decode data. 135 /// The compressor and the decompressor must use exactly the same dictionary. 136 /// 137 /// Note that this is currently unsupported and for this reason it's not pub. 138 pub(crate) dict_id: Option<u32>, 139 /// The maximum uncompressed size of each data block. 140 pub block_size: BlockSize, 141 /// The block mode. 142 pub block_mode: BlockMode, 143 /// If set, includes a checksum for each data block in the frame. 144 pub block_checksums: bool, 145 /// If set, includes a content checksum to verify that the full frame contents have been 146 /// decoded correctly. 147 pub content_checksum: bool, 148 /// If set, use the legacy frame format 149 pub legacy_frame: bool, 150 } 151 152 impl FrameInfo { 153 /// Create a new `FrameInfo`. new() -> Self154 pub fn new() -> Self { 155 Self::default() 156 } 157 158 /// Whether to include the total uncompressed size of data in the frame. content_size(mut self, content_size: Option<u64>) -> Self159 pub fn content_size(mut self, content_size: Option<u64>) -> Self { 160 self.content_size = content_size; 161 self 162 } 163 164 /// The maximum uncompressed size of each data block. block_size(mut self, block_size: BlockSize) -> Self165 pub fn block_size(mut self, block_size: BlockSize) -> Self { 166 self.block_size = block_size; 167 self 168 } 169 170 /// The block mode. block_mode(mut self, block_mode: BlockMode) -> Self171 pub fn block_mode(mut self, block_mode: BlockMode) -> Self { 172 self.block_mode = block_mode; 173 self 174 } 175 176 /// If set, includes a checksum for each data block in the frame. block_checksums(mut self, block_checksums: bool) -> Self177 pub fn block_checksums(mut self, block_checksums: bool) -> Self { 178 self.block_checksums = block_checksums; 179 self 180 } 181 182 /// If set, includes a content checksum to verify that the full frame contents have been 183 /// decoded correctly. content_checksum(mut self, content_checksum: bool) -> Self184 pub fn content_checksum(mut self, content_checksum: bool) -> Self { 185 self.content_checksum = content_checksum; 186 self 187 } 188 189 /// If set, use the legacy frame format. legacy_frame(mut self, legacy_frame: bool) -> Self190 pub fn legacy_frame(mut self, legacy_frame: bool) -> Self { 191 self.legacy_frame = legacy_frame; 192 self 193 } 194 read_size(input: &[u8]) -> Result<usize, Error>195 pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> { 196 let mut required = MIN_FRAME_INFO_SIZE; 197 let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap()); 198 if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { 199 return Ok(MAGIC_NUMBER_SIZE); 200 } 201 202 if input.len() < required { 203 return Ok(required); 204 } 205 206 if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { 207 return Ok(8); 208 } 209 if magic_num != LZ4F_MAGIC_NUMBER { 210 return Err(Error::WrongMagicNumber); 211 } 212 213 if input[4] & FLG_CONTENT_SIZE != 0 { 214 required += 8; 215 } 216 if input[4] & FLG_DICTIONARY_ID != 0 { 217 required += 4 218 } 219 Ok(required) 220 } 221 write_size(&self) -> usize222 pub(crate) fn write_size(&self) -> usize { 223 let mut required = MIN_FRAME_INFO_SIZE; 224 if self.content_size.is_some() { 225 required += 8; 226 } 227 if self.dict_id.is_some() { 228 required += 4; 229 } 230 required 231 } 232 write(&self, output: &mut [u8]) -> Result<usize, Error>233 pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> { 234 let write_size = self.write_size(); 235 if output.len() < write_size { 236 return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into())); 237 } 238 let mut buffer = [0u8; MAX_FRAME_INFO_SIZE]; 239 assert!(write_size <= buffer.len()); 240 buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes()); 241 buffer[4] = FLG_SUPPORTED_VERSION_BITS; 242 if self.block_checksums { 243 buffer[4] |= FLG_BLOCK_CHECKSUMS; 244 } 245 if self.content_checksum { 246 buffer[4] |= FLG_CONTENT_CHECKSUM; 247 } 248 if self.block_mode == BlockMode::Independent { 249 buffer[4] |= FLG_INDEPENDENT_BLOCKS; 250 } 251 buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT; 252 253 // Optional section 254 let mut offset = 6; 255 if let Some(size) = self.content_size { 256 buffer[4] |= FLG_CONTENT_SIZE; 257 buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes()); 258 offset += 8; 259 } 260 if let Some(dict_id) = self.dict_id { 261 buffer[4] |= FLG_DICTIONARY_ID; 262 buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes()); 263 offset += 4; 264 } 265 266 // Header checksum 267 let mut hasher = XxHash32::with_seed(0); 268 hasher.write(&buffer[4..offset]); 269 let header_checksum = (hasher.finish() >> 8) as u8; 270 buffer[offset] = header_checksum; 271 offset += 1; 272 273 debug_assert_eq!(offset, write_size); 274 output[..write_size].copy_from_slice(&buffer[..write_size]); 275 Ok(write_size) 276 } 277 read(mut input: &[u8]) -> Result<FrameInfo, Error>278 pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> { 279 let original_input = input; 280 // 4 byte Magic 281 let magic_num = { 282 let mut buffer = [0u8; 4]; 283 input.read_exact(&mut buffer)?; 284 u32::from_le_bytes(buffer) 285 }; 286 if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { 287 return Ok(FrameInfo { 288 block_size: BlockSize::Max8MB, 289 legacy_frame: true, 290 ..FrameInfo::default() 291 }); 292 } 293 if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { 294 let mut buffer = [0u8; 4]; 295 input.read_exact(&mut buffer)?; 296 let user_data_len = u32::from_le_bytes(buffer); 297 return Err(Error::SkippableFrame(user_data_len)); 298 } 299 if magic_num != LZ4F_MAGIC_NUMBER { 300 return Err(Error::WrongMagicNumber); 301 } 302 303 // fixed size section 304 let [flg_byte, bd_byte] = { 305 let mut buffer = [0u8, 0]; 306 input.read_exact(&mut buffer)?; 307 buffer 308 }; 309 310 if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS { 311 // version is always 01 312 return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK)); 313 } 314 315 if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 { 316 return Err(Error::ReservedBitsSet); 317 } 318 319 let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 { 320 BlockMode::Independent 321 } else { 322 BlockMode::Linked 323 }; 324 let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0; 325 let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0; 326 327 let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT { 328 i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)), 329 4 => BlockSize::Max64KB, 330 5 => BlockSize::Max256KB, 331 6 => BlockSize::Max1MB, 332 7 => BlockSize::Max4MB, 333 _ => unreachable!(), 334 }; 335 336 // var len section 337 let mut content_size = None; 338 if flg_byte & FLG_CONTENT_SIZE != 0 { 339 let mut buffer = [0u8; 8]; 340 input.read_exact(&mut buffer).unwrap(); 341 content_size = Some(u64::from_le_bytes(buffer)); 342 } 343 344 let mut dict_id = None; 345 if flg_byte & FLG_DICTIONARY_ID != 0 { 346 let mut buffer = [0u8; 4]; 347 input.read_exact(&mut buffer)?; 348 dict_id = Some(u32::from_le_bytes(buffer)); 349 } 350 351 // 1 byte header checksum 352 let expected_checksum = { 353 let mut buffer = [0u8; 1]; 354 input.read_exact(&mut buffer)?; 355 buffer[0] 356 }; 357 let mut hasher = XxHash32::with_seed(0); 358 hasher.write(&original_input[4..original_input.len() - input.len() - 1]); 359 let header_hash = (hasher.finish() >> 8) as u8; 360 if header_hash != expected_checksum { 361 return Err(Error::HeaderChecksumError); 362 } 363 364 Ok(FrameInfo { 365 content_size, 366 dict_id, 367 block_size, 368 block_mode, 369 block_checksums, 370 content_checksum, 371 legacy_frame: false, 372 }) 373 } 374 } 375 376 #[derive(Debug)] 377 pub(crate) enum BlockInfo { 378 Compressed(u32), 379 Uncompressed(u32), 380 EndMark, 381 } 382 383 impl BlockInfo { read(mut input: &[u8]) -> Result<Self, Error>384 pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> { 385 let mut size_buffer = [0u8; 4]; 386 input.read_exact(&mut size_buffer)?; 387 let size = u32::from_le_bytes(size_buffer); 388 if size == 0 { 389 Ok(BlockInfo::EndMark) 390 } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 { 391 Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT)) 392 } else { 393 Ok(BlockInfo::Compressed(size)) 394 } 395 } 396 write(&self, mut output: &mut [u8]) -> Result<usize, Error>397 pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> { 398 let value = match self { 399 BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo), 400 BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len) 401 if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 => 402 { 403 return Err(Error::InvalidBlockInfo) 404 } 405 BlockInfo::Compressed(len) => *len, 406 BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT, 407 BlockInfo::EndMark => 0, 408 }; 409 output.write_all(&value.to_le_bytes())?; 410 Ok(4) 411 } 412 } 413