1 // Copyright 2015 Ilkka Rauta 2 // 3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 // option. This file may not be copied, modified, or distributed 7 // except according to those terms. 8 9 //! BitReader is a helper type to extract strings of bits from a slice of bytes. 10 //! 11 //! Here is how you read first a single bit, then three bits and finally four bits from a byte 12 //! buffer: 13 //! 14 //! ``` 15 //! use bitreader::BitReader; 16 //! 17 //! let slice_of_u8 = &[0b1000_1111]; 18 //! let mut reader = BitReader::new(slice_of_u8); 19 //! 20 //! // You probably should use try! or some other error handling mechanism in real code if the 21 //! // length of the input is not known in advance. 22 //! let a_single_bit = reader.read_u8(1).unwrap(); 23 //! assert_eq!(a_single_bit, 1); 24 //! 25 //! let more_bits = reader.read_u8(3).unwrap(); 26 //! assert_eq!(more_bits, 0); 27 //! 28 //! let last_bits_of_byte = reader.read_u8(4).unwrap(); 29 //! assert_eq!(last_bits_of_byte, 0b1111); 30 //! ``` 31 //! You can naturally read bits from longer buffer of data than just a single byte. 32 //! 33 //! As you read bits, the internal cursor of BitReader moves on along the stream of bits. Big 34 //! endian format is assumed when reading the multi-byte values. BitReader supports reading maximum 35 //! of 64 bits at a time (with read_u64). Reading signed values directly is not supported at the 36 //! moment. 37 //! 38 //! The reads do not need to be aligned in any particular way. 39 //! 40 //! Reading zero bits is a no-op. 41 //! 42 //! You can also skip over a number of bits, in which case there is no arbitrary small limits like 43 //! when reading the values to a variable. However, you can not seek past the end of the slice, 44 //! either when reading or when skipping bits. 45 //! 46 //! Note that the code will likely not work correctly if the slice is longer than 2^61 bytes, but 47 //! exceeding that should be pretty unlikely. Let's get back to this when people read exabytes of 48 //! information one bit at a time. 49 #![no_std] 50 cfg_if::cfg_if!{ 51 if #[cfg(feature = "std")] { 52 extern crate std; 53 use std::cmp::min; 54 use std::prelude::v1::*; 55 use std::fmt; 56 use std::error::Error; 57 use std::result; 58 } else { 59 use core::result; 60 use core::fmt; 61 use core::cmp::min; 62 } 63 } 64 65 #[cfg(test)] 66 mod tests; 67 68 /// BitReader reads data from a byte slice at the granularity of a single bit. 69 pub struct BitReader<'a> { 70 bytes: &'a [u8], 71 /// Position from the start of the slice, counted as bits instead of bytes 72 position: u64, 73 relative_offset: u64, 74 75 /// Length this reader is allowed to read from the slice, counted as bits instead of bytes. 76 length: u64, 77 } 78 79 impl<'a> BitReader<'a> { 80 /// Construct a new BitReader from a byte slice. The returned reader lives at most as long as 81 /// the slice given to is valid. new(bytes: &'a [u8]) -> BitReader<'a>82 pub fn new(bytes: &'a [u8]) -> BitReader<'a> { 83 BitReader { 84 bytes: bytes, 85 position: 0, 86 relative_offset: 0, 87 length: bytes.len() as u64 * 8, 88 } 89 } 90 91 /// Returns a copy of current BitReader, with the difference that its position() returns 92 /// positions relative to the position of the original BitReader at the construction time. 93 /// After construction, both readers are otherwise completely independent, except of course 94 /// for sharing the same source data. 95 /// 96 /// ``` 97 /// use bitreader::BitReader; 98 /// 99 /// let bytes = &[0b11110000, 0b00001111]; 100 /// let mut original = BitReader::new(bytes); 101 /// assert_eq!(original.read_u8(4).unwrap(), 0b1111); 102 /// assert_eq!(original.position(), 4); 103 /// 104 /// let mut relative = original.relative_reader(); 105 /// assert_eq!(relative.position(), 0); 106 /// 107 /// assert_eq!(original.read_u8(8).unwrap(), 0); 108 /// assert_eq!(relative.read_u8(8).unwrap(), 0); 109 /// 110 /// assert_eq!(original.position(), 12); 111 /// assert_eq!(relative.position(), 8); 112 /// ``` relative_reader(&self) -> BitReader<'a>113 pub fn relative_reader(&self) -> BitReader<'a> { 114 BitReader { 115 bytes: self.bytes, 116 position: self.position, 117 relative_offset: self.position, 118 length: self.length - self.position, 119 } 120 } 121 122 /// Returns a copy of current BitReader, with the difference that its position() returns 123 /// positions relative to the position of the original BitReader at the construction time, and 124 /// will not allow reading more than len bits. After construction, both readers are otherwise 125 // completely independent, except of course for sharing the same source data. 126 /// 127 /// ``` 128 /// use bitreader::BitReader; 129 /// use bitreader::BitReaderError; 130 /// 131 /// let bytes = &[0b11110000, 0b00001111]; 132 /// let mut original = BitReader::new(bytes); 133 /// assert_eq!(original.read_u8(4).unwrap(), 0b1111); 134 /// assert_eq!(original.position(), 4); 135 /// 136 /// let mut relative = original.relative_reader_atmost(8); 137 /// assert_eq!(relative.position(), 0); 138 /// 139 /// assert_eq!(original.read_u8(8).unwrap(), 0); 140 /// assert_eq!(relative.read_u8(8).unwrap(), 0); 141 /// 142 /// assert_eq!(original.position(), 12); 143 /// assert_eq!(relative.position(), 8); 144 /// 145 /// assert_eq!(relative.read_u8(8).unwrap_err(), BitReaderError::NotEnoughData{ 146 /// position: 8, 147 /// length: 8, 148 /// requested: 8 149 /// }); 150 /// ``` relative_reader_atmost(&self, len: u64) -> BitReader<'a>151 pub fn relative_reader_atmost(&self, len: u64) -> BitReader<'a> { 152 BitReader { 153 bytes: self.bytes, 154 position: self.position, 155 relative_offset: self.position, 156 length: min(self.length - self.position, len), 157 } 158 } 159 160 /// Read at most 8 bits into a u8. read_u8(&mut self, bit_count: u8) -> Result<u8>161 pub fn read_u8(&mut self, bit_count: u8) -> Result<u8> { 162 let value = self.read_value(bit_count, 8)?; 163 Ok((value & 0xff) as u8) 164 } 165 166 /// Read at most 8 bits into a u8, but without moving the cursor forward. peek_u8(&self, bit_count: u8) -> Result<u8>167 pub fn peek_u8(&self, bit_count: u8) -> Result<u8> { 168 self.relative_reader().read_u8(bit_count) 169 } 170 171 /// Fills the entire `output_bytes` slice. If there aren't enough bits remaining 172 /// after the internal cursor's current position, the cursor won't be moved forward 173 /// and the contents of `output_bytes` won't be modified. read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()>174 pub fn read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()> { 175 let requested = output_bytes.len() as u64 * 8; 176 if requested > self.remaining() { 177 Err(BitReaderError::NotEnoughData { 178 position: self.position(), 179 length: self.length, 180 requested, 181 }) 182 } else { 183 for byte in output_bytes.iter_mut() { 184 *byte = self.read_u8(8)?; 185 } 186 Ok(()) 187 } 188 } 189 190 /// Read at most 16 bits into a u16. read_u16(&mut self, bit_count: u8) -> Result<u16>191 pub fn read_u16(&mut self, bit_count: u8) -> Result<u16> { 192 let value = self.read_value(bit_count, 16)?; 193 Ok((value & 0xffff) as u16) 194 } 195 196 /// Read at most 16 bits into a u16, but without moving the cursor forward. peek_u16(&self, bit_count: u8) -> Result<u16>197 pub fn peek_u16(&self, bit_count: u8) -> Result<u16> { 198 self.relative_reader().read_u16(bit_count) 199 } 200 201 /// Read at most 32 bits into a u32. read_u32(&mut self, bit_count: u8) -> Result<u32>202 pub fn read_u32(&mut self, bit_count: u8) -> Result<u32> { 203 let value = self.read_value(bit_count, 32)?; 204 Ok((value & 0xffffffff) as u32) 205 } 206 207 /// Read at most 32 bits into a u32, but without moving the cursor forward. peek_u32(&self, bit_count: u8) -> Result<u32>208 pub fn peek_u32(&self, bit_count: u8) -> Result<u32> { 209 self.relative_reader().read_u32(bit_count) 210 } 211 212 /// Read at most 64 bits into a u64. read_u64(&mut self, bit_count: u8) -> Result<u64>213 pub fn read_u64(&mut self, bit_count: u8) -> Result<u64> { 214 let value = self.read_value(bit_count, 64)?; 215 Ok(value) 216 } 217 218 /// Read at most 64 bits into a u64, but without moving the cursor forward. peek_u64(&self, bit_count: u8) -> Result<u64>219 pub fn peek_u64(&self, bit_count: u8) -> Result<u64> { 220 self.relative_reader().read_u64(bit_count) 221 } 222 223 /// Read at most 8 bits into a i8. 224 /// Assumes the bits are stored in two's complement format. read_i8(&mut self, bit_count: u8) -> Result<i8>225 pub fn read_i8(&mut self, bit_count: u8) -> Result<i8> { 226 let value = self.read_signed_value(bit_count, 8)?; 227 Ok((value & 0xff) as i8) 228 } 229 230 /// Read at most 16 bits into a i16. 231 /// Assumes the bits are stored in two's complement format. read_i16(&mut self, bit_count: u8) -> Result<i16>232 pub fn read_i16(&mut self, bit_count: u8) -> Result<i16> { 233 let value = self.read_signed_value(bit_count, 16)?; 234 Ok((value & 0xffff) as i16) 235 } 236 237 /// Read at most 32 bits into a i32. 238 /// Assumes the bits are stored in two's complement format. read_i32(&mut self, bit_count: u8) -> Result<i32>239 pub fn read_i32(&mut self, bit_count: u8) -> Result<i32> { 240 let value = self.read_signed_value(bit_count, 32)?; 241 Ok((value & 0xffffffff) as i32) 242 } 243 244 /// Read at most 64 bits into a i64. 245 /// Assumes the bits are stored in two's complement format. read_i64(&mut self, bit_count: u8) -> Result<i64>246 pub fn read_i64(&mut self, bit_count: u8) -> Result<i64> { 247 let value = self.read_signed_value(bit_count, 64)?; 248 Ok(value) 249 } 250 251 /// Read a single bit as a boolean value. 252 /// Interprets 1 as true and 0 as false. read_bool(&mut self) -> Result<bool>253 pub fn read_bool(&mut self) -> Result<bool> { 254 match self.read_value(1, 1)? { 255 0 => Ok(false), 256 _ => Ok(true), 257 } 258 } 259 260 /// Read a single bit as a boolean value, but without moving the cursor forward. 261 /// Interprets 1 as true and 0 as false. peek_bool(&self) -> Result<bool>262 pub fn peek_bool(&self) -> Result<bool> { 263 self.relative_reader().read_bool() 264 } 265 266 /// Skip arbitrary number of bits. However, you can skip at most to the end of the byte slice. skip(&mut self, bit_count: u64) -> Result<()>267 pub fn skip(&mut self, bit_count: u64) -> Result<()> { 268 let end_position = self.position + bit_count; 269 if end_position > (self.relative_offset + self.length) { 270 return Err(BitReaderError::NotEnoughData { 271 position: self.position(), 272 length: self.length, 273 requested: bit_count, 274 }); 275 } 276 self.position = end_position; 277 Ok(()) 278 } 279 280 /// Returns the position of the cursor, or how many bits have been read so far. position(&self) -> u64281 pub fn position(&self) -> u64 { 282 self.position - self.relative_offset 283 } 284 285 /// Returns the number of bits not yet read from the underlying slice. remaining(&self) -> u64286 pub fn remaining(&self) -> u64 { 287 self.length - self.position 288 } 289 290 /// Helper to make sure the "bit cursor" is exactly at the beginning of a byte, or at specific 291 /// multi-byte alignment position. 292 /// 293 /// For example `reader.is_aligned(1)` returns true if exactly n bytes, or n * 8 bits, has been 294 /// read. Similarly, `reader.is_aligned(4)` returns true if exactly n * 32 bits, or n 4-byte 295 /// sequences has been read. 296 /// 297 /// This function can be used to validate the data is being read properly, for example by 298 /// adding invocations wrapped into `debug_assert!()` to places where it is known the data 299 /// should be n-byte aligned. is_aligned(&self, alignment_bytes: u32) -> bool300 pub fn is_aligned(&self, alignment_bytes: u32) -> bool { 301 self.position % (alignment_bytes as u64 * 8) == 0 302 } 303 304 /// Helper to move the "bit cursor" to exactly the beginning of a byte, or to a specific 305 /// multi-byte alignment position. 306 /// 307 /// That is, `reader.align(n)` moves the cursor to the next position that 308 /// is a multiple of n * 8 bits, if it's not correctly aligned already. align(&mut self, alignment_bytes: u32) -> Result<()>309 pub fn align(&mut self, alignment_bytes: u32) -> Result<()> { 310 let alignment_bits = alignment_bytes as u64 * 8; 311 let cur_alignment = self.position % alignment_bits; 312 let bits_to_skip = (alignment_bits - cur_alignment) % alignment_bits; 313 self.skip(bits_to_skip) 314 } 315 read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64>316 fn read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64> { 317 if bit_count == 0 { 318 return Ok(0); 319 } 320 let unsigned = self.read_value(bit_count, maximum_count)?; 321 // Fill the bits above the requested bits with all ones or all zeros, 322 // depending on the sign bit. 323 let sign_bit = unsigned >> (bit_count - 1) & 1; 324 let high_bits = if sign_bit == 1 { -1 } else { 0 }; 325 if bit_count == 64 { 326 // Avoid left-shift-with-overflow exception 327 return Ok(unsigned as i64); 328 } 329 Ok(high_bits << bit_count | unsigned as i64) 330 } 331 read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64>332 fn read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64> { 333 if bit_count == 0 { 334 return Ok(0); 335 } 336 if bit_count > maximum_count { 337 return Err(BitReaderError::TooManyBitsForType { 338 position: self.position, 339 requested: bit_count, 340 allowed: maximum_count, 341 }); 342 } 343 let start_position = self.position; 344 let end_position = self.position + bit_count as u64; 345 if end_position > (self.relative_offset + self.length) { 346 return Err(BitReaderError::NotEnoughData { 347 position: self.position(), 348 length: self.length, 349 requested: bit_count as u64, 350 }); 351 } 352 353 let mut value: u64 = 0; 354 355 for i in start_position..end_position { 356 let byte_index = (i / 8) as usize; 357 let byte = self.bytes[byte_index]; 358 let shift = 7 - (i % 8); 359 let bit = (byte >> shift) as u64 & 1; 360 value = (value << 1) | bit; 361 } 362 363 self.position = end_position; 364 Ok(value) 365 } 366 } 367 368 /// Result type for those BitReader operations that can fail. 369 pub type Result<T> = result::Result<T, BitReaderError>; 370 371 /// Error enumeration of BitReader errors. 372 #[derive(Debug,PartialEq,Copy,Clone)] 373 pub enum BitReaderError { 374 /// Requested more bits than there are left in the byte slice at the current position. 375 NotEnoughData { 376 /// Current posititon in bits relative to the beginning of the reader. 377 position: u64, 378 379 /// Total readable length in bits of the underlaying slice. 380 length: u64, 381 382 /// Bits requested to be read. 383 requested: u64, 384 }, 385 /// Requested more bits than the returned variable can hold, for example more than 8 bits when 386 /// reading into a u8. 387 TooManyBitsForType { 388 position: u64, 389 requested: u8, 390 allowed: u8, 391 } 392 } 393 394 #[cfg(feature = "std")] 395 impl Error for BitReaderError { description(&self) -> &str396 fn description(&self) -> &str { 397 match *self { 398 BitReaderError::NotEnoughData {..} => "Requested more bits than the byte slice has left", 399 BitReaderError::TooManyBitsForType {..} => "Requested more bits than the requested integer type can hold", 400 } 401 } 402 } 403 404 impl fmt::Display for BitReaderError { fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result405 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 406 //self.description().fmt(fmt) 407 match *self { 408 BitReaderError::NotEnoughData { position, length, requested } => write!(fmt, "BitReader: Requested {} bits with only {}/{} bits left (position {})", requested, length - position, length, position), 409 BitReaderError::TooManyBitsForType { position, requested, allowed } => write!(fmt, "BitReader: Requested {} bits while the type can only hold {} (position {})", requested, allowed, position), 410 } 411 } 412 } 413 414 /// Helper trait to allow reading bits into a variable without explicitly mentioning its type. 415 /// 416 /// If you can't or want, for some reason, to use BitReader's read methods (`read_u8` etc.) but 417 /// want to rely on type inference instead, you can use the ReadInto trait. The trait is 418 /// implemented for all basic integer types (8/16/32/64 bits, signed/unsigned) 419 /// and the boolean type. 420 /// 421 /// ``` 422 /// use bitreader::{BitReader,ReadInto}; 423 /// 424 /// let slice_of_u8 = &[0b1110_0000]; 425 /// let mut reader = BitReader::new(slice_of_u8); 426 /// 427 /// struct Foo { 428 /// bar: u8, 429 /// valid: bool, 430 /// } 431 /// 432 /// // No type mentioned here, instead the type of bits is inferred from the type of Foo::bar, 433 /// // and consequently the correct "overload" is used. 434 /// let bits = ReadInto::read(&mut reader, 2).unwrap(); 435 /// let valid = ReadInto::read(&mut reader, 1).unwrap(); 436 /// 437 /// let foo = Foo { bar: bits, valid: valid }; 438 /// assert_eq!(foo.bar, 3); 439 /// assert!(foo.valid); 440 /// ``` 441 pub trait ReadInto 442 where Self: Sized 443 { read(reader: &mut BitReader, bits: u8) -> Result<Self>444 fn read(reader: &mut BitReader, bits: u8) -> Result<Self>; 445 } 446 447 // There's eight almost identical implementations, let's make this easier. 448 macro_rules! impl_read_into { 449 ($T:ty, $method:ident) => ( 450 impl ReadInto for $T { 451 fn read(reader: &mut BitReader, bits: u8) -> Result<Self> { 452 reader.$method(bits) 453 } 454 } 455 ) 456 } 457 458 impl_read_into!(u8, read_u8); 459 impl_read_into!(u16, read_u16); 460 impl_read_into!(u32, read_u32); 461 impl_read_into!(u64, read_u64); 462 463 impl_read_into!(i8, read_i8); 464 impl_read_into!(i16, read_i16); 465 impl_read_into!(i32, read_i32); 466 impl_read_into!(i64, read_i64); 467 468 // We can't cast to bool, so this requires a separate method. 469 impl ReadInto for bool { read(reader: &mut BitReader, bits: u8) -> Result<Self>470 fn read(reader: &mut BitReader, bits: u8) -> Result<Self> { 471 match reader.read_u8(bits)? { 472 0 => Ok(false), 473 _ => Ok(true), 474 } 475 } 476 } 477