1 // Copyright © 2019 The Rust Fuzz Project Developers. 2 // 3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 // option. This file may not be copied, modified, or distributed 7 // except according to those terms. 8 9 //! Wrappers around raw, unstructured bytes. 10 11 use crate::{Arbitrary, Error, Result}; 12 use std::marker::PhantomData; 13 use std::ops::ControlFlow; 14 use std::{mem, ops}; 15 16 /// A source of unstructured data. 17 /// 18 /// An `Unstructured` helps `Arbitrary` implementations interpret raw data 19 /// (typically provided by a fuzzer) as a "DNA string" that describes how to 20 /// construct the `Arbitrary` type. The goal is that a small change to the "DNA 21 /// string" (the raw data wrapped by an `Unstructured`) results in a small 22 /// change to the generated `Arbitrary` instance. This helps a fuzzer 23 /// efficiently explore the `Arbitrary`'s input space. 24 /// 25 /// `Unstructured` is deterministic: given the same raw data, the same series of 26 /// API calls will return the same results (modulo system resource constraints, 27 /// like running out of memory). However, `Unstructured` does not guarantee 28 /// anything beyond that: it makes not guarantee that it will yield bytes from 29 /// the underlying data in any particular order. 30 /// 31 /// You shouldn't generally need to use an `Unstructured` unless you are writing 32 /// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly, 33 /// you should just be passing it through to nested `Arbitrary::arbitrary` 34 /// calls. 35 /// 36 /// # Example 37 /// 38 /// Imagine you were writing a color conversion crate. You might want to write 39 /// fuzz tests that take a random RGB color and assert various properties, run 40 /// functions and make sure nothing panics, etc. 41 /// 42 /// Below is what translating the fuzzer's raw input into an `Unstructured` and 43 /// using that to generate an arbitrary RGB color might look like: 44 /// 45 /// ``` 46 /// # #[cfg(feature = "derive")] fn foo() { 47 /// use arbitrary::{Arbitrary, Unstructured}; 48 /// 49 /// /// An RGB color. 50 /// #[derive(Arbitrary)] 51 /// pub struct Rgb { 52 /// r: u8, 53 /// g: u8, 54 /// b: u8, 55 /// } 56 /// 57 /// // Get the raw bytes from the fuzzer. 58 /// # let get_input_from_fuzzer = || &[]; 59 /// let raw_data: &[u8] = get_input_from_fuzzer(); 60 /// 61 /// // Wrap it in an `Unstructured`. 62 /// let mut unstructured = Unstructured::new(raw_data); 63 /// 64 /// // Generate an `Rgb` color and run our checks. 65 /// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) { 66 /// # let run_my_color_conversion_checks = |_| {}; 67 /// run_my_color_conversion_checks(rgb); 68 /// } 69 /// # } 70 /// ``` 71 pub struct Unstructured<'a> { 72 data: &'a [u8], 73 } 74 75 impl<'a> Unstructured<'a> { 76 /// Create a new `Unstructured` from the given raw data. 77 /// 78 /// # Example 79 /// 80 /// ``` 81 /// use arbitrary::Unstructured; 82 /// 83 /// let u = Unstructured::new(&[1, 2, 3, 4]); 84 /// ``` new(data: &'a [u8]) -> Self85 pub fn new(data: &'a [u8]) -> Self { 86 Unstructured { data } 87 } 88 89 /// Get the number of remaining bytes of underlying data that are still 90 /// available. 91 /// 92 /// # Example 93 /// 94 /// ``` 95 /// use arbitrary::{Arbitrary, Unstructured}; 96 /// 97 /// let mut u = Unstructured::new(&[1, 2, 3]); 98 /// 99 /// // Initially have three bytes of data. 100 /// assert_eq!(u.len(), 3); 101 /// 102 /// // Generating a `bool` consumes one byte from the underlying data, so 103 /// // we are left with two bytes afterwards. 104 /// let _ = bool::arbitrary(&mut u); 105 /// assert_eq!(u.len(), 2); 106 /// ``` 107 #[inline] len(&self) -> usize108 pub fn len(&self) -> usize { 109 self.data.len() 110 } 111 112 /// Is the underlying unstructured data exhausted? 113 /// 114 /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`. 115 /// 116 /// # Example 117 /// 118 /// ``` 119 /// use arbitrary::{Arbitrary, Unstructured}; 120 /// 121 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 122 /// 123 /// // Initially, we are not empty. 124 /// assert!(!u.is_empty()); 125 /// 126 /// // Generating a `u32` consumes all four bytes of the underlying data, so 127 /// // we become empty afterwards. 128 /// let _ = u32::arbitrary(&mut u); 129 /// assert!(u.is_empty()); 130 /// ``` 131 #[inline] is_empty(&self) -> bool132 pub fn is_empty(&self) -> bool { 133 self.len() == 0 134 } 135 136 /// Generate an arbitrary instance of `A`. 137 /// 138 /// This is simply a helper method that is equivalent to `<A as 139 /// Arbitrary>::arbitrary(self)`. This helper is a little bit more concise, 140 /// and can be used in situations where Rust's type inference will figure 141 /// out what `A` should be. 142 /// 143 /// # Example 144 /// 145 /// ``` 146 /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> { 147 /// use arbitrary::{Arbitrary, Unstructured}; 148 /// 149 /// #[derive(Arbitrary)] 150 /// struct MyType { 151 /// // ... 152 /// } 153 /// 154 /// fn do_stuff(value: MyType) { 155 /// # let _ = value; 156 /// // ... 157 /// } 158 /// 159 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 160 /// 161 /// // Rust's type inference can figure out that `value` should be of type 162 /// // `MyType` here: 163 /// let value = u.arbitrary()?; 164 /// do_stuff(value); 165 /// # Ok(()) } 166 /// ``` arbitrary<A>(&mut self) -> Result<A> where A: Arbitrary<'a>,167 pub fn arbitrary<A>(&mut self) -> Result<A> 168 where 169 A: Arbitrary<'a>, 170 { 171 <A as Arbitrary<'a>>::arbitrary(self) 172 } 173 174 /// Get the number of elements to insert when building up a collection of 175 /// arbitrary `ElementType`s. 176 /// 177 /// This uses the [`<ElementType as 178 /// Arbitrary>::size_hint`][crate::Arbitrary::size_hint] method to smartly 179 /// choose a length such that we most likely have enough underlying bytes to 180 /// construct that many arbitrary `ElementType`s. 181 /// 182 /// This should only be called within an `Arbitrary` implementation. 183 /// 184 /// # Example 185 /// 186 /// ``` 187 /// use arbitrary::{Arbitrary, Result, Unstructured}; 188 /// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> } 189 /// # impl<T> MyCollection<T> { 190 /// # pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } } 191 /// # pub fn insert(&mut self, element: T) {} 192 /// # } 193 /// 194 /// impl<'a, T> Arbitrary<'a> for MyCollection<T> 195 /// where 196 /// T: Arbitrary<'a>, 197 /// { 198 /// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> { 199 /// // Get the number of `T`s we should insert into our collection. 200 /// let len = u.arbitrary_len::<T>()?; 201 /// 202 /// // And then create a collection of that length! 203 /// let mut my_collection = MyCollection::with_capacity(len); 204 /// for _ in 0..len { 205 /// let element = T::arbitrary(u)?; 206 /// my_collection.insert(element); 207 /// } 208 /// 209 /// Ok(my_collection) 210 /// } 211 /// } 212 /// ``` arbitrary_len<ElementType>(&mut self) -> Result<usize> where ElementType: Arbitrary<'a>,213 pub fn arbitrary_len<ElementType>(&mut self) -> Result<usize> 214 where 215 ElementType: Arbitrary<'a>, 216 { 217 let byte_size = self.arbitrary_byte_size()?; 218 let (lower, upper) = <ElementType as Arbitrary>::size_hint(0); 219 let elem_size = upper.unwrap_or(lower * 2); 220 let elem_size = std::cmp::max(1, elem_size); 221 Ok(byte_size / elem_size) 222 } 223 arbitrary_byte_size(&mut self) -> Result<usize>224 fn arbitrary_byte_size(&mut self) -> Result<usize> { 225 if self.data.is_empty() { 226 Ok(0) 227 } else if self.data.len() == 1 { 228 self.data = &[]; 229 Ok(0) 230 } else { 231 // Take lengths from the end of the data, since the `libFuzzer` folks 232 // found that this lets fuzzers more efficiently explore the input 233 // space. 234 // 235 // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97 236 237 // We only consume as many bytes as necessary to cover the entire 238 // range of the byte string. 239 // Note: We cast to u64 so we don't overflow when checking std::u32::MAX + 4 on 32-bit archs 240 let len = if self.data.len() as u64 <= std::u8::MAX as u64 + 1 { 241 let bytes = 1; 242 let max_size = self.data.len() - bytes; 243 let (rest, for_size) = self.data.split_at(max_size); 244 self.data = rest; 245 Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize 246 } else if self.data.len() as u64 <= std::u16::MAX as u64 + 2 { 247 let bytes = 2; 248 let max_size = self.data.len() - bytes; 249 let (rest, for_size) = self.data.split_at(max_size); 250 self.data = rest; 251 Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize 252 } else if self.data.len() as u64 <= std::u32::MAX as u64 + 4 { 253 let bytes = 4; 254 let max_size = self.data.len() - bytes; 255 let (rest, for_size) = self.data.split_at(max_size); 256 self.data = rest; 257 Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize 258 } else { 259 let bytes = 8; 260 let max_size = self.data.len() - bytes; 261 let (rest, for_size) = self.data.split_at(max_size); 262 self.data = rest; 263 Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize 264 }; 265 266 Ok(len) 267 } 268 } 269 270 /// Generate an integer within the given range. 271 /// 272 /// Do not use this to generate the size of a collection. Use 273 /// `arbitrary_len` instead. 274 /// 275 /// # Panics 276 /// 277 /// Panics if `range.start > range.end`. That is, the given range must be 278 /// non-empty. 279 /// 280 /// # Example 281 /// 282 /// ``` 283 /// use arbitrary::{Arbitrary, Unstructured}; 284 /// 285 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 286 /// 287 /// let x: i32 = u.int_in_range(-5_000..=-1_000) 288 /// .expect("constructed `u` with enough bytes to generate an `i32`"); 289 /// 290 /// assert!(-5_000 <= x); 291 /// assert!(x <= -1_000); 292 /// ``` int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T> where T: Int,293 pub fn int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T> 294 where 295 T: Int, 296 { 297 let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?; 298 self.data = &self.data[bytes_consumed..]; 299 Ok(result) 300 } 301 int_in_range_impl<T>( range: ops::RangeInclusive<T>, mut bytes: impl Iterator<Item = u8>, ) -> Result<(T, usize)> where T: Int,302 fn int_in_range_impl<T>( 303 range: ops::RangeInclusive<T>, 304 mut bytes: impl Iterator<Item = u8>, 305 ) -> Result<(T, usize)> 306 where 307 T: Int, 308 { 309 let start = *range.start(); 310 let end = *range.end(); 311 assert!( 312 start <= end, 313 "`arbitrary::Unstructured::int_in_range` requires a non-empty range" 314 ); 315 316 // When there is only one possible choice, don't waste any entropy from 317 // the underlying data. 318 if start == end { 319 return Ok((start, 0)); 320 } 321 322 // From here on out we work with the unsigned representation. All of the 323 // operations performed below work out just as well whether or not `T` 324 // is a signed or unsigned integer. 325 let start = start.to_unsigned(); 326 let end = end.to_unsigned(); 327 328 let delta = end.wrapping_sub(start); 329 debug_assert_ne!(delta, T::Unsigned::ZERO); 330 331 // Compute an arbitrary integer offset from the start of the range. We 332 // do this by consuming `size_of(T)` bytes from the input to create an 333 // arbitrary integer and then clamping that int into our range bounds 334 // with a modulo operation. 335 let mut arbitrary_int = T::Unsigned::ZERO; 336 let mut bytes_consumed: usize = 0; 337 338 while (bytes_consumed < mem::size_of::<T>()) 339 && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO 340 { 341 let byte = match bytes.next() { 342 None => break, 343 Some(b) => b, 344 }; 345 bytes_consumed += 1; 346 347 // Combine this byte into our arbitrary integer, but avoid 348 // overflowing the shift for `u8` and `i8`. 349 arbitrary_int = if mem::size_of::<T>() == 1 { 350 T::Unsigned::from_u8(byte) 351 } else { 352 (arbitrary_int << 8) | T::Unsigned::from_u8(byte) 353 }; 354 } 355 356 let offset = if delta == T::Unsigned::MAX { 357 arbitrary_int 358 } else { 359 arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap()) 360 }; 361 362 // Finally, we add `start` to our offset from `start` to get the result 363 // actual value within the range. 364 let result = start.wrapping_add(offset); 365 366 // And convert back to our maybe-signed representation. 367 let result = T::from_unsigned(result); 368 debug_assert!(*range.start() <= result); 369 debug_assert!(result <= *range.end()); 370 371 Ok((result, bytes_consumed)) 372 } 373 374 /// Choose one of the given choices. 375 /// 376 /// This should only be used inside of `Arbitrary` implementations. 377 /// 378 /// Returns an error if there is not enough underlying data to make a 379 /// choice or if no choices are provided. 380 /// 381 /// # Examples 382 /// 383 /// Selecting from an array of choices: 384 /// 385 /// ``` 386 /// use arbitrary::Unstructured; 387 /// 388 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 389 /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g']; 390 /// 391 /// let choice = u.choose(&choices).unwrap(); 392 /// 393 /// println!("chose {}", choice); 394 /// ``` 395 /// 396 /// An error is returned if no choices are provided: 397 /// 398 /// ``` 399 /// use arbitrary::Unstructured; 400 /// 401 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 402 /// let choices: [char; 0] = []; 403 /// 404 /// let result = u.choose(&choices); 405 /// 406 /// assert!(result.is_err()); 407 /// ``` choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T>408 pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> { 409 let idx = self.choose_index(choices.len())?; 410 Ok(&choices[idx]) 411 } 412 413 /// Choose a value in `0..len`. 414 /// 415 /// Returns an error if the `len` is zero. 416 /// 417 /// # Examples 418 /// 419 /// Using Fisher–Yates shuffle shuffle to gerate an arbitrary permutation. 420 /// 421 /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle 422 /// 423 /// ``` 424 /// use arbitrary::Unstructured; 425 /// 426 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 427 /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g']; 428 /// let mut to_permute = &mut permutation[..]; 429 /// while to_permute.len() > 1 { 430 /// let idx = u.choose_index(to_permute.len()).unwrap(); 431 /// to_permute.swap(0, idx); 432 /// to_permute = &mut to_permute[1..]; 433 /// } 434 /// 435 /// println!("permutation: {:?}", permutation); 436 /// ``` 437 /// 438 /// An error is returned if the length is zero: 439 /// 440 /// ``` 441 /// use arbitrary::Unstructured; 442 /// 443 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); 444 /// let array: [i32; 0] = []; 445 /// 446 /// let result = u.choose_index(array.len()); 447 /// 448 /// assert!(result.is_err()); 449 /// ``` choose_index(&mut self, len: usize) -> Result<usize>450 pub fn choose_index(&mut self, len: usize) -> Result<usize> { 451 if len == 0 { 452 return Err(Error::EmptyChoose); 453 } 454 let idx = self.int_in_range(0..=len - 1)?; 455 Ok(idx) 456 } 457 458 /// Generate a boolean according to the given ratio. 459 /// 460 /// # Panics 461 /// 462 /// Panics when the numerator and denominator do not meet these constraints: 463 /// 464 /// * `0 < numerator <= denominator` 465 /// 466 /// # Example 467 /// 468 /// Generate a boolean that is `true` five sevenths of the time: 469 /// 470 /// ``` 471 /// # fn foo() -> arbitrary::Result<()> { 472 /// use arbitrary::Unstructured; 473 /// 474 /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; 475 /// let mut u = Unstructured::new(&my_data); 476 /// 477 /// if u.ratio(5, 7)? { 478 /// // Take this branch 5/7 of the time. 479 /// } 480 /// # Ok(()) 481 /// # } 482 /// ``` ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool> where T: Int,483 pub fn ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool> 484 where 485 T: Int, 486 { 487 assert!(T::ZERO < numerator); 488 assert!(numerator <= denominator); 489 let x = self.int_in_range(T::ONE..=denominator)?; 490 Ok(x <= numerator) 491 } 492 493 /// Fill a `buffer` with bytes from the underlying raw data. 494 /// 495 /// This should only be called within an `Arbitrary` implementation. This is 496 /// a very low-level operation. You should generally prefer calling nested 497 /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and 498 /// `String::arbitrary` over using this method directly. 499 /// 500 /// If this `Unstructured` does not have enough underlying data to fill the 501 /// whole `buffer`, it pads the buffer out with zeros. 502 /// 503 /// # Example 504 /// 505 /// ``` 506 /// use arbitrary::Unstructured; 507 /// 508 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 509 /// 510 /// let mut buf = [0; 2]; 511 /// 512 /// assert!(u.fill_buffer(&mut buf).is_ok()); 513 /// assert_eq!(buf, [1, 2]); 514 /// 515 /// assert!(u.fill_buffer(&mut buf).is_ok()); 516 /// assert_eq!(buf, [3, 4]); 517 /// 518 /// assert!(u.fill_buffer(&mut buf).is_ok()); 519 /// assert_eq!(buf, [0, 0]); 520 /// ``` fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()>521 pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> { 522 let n = std::cmp::min(buffer.len(), self.data.len()); 523 buffer[..n].copy_from_slice(&self.data[..n]); 524 for byte in buffer[n..].iter_mut() { 525 *byte = 0; 526 } 527 self.data = &self.data[n..]; 528 Ok(()) 529 } 530 531 /// Provide `size` bytes from the underlying raw data. 532 /// 533 /// This should only be called within an `Arbitrary` implementation. This is 534 /// a very low-level operation. You should generally prefer calling nested 535 /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and 536 /// `String::arbitrary` over using this method directly. 537 /// 538 /// # Example 539 /// 540 /// ``` 541 /// use arbitrary::Unstructured; 542 /// 543 /// let mut u = Unstructured::new(&[1, 2, 3, 4]); 544 /// 545 /// assert!(u.bytes(2).unwrap() == &[1, 2]); 546 /// assert!(u.bytes(2).unwrap() == &[3, 4]); 547 /// ``` bytes(&mut self, size: usize) -> Result<&'a [u8]>548 pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> { 549 if self.data.len() < size { 550 return Err(Error::NotEnoughData); 551 } 552 553 let (for_buf, rest) = self.data.split_at(size); 554 self.data = rest; 555 Ok(for_buf) 556 } 557 558 /// Peek at `size` number of bytes of the underlying raw input. 559 /// 560 /// Does not consume the bytes, only peeks at them. 561 /// 562 /// Returns `None` if there are not `size` bytes left in the underlying raw 563 /// input. 564 /// 565 /// # Example 566 /// 567 /// ``` 568 /// use arbitrary::Unstructured; 569 /// 570 /// let u = Unstructured::new(&[1, 2, 3]); 571 /// 572 /// assert_eq!(u.peek_bytes(0).unwrap(), []); 573 /// assert_eq!(u.peek_bytes(1).unwrap(), [1]); 574 /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]); 575 /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]); 576 /// 577 /// assert!(u.peek_bytes(4).is_none()); 578 /// ``` peek_bytes(&self, size: usize) -> Option<&'a [u8]>579 pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> { 580 self.data.get(..size) 581 } 582 583 /// Consume all of the rest of the remaining underlying bytes. 584 /// 585 /// Returns a slice of all the remaining, unconsumed bytes. 586 /// 587 /// # Example 588 /// 589 /// ``` 590 /// use arbitrary::Unstructured; 591 /// 592 /// let mut u = Unstructured::new(&[1, 2, 3]); 593 /// 594 /// let mut remaining = u.take_rest(); 595 /// 596 /// assert_eq!(remaining, [1, 2, 3]); 597 /// ``` take_rest(mut self) -> &'a [u8]598 pub fn take_rest(mut self) -> &'a [u8] { 599 mem::take(&mut self.data) 600 } 601 602 /// Provide an iterator over elements for constructing a collection 603 /// 604 /// This is useful for implementing [`Arbitrary::arbitrary`] on collections 605 /// since the implementation is simply `u.arbitrary_iter()?.collect()` arbitrary_iter<'b, ElementType: Arbitrary<'a>>( &'b mut self, ) -> Result<ArbitraryIter<'a, 'b, ElementType>>606 pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>( 607 &'b mut self, 608 ) -> Result<ArbitraryIter<'a, 'b, ElementType>> { 609 Ok(ArbitraryIter { 610 u: &mut *self, 611 _marker: PhantomData, 612 }) 613 } 614 615 /// Provide an iterator over elements for constructing a collection from 616 /// all the remaining bytes. 617 /// 618 /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections 619 /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()` arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>( self, ) -> Result<ArbitraryTakeRestIter<'a, ElementType>>620 pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>( 621 self, 622 ) -> Result<ArbitraryTakeRestIter<'a, ElementType>> { 623 let (lower, upper) = ElementType::size_hint(0); 624 625 let elem_size = upper.unwrap_or(lower * 2); 626 let elem_size = std::cmp::max(1, elem_size); 627 let size = self.len() / elem_size; 628 Ok(ArbitraryTakeRestIter { 629 size, 630 u: Some(self), 631 _marker: PhantomData, 632 }) 633 } 634 635 /// Call the given function an arbitrary number of times. 636 /// 637 /// The function is given this `Unstructured` so that it can continue to 638 /// generate arbitrary data and structures. 639 /// 640 /// You may optionaly specify minimum and maximum bounds on the number of 641 /// times the function is called. 642 /// 643 /// You may break out of the loop early by returning 644 /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return 645 /// `Ok(std::ops::ControlFlow::Continue)`. 646 /// 647 /// # Panics 648 /// 649 /// Panics if `min > max`. 650 /// 651 /// # Example 652 /// 653 /// Call a closure that generates an arbitrary type inside a context an 654 /// arbitrary number of times: 655 /// 656 /// ``` 657 /// use arbitrary::{Result, Unstructured}; 658 /// use std::ops::ControlFlow; 659 /// 660 /// enum Type { 661 /// /// A boolean type. 662 /// Bool, 663 /// 664 /// /// An integer type. 665 /// Int, 666 /// 667 /// /// A list of the `i`th type in this type's context. 668 /// List(usize), 669 /// } 670 /// 671 /// fn arbitrary_types_context(u: &mut Unstructured) -> Result<Vec<Type>> { 672 /// let mut context = vec![]; 673 /// 674 /// u.arbitrary_loop(Some(10), Some(20), |u| { 675 /// let num_choices = if context.is_empty() { 676 /// 2 677 /// } else { 678 /// 3 679 /// }; 680 /// let ty = match u.int_in_range::<u8>(1..=num_choices)? { 681 /// 1 => Type::Bool, 682 /// 2 => Type::Int, 683 /// 3 => Type::List(u.int_in_range(0..=context.len() - 1)?), 684 /// _ => unreachable!(), 685 /// }; 686 /// context.push(ty); 687 /// Ok(ControlFlow::Continue(())) 688 /// })?; 689 /// 690 /// // The number of loop iterations are constrained by the min/max 691 /// // bounds that we provided. 692 /// assert!(context.len() >= 10); 693 /// assert!(context.len() <= 20); 694 /// 695 /// Ok(context) 696 /// } 697 /// ``` arbitrary_loop( &mut self, min: Option<u32>, max: Option<u32>, mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>, ) -> Result<()>698 pub fn arbitrary_loop( 699 &mut self, 700 min: Option<u32>, 701 max: Option<u32>, 702 mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>, 703 ) -> Result<()> { 704 let min = min.unwrap_or(0); 705 let max = max.unwrap_or(u32::MAX); 706 707 for _ in 0..self.int_in_range(min..=max)? { 708 match f(self)? { 709 ControlFlow::Continue(_) => continue, 710 ControlFlow::Break(_) => break, 711 } 712 } 713 714 Ok(()) 715 } 716 } 717 718 /// Utility iterator produced by [`Unstructured::arbitrary_iter`] 719 pub struct ArbitraryIter<'a, 'b, ElementType> { 720 u: &'b mut Unstructured<'a>, 721 _marker: PhantomData<ElementType>, 722 } 723 724 impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, ElementType> { 725 type Item = Result<ElementType>; next(&mut self) -> Option<Result<ElementType>>726 fn next(&mut self) -> Option<Result<ElementType>> { 727 let keep_going = self.u.arbitrary().unwrap_or(false); 728 if keep_going { 729 Some(Arbitrary::arbitrary(self.u)) 730 } else { 731 None 732 } 733 } 734 } 735 736 /// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`] 737 pub struct ArbitraryTakeRestIter<'a, ElementType> { 738 u: Option<Unstructured<'a>>, 739 size: usize, 740 _marker: PhantomData<ElementType>, 741 } 742 743 impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> { 744 type Item = Result<ElementType>; next(&mut self) -> Option<Result<ElementType>>745 fn next(&mut self) -> Option<Result<ElementType>> { 746 if let Some(mut u) = self.u.take() { 747 if self.size == 1 { 748 Some(Arbitrary::arbitrary_take_rest(u)) 749 } else if self.size == 0 { 750 None 751 } else { 752 self.size -= 1; 753 let ret = Arbitrary::arbitrary(&mut u); 754 self.u = Some(u); 755 Some(ret) 756 } 757 } else { 758 None 759 } 760 } 761 } 762 763 /// A trait that is implemented for all of the primitive integers: 764 /// 765 /// * `u8` 766 /// * `u16` 767 /// * `u32` 768 /// * `u64` 769 /// * `u128` 770 /// * `usize` 771 /// * `i8` 772 /// * `i16` 773 /// * `i32` 774 /// * `i64` 775 /// * `i128` 776 /// * `isize` 777 /// 778 /// Don't implement this trait yourself. 779 pub trait Int: 780 Copy 781 + std::fmt::Debug 782 + PartialOrd 783 + Ord 784 + ops::Sub<Self, Output = Self> 785 + ops::Rem<Self, Output = Self> 786 + ops::Shr<Self, Output = Self> 787 + ops::Shl<usize, Output = Self> 788 + ops::BitOr<Self, Output = Self> 789 { 790 #[doc(hidden)] 791 type Unsigned: Int; 792 793 #[doc(hidden)] 794 const ZERO: Self; 795 796 #[doc(hidden)] 797 const ONE: Self; 798 799 #[doc(hidden)] 800 const MAX: Self; 801 802 #[doc(hidden)] from_u8(b: u8) -> Self803 fn from_u8(b: u8) -> Self; 804 805 #[doc(hidden)] from_usize(u: usize) -> Self806 fn from_usize(u: usize) -> Self; 807 808 #[doc(hidden)] checked_add(self, rhs: Self) -> Option<Self>809 fn checked_add(self, rhs: Self) -> Option<Self>; 810 811 #[doc(hidden)] wrapping_add(self, rhs: Self) -> Self812 fn wrapping_add(self, rhs: Self) -> Self; 813 814 #[doc(hidden)] wrapping_sub(self, rhs: Self) -> Self815 fn wrapping_sub(self, rhs: Self) -> Self; 816 817 #[doc(hidden)] to_unsigned(self) -> Self::Unsigned818 fn to_unsigned(self) -> Self::Unsigned; 819 820 #[doc(hidden)] from_unsigned(unsigned: Self::Unsigned) -> Self821 fn from_unsigned(unsigned: Self::Unsigned) -> Self; 822 } 823 824 macro_rules! impl_int { 825 ( $( $ty:ty : $unsigned_ty: ty ; )* ) => { 826 $( 827 impl Int for $ty { 828 type Unsigned = $unsigned_ty; 829 830 const ZERO: Self = 0; 831 832 const ONE: Self = 1; 833 834 const MAX: Self = Self::MAX; 835 836 fn from_u8(b: u8) -> Self { 837 b as Self 838 } 839 840 fn from_usize(u: usize) -> Self { 841 u as Self 842 } 843 844 fn checked_add(self, rhs: Self) -> Option<Self> { 845 <$ty>::checked_add(self, rhs) 846 } 847 848 fn wrapping_add(self, rhs: Self) -> Self { 849 <$ty>::wrapping_add(self, rhs) 850 } 851 852 fn wrapping_sub(self, rhs: Self) -> Self { 853 <$ty>::wrapping_sub(self, rhs) 854 } 855 856 fn to_unsigned(self) -> Self::Unsigned { 857 self as $unsigned_ty 858 } 859 860 fn from_unsigned(unsigned: $unsigned_ty) -> Self { 861 unsigned as Self 862 } 863 } 864 )* 865 } 866 } 867 868 impl_int! { 869 u8: u8; 870 u16: u16; 871 u32: u32; 872 u64: u64; 873 u128: u128; 874 usize: usize; 875 i8: u8; 876 i16: u16; 877 i32: u32; 878 i64: u64; 879 i128: u128; 880 isize: usize; 881 } 882 883 #[cfg(test)] 884 mod tests { 885 use super::*; 886 887 #[test] test_byte_size()888 fn test_byte_size() { 889 let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]); 890 // Should take one byte off the end 891 assert_eq!(u.arbitrary_byte_size().unwrap(), 6); 892 assert_eq!(u.len(), 9); 893 let mut v = vec![]; 894 v.resize(260, 0); 895 v.push(1); 896 v.push(4); 897 let mut u = Unstructured::new(&v); 898 // Should read two bytes off the end 899 assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104); 900 assert_eq!(u.len(), 260); 901 } 902 903 #[test] int_in_range_of_one()904 fn int_in_range_of_one() { 905 let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]); 906 let x = u.int_in_range(0..=0).unwrap(); 907 assert_eq!(x, 0); 908 let choice = *u.choose(&[42]).unwrap(); 909 assert_eq!(choice, 42) 910 } 911 912 #[test] int_in_range_uses_minimal_amount_of_bytes()913 fn int_in_range_uses_minimal_amount_of_bytes() { 914 let mut u = Unstructured::new(&[1, 2]); 915 assert_eq!(1, u.int_in_range::<u8>(0..=u8::MAX).unwrap()); 916 assert_eq!(u.len(), 1); 917 918 let mut u = Unstructured::new(&[1, 2]); 919 assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32).unwrap()); 920 assert_eq!(u.len(), 1); 921 922 let mut u = Unstructured::new(&[1]); 923 assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32 + 1).unwrap()); 924 assert!(u.is_empty()); 925 } 926 927 #[test] int_in_range_in_bounds()928 fn int_in_range_in_bounds() { 929 for input in u8::MIN..=u8::MAX { 930 let input = [input]; 931 932 let mut u = Unstructured::new(&input); 933 let x = u.int_in_range(1..=u8::MAX).unwrap(); 934 assert_ne!(x, 0); 935 936 let mut u = Unstructured::new(&input); 937 let x = u.int_in_range(0..=u8::MAX - 1).unwrap(); 938 assert_ne!(x, u8::MAX); 939 } 940 } 941 942 #[test] int_in_range_covers_unsigned_range()943 fn int_in_range_covers_unsigned_range() { 944 // Test that we generate all values within the range given to 945 // `int_in_range`. 946 947 let mut full = [false; u8::MAX as usize + 1]; 948 let mut no_zero = [false; u8::MAX as usize]; 949 let mut no_max = [false; u8::MAX as usize]; 950 let mut narrow = [false; 10]; 951 952 for input in u8::MIN..=u8::MAX { 953 let input = [input]; 954 955 let mut u = Unstructured::new(&input); 956 let x = u.int_in_range(0..=u8::MAX).unwrap(); 957 full[x as usize] = true; 958 959 let mut u = Unstructured::new(&input); 960 let x = u.int_in_range(1..=u8::MAX).unwrap(); 961 no_zero[x as usize - 1] = true; 962 963 let mut u = Unstructured::new(&input); 964 let x = u.int_in_range(0..=u8::MAX - 1).unwrap(); 965 no_max[x as usize] = true; 966 967 let mut u = Unstructured::new(&input); 968 let x = u.int_in_range(100..=109).unwrap(); 969 narrow[x as usize - 100] = true; 970 } 971 972 for (i, covered) in full.iter().enumerate() { 973 assert!(covered, "full[{}] should have been generated", i); 974 } 975 for (i, covered) in no_zero.iter().enumerate() { 976 assert!(covered, "no_zero[{}] should have been generated", i); 977 } 978 for (i, covered) in no_max.iter().enumerate() { 979 assert!(covered, "no_max[{}] should have been generated", i); 980 } 981 for (i, covered) in narrow.iter().enumerate() { 982 assert!(covered, "narrow[{}] should have been generated", i); 983 } 984 } 985 986 #[test] int_in_range_covers_signed_range()987 fn int_in_range_covers_signed_range() { 988 // Test that we generate all values within the range given to 989 // `int_in_range`. 990 991 let mut full = [false; u8::MAX as usize + 1]; 992 let mut no_min = [false; u8::MAX as usize]; 993 let mut no_max = [false; u8::MAX as usize]; 994 let mut narrow = [false; 21]; 995 996 let abs_i8_min: isize = 128; 997 998 for input in 0..=u8::MAX { 999 let input = [input]; 1000 1001 let mut u = Unstructured::new(&input); 1002 let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap(); 1003 full[(x as isize + abs_i8_min) as usize] = true; 1004 1005 let mut u = Unstructured::new(&input); 1006 let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap(); 1007 no_min[(x as isize + abs_i8_min - 1) as usize] = true; 1008 1009 let mut u = Unstructured::new(&input); 1010 let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap(); 1011 no_max[(x as isize + abs_i8_min) as usize] = true; 1012 1013 let mut u = Unstructured::new(&input); 1014 let x = u.int_in_range(-10..=10).unwrap(); 1015 narrow[(x as isize + 10) as usize] = true; 1016 } 1017 1018 for (i, covered) in full.iter().enumerate() { 1019 assert!(covered, "full[{}] should have been generated", i); 1020 } 1021 for (i, covered) in no_min.iter().enumerate() { 1022 assert!(covered, "no_min[{}] should have been generated", i); 1023 } 1024 for (i, covered) in no_max.iter().enumerate() { 1025 assert!(covered, "no_max[{}] should have been generated", i); 1026 } 1027 for (i, covered) in narrow.iter().enumerate() { 1028 assert!(covered, "narrow[{}] should have been generated", i); 1029 } 1030 } 1031 } 1032