1 // Copyright © 2019 The Rust Fuzz Project Developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 //! Wrappers around raw, unstructured bytes.
10 
11 use crate::{Arbitrary, Error, Result};
12 use std::marker::PhantomData;
13 use std::ops::ControlFlow;
14 use std::{mem, ops};
15 
16 /// A source of unstructured data.
17 ///
18 /// An `Unstructured` helps `Arbitrary` implementations interpret raw data
19 /// (typically provided by a fuzzer) as a "DNA string" that describes how to
20 /// construct the `Arbitrary` type. The goal is that a small change to the "DNA
21 /// string" (the raw data wrapped by an `Unstructured`) results in a small
22 /// change to the generated `Arbitrary` instance. This helps a fuzzer
23 /// efficiently explore the `Arbitrary`'s input space.
24 ///
25 /// `Unstructured` is deterministic: given the same raw data, the same series of
26 /// API calls will return the same results (modulo system resource constraints,
27 /// like running out of memory). However, `Unstructured` does not guarantee
28 /// anything beyond that: it makes not guarantee that it will yield bytes from
29 /// the underlying data in any particular order.
30 ///
31 /// You shouldn't generally need to use an `Unstructured` unless you are writing
32 /// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly,
33 /// you should just be passing it through to nested `Arbitrary::arbitrary`
34 /// calls.
35 ///
36 /// # Example
37 ///
38 /// Imagine you were writing a color conversion crate. You might want to write
39 /// fuzz tests that take a random RGB color and assert various properties, run
40 /// functions and make sure nothing panics, etc.
41 ///
42 /// Below is what translating the fuzzer's raw input into an `Unstructured` and
43 /// using that to generate an arbitrary RGB color might look like:
44 ///
45 /// ```
46 /// # #[cfg(feature = "derive")] fn foo() {
47 /// use arbitrary::{Arbitrary, Unstructured};
48 ///
49 /// /// An RGB color.
50 /// #[derive(Arbitrary)]
51 /// pub struct Rgb {
52 ///     r: u8,
53 ///     g: u8,
54 ///     b: u8,
55 /// }
56 ///
57 /// // Get the raw bytes from the fuzzer.
58 /// #   let get_input_from_fuzzer = || &[];
59 /// let raw_data: &[u8] = get_input_from_fuzzer();
60 ///
61 /// // Wrap it in an `Unstructured`.
62 /// let mut unstructured = Unstructured::new(raw_data);
63 ///
64 /// // Generate an `Rgb` color and run our checks.
65 /// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) {
66 /// #   let run_my_color_conversion_checks = |_| {};
67 ///     run_my_color_conversion_checks(rgb);
68 /// }
69 /// # }
70 /// ```
71 pub struct Unstructured<'a> {
72     data: &'a [u8],
73 }
74 
75 impl<'a> Unstructured<'a> {
76     /// Create a new `Unstructured` from the given raw data.
77     ///
78     /// # Example
79     ///
80     /// ```
81     /// use arbitrary::Unstructured;
82     ///
83     /// let u = Unstructured::new(&[1, 2, 3, 4]);
84     /// ```
new(data: &'a [u8]) -> Self85     pub fn new(data: &'a [u8]) -> Self {
86         Unstructured { data }
87     }
88 
89     /// Get the number of remaining bytes of underlying data that are still
90     /// available.
91     ///
92     /// # Example
93     ///
94     /// ```
95     /// use arbitrary::{Arbitrary, Unstructured};
96     ///
97     /// let mut u = Unstructured::new(&[1, 2, 3]);
98     ///
99     /// // Initially have three bytes of data.
100     /// assert_eq!(u.len(), 3);
101     ///
102     /// // Generating a `bool` consumes one byte from the underlying data, so
103     /// // we are left with two bytes afterwards.
104     /// let _ = bool::arbitrary(&mut u);
105     /// assert_eq!(u.len(), 2);
106     /// ```
107     #[inline]
len(&self) -> usize108     pub fn len(&self) -> usize {
109         self.data.len()
110     }
111 
112     /// Is the underlying unstructured data exhausted?
113     ///
114     /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`.
115     ///
116     /// # Example
117     ///
118     /// ```
119     /// use arbitrary::{Arbitrary, Unstructured};
120     ///
121     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
122     ///
123     /// // Initially, we are not empty.
124     /// assert!(!u.is_empty());
125     ///
126     /// // Generating a `u32` consumes all four bytes of the underlying data, so
127     /// // we become empty afterwards.
128     /// let _ = u32::arbitrary(&mut u);
129     /// assert!(u.is_empty());
130     /// ```
131     #[inline]
is_empty(&self) -> bool132     pub fn is_empty(&self) -> bool {
133         self.len() == 0
134     }
135 
136     /// Generate an arbitrary instance of `A`.
137     ///
138     /// This is simply a helper method that is equivalent to `<A as
139     /// Arbitrary>::arbitrary(self)`. This helper is a little bit more concise,
140     /// and can be used in situations where Rust's type inference will figure
141     /// out what `A` should be.
142     ///
143     /// # Example
144     ///
145     /// ```
146     /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> {
147     /// use arbitrary::{Arbitrary, Unstructured};
148     ///
149     /// #[derive(Arbitrary)]
150     /// struct MyType {
151     ///     // ...
152     /// }
153     ///
154     /// fn do_stuff(value: MyType) {
155     /// #   let _ = value;
156     ///     // ...
157     /// }
158     ///
159     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
160     ///
161     /// // Rust's type inference can figure out that `value` should be of type
162     /// // `MyType` here:
163     /// let value = u.arbitrary()?;
164     /// do_stuff(value);
165     /// # Ok(()) }
166     /// ```
arbitrary<A>(&mut self) -> Result<A> where A: Arbitrary<'a>,167     pub fn arbitrary<A>(&mut self) -> Result<A>
168     where
169         A: Arbitrary<'a>,
170     {
171         <A as Arbitrary<'a>>::arbitrary(self)
172     }
173 
174     /// Get the number of elements to insert when building up a collection of
175     /// arbitrary `ElementType`s.
176     ///
177     /// This uses the [`<ElementType as
178     /// Arbitrary>::size_hint`][crate::Arbitrary::size_hint] method to smartly
179     /// choose a length such that we most likely have enough underlying bytes to
180     /// construct that many arbitrary `ElementType`s.
181     ///
182     /// This should only be called within an `Arbitrary` implementation.
183     ///
184     /// # Example
185     ///
186     /// ```
187     /// use arbitrary::{Arbitrary, Result, Unstructured};
188     /// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
189     /// # impl<T> MyCollection<T> {
190     /// #     pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } }
191     /// #     pub fn insert(&mut self, element: T) {}
192     /// # }
193     ///
194     /// impl<'a, T> Arbitrary<'a> for MyCollection<T>
195     /// where
196     ///     T: Arbitrary<'a>,
197     /// {
198     ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
199     ///         // Get the number of `T`s we should insert into our collection.
200     ///         let len = u.arbitrary_len::<T>()?;
201     ///
202     ///         // And then create a collection of that length!
203     ///         let mut my_collection = MyCollection::with_capacity(len);
204     ///         for _ in 0..len {
205     ///             let element = T::arbitrary(u)?;
206     ///             my_collection.insert(element);
207     ///         }
208     ///
209     ///         Ok(my_collection)
210     ///     }
211     /// }
212     /// ```
arbitrary_len<ElementType>(&mut self) -> Result<usize> where ElementType: Arbitrary<'a>,213     pub fn arbitrary_len<ElementType>(&mut self) -> Result<usize>
214     where
215         ElementType: Arbitrary<'a>,
216     {
217         let byte_size = self.arbitrary_byte_size()?;
218         let (lower, upper) = <ElementType as Arbitrary>::size_hint(0);
219         let elem_size = upper.unwrap_or(lower * 2);
220         let elem_size = std::cmp::max(1, elem_size);
221         Ok(byte_size / elem_size)
222     }
223 
arbitrary_byte_size(&mut self) -> Result<usize>224     fn arbitrary_byte_size(&mut self) -> Result<usize> {
225         if self.data.is_empty() {
226             Ok(0)
227         } else if self.data.len() == 1 {
228             self.data = &[];
229             Ok(0)
230         } else {
231             // Take lengths from the end of the data, since the `libFuzzer` folks
232             // found that this lets fuzzers more efficiently explore the input
233             // space.
234             //
235             // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97
236 
237             // We only consume as many bytes as necessary to cover the entire
238             // range of the byte string.
239             // Note: We cast to u64 so we don't overflow when checking std::u32::MAX + 4 on 32-bit archs
240             let len = if self.data.len() as u64 <= std::u8::MAX as u64 + 1 {
241                 let bytes = 1;
242                 let max_size = self.data.len() - bytes;
243                 let (rest, for_size) = self.data.split_at(max_size);
244                 self.data = rest;
245                 Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize
246             } else if self.data.len() as u64 <= std::u16::MAX as u64 + 2 {
247                 let bytes = 2;
248                 let max_size = self.data.len() - bytes;
249                 let (rest, for_size) = self.data.split_at(max_size);
250                 self.data = rest;
251                 Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize
252             } else if self.data.len() as u64 <= std::u32::MAX as u64 + 4 {
253                 let bytes = 4;
254                 let max_size = self.data.len() - bytes;
255                 let (rest, for_size) = self.data.split_at(max_size);
256                 self.data = rest;
257                 Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize
258             } else {
259                 let bytes = 8;
260                 let max_size = self.data.len() - bytes;
261                 let (rest, for_size) = self.data.split_at(max_size);
262                 self.data = rest;
263                 Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize
264             };
265 
266             Ok(len)
267         }
268     }
269 
270     /// Generate an integer within the given range.
271     ///
272     /// Do not use this to generate the size of a collection. Use
273     /// `arbitrary_len` instead.
274     ///
275     /// # Panics
276     ///
277     /// Panics if `range.start > range.end`. That is, the given range must be
278     /// non-empty.
279     ///
280     /// # Example
281     ///
282     /// ```
283     /// use arbitrary::{Arbitrary, Unstructured};
284     ///
285     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
286     ///
287     /// let x: i32 = u.int_in_range(-5_000..=-1_000)
288     ///     .expect("constructed `u` with enough bytes to generate an `i32`");
289     ///
290     /// assert!(-5_000 <= x);
291     /// assert!(x <= -1_000);
292     /// ```
int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T> where T: Int,293     pub fn int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T>
294     where
295         T: Int,
296     {
297         let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?;
298         self.data = &self.data[bytes_consumed..];
299         Ok(result)
300     }
301 
int_in_range_impl<T>( range: ops::RangeInclusive<T>, mut bytes: impl Iterator<Item = u8>, ) -> Result<(T, usize)> where T: Int,302     fn int_in_range_impl<T>(
303         range: ops::RangeInclusive<T>,
304         mut bytes: impl Iterator<Item = u8>,
305     ) -> Result<(T, usize)>
306     where
307         T: Int,
308     {
309         let start = *range.start();
310         let end = *range.end();
311         assert!(
312             start <= end,
313             "`arbitrary::Unstructured::int_in_range` requires a non-empty range"
314         );
315 
316         // When there is only one possible choice, don't waste any entropy from
317         // the underlying data.
318         if start == end {
319             return Ok((start, 0));
320         }
321 
322         // From here on out we work with the unsigned representation. All of the
323         // operations performed below work out just as well whether or not `T`
324         // is a signed or unsigned integer.
325         let start = start.to_unsigned();
326         let end = end.to_unsigned();
327 
328         let delta = end.wrapping_sub(start);
329         debug_assert_ne!(delta, T::Unsigned::ZERO);
330 
331         // Compute an arbitrary integer offset from the start of the range. We
332         // do this by consuming `size_of(T)` bytes from the input to create an
333         // arbitrary integer and then clamping that int into our range bounds
334         // with a modulo operation.
335         let mut arbitrary_int = T::Unsigned::ZERO;
336         let mut bytes_consumed: usize = 0;
337 
338         while (bytes_consumed < mem::size_of::<T>())
339             && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO
340         {
341             let byte = match bytes.next() {
342                 None => break,
343                 Some(b) => b,
344             };
345             bytes_consumed += 1;
346 
347             // Combine this byte into our arbitrary integer, but avoid
348             // overflowing the shift for `u8` and `i8`.
349             arbitrary_int = if mem::size_of::<T>() == 1 {
350                 T::Unsigned::from_u8(byte)
351             } else {
352                 (arbitrary_int << 8) | T::Unsigned::from_u8(byte)
353             };
354         }
355 
356         let offset = if delta == T::Unsigned::MAX {
357             arbitrary_int
358         } else {
359             arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap())
360         };
361 
362         // Finally, we add `start` to our offset from `start` to get the result
363         // actual value within the range.
364         let result = start.wrapping_add(offset);
365 
366         // And convert back to our maybe-signed representation.
367         let result = T::from_unsigned(result);
368         debug_assert!(*range.start() <= result);
369         debug_assert!(result <= *range.end());
370 
371         Ok((result, bytes_consumed))
372     }
373 
374     /// Choose one of the given choices.
375     ///
376     /// This should only be used inside of `Arbitrary` implementations.
377     ///
378     /// Returns an error if there is not enough underlying data to make a
379     /// choice or if no choices are provided.
380     ///
381     /// # Examples
382     ///
383     /// Selecting from an array of choices:
384     ///
385     /// ```
386     /// use arbitrary::Unstructured;
387     ///
388     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
389     /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
390     ///
391     /// let choice = u.choose(&choices).unwrap();
392     ///
393     /// println!("chose {}", choice);
394     /// ```
395     ///
396     /// An error is returned if no choices are provided:
397     ///
398     /// ```
399     /// use arbitrary::Unstructured;
400     ///
401     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
402     /// let choices: [char; 0] = [];
403     ///
404     /// let result = u.choose(&choices);
405     ///
406     /// assert!(result.is_err());
407     /// ```
choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T>408     pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> {
409         let idx = self.choose_index(choices.len())?;
410         Ok(&choices[idx])
411     }
412 
413     /// Choose a value in `0..len`.
414     ///
415     /// Returns an error if the `len` is zero.
416     ///
417     /// # Examples
418     ///
419     /// Using Fisher–Yates shuffle shuffle to gerate an arbitrary permutation.
420     ///
421     /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle
422     ///
423     /// ```
424     /// use arbitrary::Unstructured;
425     ///
426     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
427     /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
428     /// let mut to_permute = &mut permutation[..];
429     /// while to_permute.len() > 1 {
430     ///     let idx = u.choose_index(to_permute.len()).unwrap();
431     ///     to_permute.swap(0, idx);
432     ///     to_permute = &mut to_permute[1..];
433     /// }
434     ///
435     /// println!("permutation: {:?}", permutation);
436     /// ```
437     ///
438     /// An error is returned if the length is zero:
439     ///
440     /// ```
441     /// use arbitrary::Unstructured;
442     ///
443     /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
444     /// let array: [i32; 0] = [];
445     ///
446     /// let result = u.choose_index(array.len());
447     ///
448     /// assert!(result.is_err());
449     /// ```
choose_index(&mut self, len: usize) -> Result<usize>450     pub fn choose_index(&mut self, len: usize) -> Result<usize> {
451         if len == 0 {
452             return Err(Error::EmptyChoose);
453         }
454         let idx = self.int_in_range(0..=len - 1)?;
455         Ok(idx)
456     }
457 
458     /// Generate a boolean according to the given ratio.
459     ///
460     /// # Panics
461     ///
462     /// Panics when the numerator and denominator do not meet these constraints:
463     ///
464     /// * `0 < numerator <= denominator`
465     ///
466     /// # Example
467     ///
468     /// Generate a boolean that is `true` five sevenths of the time:
469     ///
470     /// ```
471     /// # fn foo() -> arbitrary::Result<()> {
472     /// use arbitrary::Unstructured;
473     ///
474     /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0];
475     /// let mut u = Unstructured::new(&my_data);
476     ///
477     /// if u.ratio(5, 7)? {
478     ///     // Take this branch 5/7 of the time.
479     /// }
480     /// # Ok(())
481     /// # }
482     /// ```
ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool> where T: Int,483     pub fn ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool>
484     where
485         T: Int,
486     {
487         assert!(T::ZERO < numerator);
488         assert!(numerator <= denominator);
489         let x = self.int_in_range(T::ONE..=denominator)?;
490         Ok(x <= numerator)
491     }
492 
493     /// Fill a `buffer` with bytes from the underlying raw data.
494     ///
495     /// This should only be called within an `Arbitrary` implementation. This is
496     /// a very low-level operation. You should generally prefer calling nested
497     /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
498     /// `String::arbitrary` over using this method directly.
499     ///
500     /// If this `Unstructured` does not have enough underlying data to fill the
501     /// whole `buffer`, it pads the buffer out with zeros.
502     ///
503     /// # Example
504     ///
505     /// ```
506     /// use arbitrary::Unstructured;
507     ///
508     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
509     ///
510     /// let mut buf = [0; 2];
511     ///
512     /// assert!(u.fill_buffer(&mut buf).is_ok());
513     /// assert_eq!(buf, [1, 2]);
514     ///
515     /// assert!(u.fill_buffer(&mut buf).is_ok());
516     /// assert_eq!(buf, [3, 4]);
517     ///
518     /// assert!(u.fill_buffer(&mut buf).is_ok());
519     /// assert_eq!(buf, [0, 0]);
520     /// ```
fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()>521     pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> {
522         let n = std::cmp::min(buffer.len(), self.data.len());
523         buffer[..n].copy_from_slice(&self.data[..n]);
524         for byte in buffer[n..].iter_mut() {
525             *byte = 0;
526         }
527         self.data = &self.data[n..];
528         Ok(())
529     }
530 
531     /// Provide `size` bytes from the underlying raw data.
532     ///
533     /// This should only be called within an `Arbitrary` implementation. This is
534     /// a very low-level operation. You should generally prefer calling nested
535     /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
536     /// `String::arbitrary` over using this method directly.
537     ///
538     /// # Example
539     ///
540     /// ```
541     /// use arbitrary::Unstructured;
542     ///
543     /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
544     ///
545     /// assert!(u.bytes(2).unwrap() == &[1, 2]);
546     /// assert!(u.bytes(2).unwrap() == &[3, 4]);
547     /// ```
bytes(&mut self, size: usize) -> Result<&'a [u8]>548     pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> {
549         if self.data.len() < size {
550             return Err(Error::NotEnoughData);
551         }
552 
553         let (for_buf, rest) = self.data.split_at(size);
554         self.data = rest;
555         Ok(for_buf)
556     }
557 
558     /// Peek at `size` number of bytes of the underlying raw input.
559     ///
560     /// Does not consume the bytes, only peeks at them.
561     ///
562     /// Returns `None` if there are not `size` bytes left in the underlying raw
563     /// input.
564     ///
565     /// # Example
566     ///
567     /// ```
568     /// use arbitrary::Unstructured;
569     ///
570     /// let u = Unstructured::new(&[1, 2, 3]);
571     ///
572     /// assert_eq!(u.peek_bytes(0).unwrap(), []);
573     /// assert_eq!(u.peek_bytes(1).unwrap(), [1]);
574     /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]);
575     /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]);
576     ///
577     /// assert!(u.peek_bytes(4).is_none());
578     /// ```
peek_bytes(&self, size: usize) -> Option<&'a [u8]>579     pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> {
580         self.data.get(..size)
581     }
582 
583     /// Consume all of the rest of the remaining underlying bytes.
584     ///
585     /// Returns a slice of all the remaining, unconsumed bytes.
586     ///
587     /// # Example
588     ///
589     /// ```
590     /// use arbitrary::Unstructured;
591     ///
592     /// let mut u = Unstructured::new(&[1, 2, 3]);
593     ///
594     /// let mut remaining = u.take_rest();
595     ///
596     /// assert_eq!(remaining, [1, 2, 3]);
597     /// ```
take_rest(mut self) -> &'a [u8]598     pub fn take_rest(mut self) -> &'a [u8] {
599         mem::take(&mut self.data)
600     }
601 
602     /// Provide an iterator over elements for constructing a collection
603     ///
604     /// This is useful for implementing [`Arbitrary::arbitrary`] on collections
605     /// since the implementation is simply `u.arbitrary_iter()?.collect()`
arbitrary_iter<'b, ElementType: Arbitrary<'a>>( &'b mut self, ) -> Result<ArbitraryIter<'a, 'b, ElementType>>606     pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>(
607         &'b mut self,
608     ) -> Result<ArbitraryIter<'a, 'b, ElementType>> {
609         Ok(ArbitraryIter {
610             u: &mut *self,
611             _marker: PhantomData,
612         })
613     }
614 
615     /// Provide an iterator over elements for constructing a collection from
616     /// all the remaining bytes.
617     ///
618     /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections
619     /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()`
arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>( self, ) -> Result<ArbitraryTakeRestIter<'a, ElementType>>620     pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>(
621         self,
622     ) -> Result<ArbitraryTakeRestIter<'a, ElementType>> {
623         let (lower, upper) = ElementType::size_hint(0);
624 
625         let elem_size = upper.unwrap_or(lower * 2);
626         let elem_size = std::cmp::max(1, elem_size);
627         let size = self.len() / elem_size;
628         Ok(ArbitraryTakeRestIter {
629             size,
630             u: Some(self),
631             _marker: PhantomData,
632         })
633     }
634 
635     /// Call the given function an arbitrary number of times.
636     ///
637     /// The function is given this `Unstructured` so that it can continue to
638     /// generate arbitrary data and structures.
639     ///
640     /// You may optionaly specify minimum and maximum bounds on the number of
641     /// times the function is called.
642     ///
643     /// You may break out of the loop early by returning
644     /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return
645     /// `Ok(std::ops::ControlFlow::Continue)`.
646     ///
647     /// # Panics
648     ///
649     /// Panics if `min > max`.
650     ///
651     /// # Example
652     ///
653     /// Call a closure that generates an arbitrary type inside a context an
654     /// arbitrary number of times:
655     ///
656     /// ```
657     /// use arbitrary::{Result, Unstructured};
658     /// use std::ops::ControlFlow;
659     ///
660     /// enum Type {
661     ///     /// A boolean type.
662     ///     Bool,
663     ///
664     ///     /// An integer type.
665     ///     Int,
666     ///
667     ///     /// A list of the `i`th type in this type's context.
668     ///     List(usize),
669     /// }
670     ///
671     /// fn arbitrary_types_context(u: &mut Unstructured) -> Result<Vec<Type>> {
672     ///     let mut context = vec![];
673     ///
674     ///     u.arbitrary_loop(Some(10), Some(20), |u| {
675     ///         let num_choices = if context.is_empty() {
676     ///             2
677     ///         } else {
678     ///             3
679     ///         };
680     ///         let ty = match u.int_in_range::<u8>(1..=num_choices)? {
681     ///             1 => Type::Bool,
682     ///             2 => Type::Int,
683     ///             3 => Type::List(u.int_in_range(0..=context.len() - 1)?),
684     ///             _ => unreachable!(),
685     ///         };
686     ///         context.push(ty);
687     ///         Ok(ControlFlow::Continue(()))
688     ///     })?;
689     ///
690     ///     // The number of loop iterations are constrained by the min/max
691     ///     // bounds that we provided.
692     ///     assert!(context.len() >= 10);
693     ///     assert!(context.len() <= 20);
694     ///
695     ///     Ok(context)
696     /// }
697     /// ```
arbitrary_loop( &mut self, min: Option<u32>, max: Option<u32>, mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>, ) -> Result<()>698     pub fn arbitrary_loop(
699         &mut self,
700         min: Option<u32>,
701         max: Option<u32>,
702         mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>,
703     ) -> Result<()> {
704         let min = min.unwrap_or(0);
705         let max = max.unwrap_or(u32::MAX);
706 
707         for _ in 0..self.int_in_range(min..=max)? {
708             match f(self)? {
709                 ControlFlow::Continue(_) => continue,
710                 ControlFlow::Break(_) => break,
711             }
712         }
713 
714         Ok(())
715     }
716 }
717 
718 /// Utility iterator produced by [`Unstructured::arbitrary_iter`]
719 pub struct ArbitraryIter<'a, 'b, ElementType> {
720     u: &'b mut Unstructured<'a>,
721     _marker: PhantomData<ElementType>,
722 }
723 
724 impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, ElementType> {
725     type Item = Result<ElementType>;
next(&mut self) -> Option<Result<ElementType>>726     fn next(&mut self) -> Option<Result<ElementType>> {
727         let keep_going = self.u.arbitrary().unwrap_or(false);
728         if keep_going {
729             Some(Arbitrary::arbitrary(self.u))
730         } else {
731             None
732         }
733     }
734 }
735 
736 /// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`]
737 pub struct ArbitraryTakeRestIter<'a, ElementType> {
738     u: Option<Unstructured<'a>>,
739     size: usize,
740     _marker: PhantomData<ElementType>,
741 }
742 
743 impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> {
744     type Item = Result<ElementType>;
next(&mut self) -> Option<Result<ElementType>>745     fn next(&mut self) -> Option<Result<ElementType>> {
746         if let Some(mut u) = self.u.take() {
747             if self.size == 1 {
748                 Some(Arbitrary::arbitrary_take_rest(u))
749             } else if self.size == 0 {
750                 None
751             } else {
752                 self.size -= 1;
753                 let ret = Arbitrary::arbitrary(&mut u);
754                 self.u = Some(u);
755                 Some(ret)
756             }
757         } else {
758             None
759         }
760     }
761 }
762 
763 /// A trait that is implemented for all of the primitive integers:
764 ///
765 /// * `u8`
766 /// * `u16`
767 /// * `u32`
768 /// * `u64`
769 /// * `u128`
770 /// * `usize`
771 /// * `i8`
772 /// * `i16`
773 /// * `i32`
774 /// * `i64`
775 /// * `i128`
776 /// * `isize`
777 ///
778 /// Don't implement this trait yourself.
779 pub trait Int:
780     Copy
781     + std::fmt::Debug
782     + PartialOrd
783     + Ord
784     + ops::Sub<Self, Output = Self>
785     + ops::Rem<Self, Output = Self>
786     + ops::Shr<Self, Output = Self>
787     + ops::Shl<usize, Output = Self>
788     + ops::BitOr<Self, Output = Self>
789 {
790     #[doc(hidden)]
791     type Unsigned: Int;
792 
793     #[doc(hidden)]
794     const ZERO: Self;
795 
796     #[doc(hidden)]
797     const ONE: Self;
798 
799     #[doc(hidden)]
800     const MAX: Self;
801 
802     #[doc(hidden)]
from_u8(b: u8) -> Self803     fn from_u8(b: u8) -> Self;
804 
805     #[doc(hidden)]
from_usize(u: usize) -> Self806     fn from_usize(u: usize) -> Self;
807 
808     #[doc(hidden)]
checked_add(self, rhs: Self) -> Option<Self>809     fn checked_add(self, rhs: Self) -> Option<Self>;
810 
811     #[doc(hidden)]
wrapping_add(self, rhs: Self) -> Self812     fn wrapping_add(self, rhs: Self) -> Self;
813 
814     #[doc(hidden)]
wrapping_sub(self, rhs: Self) -> Self815     fn wrapping_sub(self, rhs: Self) -> Self;
816 
817     #[doc(hidden)]
to_unsigned(self) -> Self::Unsigned818     fn to_unsigned(self) -> Self::Unsigned;
819 
820     #[doc(hidden)]
from_unsigned(unsigned: Self::Unsigned) -> Self821     fn from_unsigned(unsigned: Self::Unsigned) -> Self;
822 }
823 
824 macro_rules! impl_int {
825     ( $( $ty:ty : $unsigned_ty: ty ; )* ) => {
826         $(
827             impl Int for $ty {
828                 type Unsigned = $unsigned_ty;
829 
830                 const ZERO: Self = 0;
831 
832                 const ONE: Self = 1;
833 
834                 const MAX: Self = Self::MAX;
835 
836                 fn from_u8(b: u8) -> Self {
837                     b as Self
838                 }
839 
840                 fn from_usize(u: usize) -> Self {
841                     u as Self
842                 }
843 
844                 fn checked_add(self, rhs: Self) -> Option<Self> {
845                     <$ty>::checked_add(self, rhs)
846                 }
847 
848                 fn wrapping_add(self, rhs: Self) -> Self {
849                     <$ty>::wrapping_add(self, rhs)
850                 }
851 
852                 fn wrapping_sub(self, rhs: Self) -> Self {
853                     <$ty>::wrapping_sub(self, rhs)
854                 }
855 
856                 fn to_unsigned(self) -> Self::Unsigned {
857                     self as $unsigned_ty
858                 }
859 
860                 fn from_unsigned(unsigned: $unsigned_ty) -> Self {
861                     unsigned as Self
862                 }
863             }
864         )*
865     }
866 }
867 
868 impl_int! {
869     u8: u8;
870     u16: u16;
871     u32: u32;
872     u64: u64;
873     u128: u128;
874     usize: usize;
875     i8: u8;
876     i16: u16;
877     i32: u32;
878     i64: u64;
879     i128: u128;
880     isize: usize;
881 }
882 
883 #[cfg(test)]
884 mod tests {
885     use super::*;
886 
887     #[test]
test_byte_size()888     fn test_byte_size() {
889         let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
890         // Should take one byte off the end
891         assert_eq!(u.arbitrary_byte_size().unwrap(), 6);
892         assert_eq!(u.len(), 9);
893         let mut v = vec![];
894         v.resize(260, 0);
895         v.push(1);
896         v.push(4);
897         let mut u = Unstructured::new(&v);
898         // Should read two bytes off the end
899         assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104);
900         assert_eq!(u.len(), 260);
901     }
902 
903     #[test]
int_in_range_of_one()904     fn int_in_range_of_one() {
905         let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
906         let x = u.int_in_range(0..=0).unwrap();
907         assert_eq!(x, 0);
908         let choice = *u.choose(&[42]).unwrap();
909         assert_eq!(choice, 42)
910     }
911 
912     #[test]
int_in_range_uses_minimal_amount_of_bytes()913     fn int_in_range_uses_minimal_amount_of_bytes() {
914         let mut u = Unstructured::new(&[1, 2]);
915         assert_eq!(1, u.int_in_range::<u8>(0..=u8::MAX).unwrap());
916         assert_eq!(u.len(), 1);
917 
918         let mut u = Unstructured::new(&[1, 2]);
919         assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32).unwrap());
920         assert_eq!(u.len(), 1);
921 
922         let mut u = Unstructured::new(&[1]);
923         assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32 + 1).unwrap());
924         assert!(u.is_empty());
925     }
926 
927     #[test]
int_in_range_in_bounds()928     fn int_in_range_in_bounds() {
929         for input in u8::MIN..=u8::MAX {
930             let input = [input];
931 
932             let mut u = Unstructured::new(&input);
933             let x = u.int_in_range(1..=u8::MAX).unwrap();
934             assert_ne!(x, 0);
935 
936             let mut u = Unstructured::new(&input);
937             let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
938             assert_ne!(x, u8::MAX);
939         }
940     }
941 
942     #[test]
int_in_range_covers_unsigned_range()943     fn int_in_range_covers_unsigned_range() {
944         // Test that we generate all values within the range given to
945         // `int_in_range`.
946 
947         let mut full = [false; u8::MAX as usize + 1];
948         let mut no_zero = [false; u8::MAX as usize];
949         let mut no_max = [false; u8::MAX as usize];
950         let mut narrow = [false; 10];
951 
952         for input in u8::MIN..=u8::MAX {
953             let input = [input];
954 
955             let mut u = Unstructured::new(&input);
956             let x = u.int_in_range(0..=u8::MAX).unwrap();
957             full[x as usize] = true;
958 
959             let mut u = Unstructured::new(&input);
960             let x = u.int_in_range(1..=u8::MAX).unwrap();
961             no_zero[x as usize - 1] = true;
962 
963             let mut u = Unstructured::new(&input);
964             let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
965             no_max[x as usize] = true;
966 
967             let mut u = Unstructured::new(&input);
968             let x = u.int_in_range(100..=109).unwrap();
969             narrow[x as usize - 100] = true;
970         }
971 
972         for (i, covered) in full.iter().enumerate() {
973             assert!(covered, "full[{}] should have been generated", i);
974         }
975         for (i, covered) in no_zero.iter().enumerate() {
976             assert!(covered, "no_zero[{}] should have been generated", i);
977         }
978         for (i, covered) in no_max.iter().enumerate() {
979             assert!(covered, "no_max[{}] should have been generated", i);
980         }
981         for (i, covered) in narrow.iter().enumerate() {
982             assert!(covered, "narrow[{}] should have been generated", i);
983         }
984     }
985 
986     #[test]
int_in_range_covers_signed_range()987     fn int_in_range_covers_signed_range() {
988         // Test that we generate all values within the range given to
989         // `int_in_range`.
990 
991         let mut full = [false; u8::MAX as usize + 1];
992         let mut no_min = [false; u8::MAX as usize];
993         let mut no_max = [false; u8::MAX as usize];
994         let mut narrow = [false; 21];
995 
996         let abs_i8_min: isize = 128;
997 
998         for input in 0..=u8::MAX {
999             let input = [input];
1000 
1001             let mut u = Unstructured::new(&input);
1002             let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap();
1003             full[(x as isize + abs_i8_min) as usize] = true;
1004 
1005             let mut u = Unstructured::new(&input);
1006             let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap();
1007             no_min[(x as isize + abs_i8_min - 1) as usize] = true;
1008 
1009             let mut u = Unstructured::new(&input);
1010             let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap();
1011             no_max[(x as isize + abs_i8_min) as usize] = true;
1012 
1013             let mut u = Unstructured::new(&input);
1014             let x = u.int_in_range(-10..=10).unwrap();
1015             narrow[(x as isize + 10) as usize] = true;
1016         }
1017 
1018         for (i, covered) in full.iter().enumerate() {
1019             assert!(covered, "full[{}] should have been generated", i);
1020         }
1021         for (i, covered) in no_min.iter().enumerate() {
1022             assert!(covered, "no_min[{}] should have been generated", i);
1023         }
1024         for (i, covered) in no_max.iter().enumerate() {
1025             assert!(covered, "no_max[{}] should have been generated", i);
1026         }
1027         for (i, covered) in narrow.iter().enumerate() {
1028             assert!(covered, "narrow[{}] should have been generated", i);
1029         }
1030     }
1031 }
1032