1 use std::iter::IntoIterator;
2 use std::time::Duration;
3 use std::time::Instant;
4 
5 use crate::black_box;
6 use crate::measurement::{Measurement, WallTime};
7 use crate::BatchSize;
8 
9 #[cfg(feature = "async")]
10 use std::future::Future;
11 
12 #[cfg(feature = "async")]
13 use crate::async_executor::AsyncExecutor;
14 
15 // ================================== MAINTENANCE NOTE =============================================
16 // Any changes made to either Bencher or AsyncBencher will have to be replicated to the other!
17 // ================================== MAINTENANCE NOTE =============================================
18 
19 /// Timer struct used to iterate a benchmarked function and measure the runtime.
20 ///
21 /// This struct provides different timing loops as methods. Each timing loop provides a different
22 /// way to time a routine and each has advantages and disadvantages.
23 ///
24 /// * If you want to do the iteration and measurement yourself (eg. passing the iteration count
25 ///   to a separate process), use `iter_custom`.
26 /// * If your routine requires no per-iteration setup and returns a value with an expensive `drop`
27 ///   method, use `iter_with_large_drop`.
28 /// * If your routine requires some per-iteration setup that shouldn't be timed, use `iter_batched`
29 ///   or `iter_batched_ref`. See [`BatchSize`](enum.BatchSize.html) for a discussion of batch sizes.
30 ///   If the setup value implements `Drop` and you don't want to include the `drop` time in the
31 ///   measurement, use `iter_batched_ref`, otherwise use `iter_batched`. These methods are also
32 ///   suitable for benchmarking routines which return a value with an expensive `drop` method,
33 ///   but are more complex than `iter_with_large_drop`.
34 /// * Otherwise, use `iter`.
pub struct Bencher<'a, M: Measurement = WallTime> {
    pub(crate) iterated: bool,         // Have we iterated this benchmark? Set by the iter_* methods, reset by assert_iterated().
    pub(crate) iters: u64,             // Number of times to iterate this benchmark
    pub(crate) value: M::Value,        // The measured value accumulated by the timing loops
    pub(crate) measurement: &'a M,     // Reference to the measurement object (e.g. WallTime)
    pub(crate) elapsed_time: Duration, // How much time did it take to perform the iteration? Used for the warmup period.
}
42 impl<'a, M: Measurement> Bencher<'a, M> {
43     /// Times a `routine` by executing it many times and timing the total elapsed time.
44     ///
45     /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
46     ///
47     /// # Timing model
48     ///
49     /// Note that the `Bencher` also times the time required to destroy the output of `routine()`.
50     /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
51     /// to the runtime of the `routine`.
52     ///
53     /// ```text
54     /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
55     /// ```
56     ///
57     /// # Example
58     ///
59     /// ```rust
60     /// #[macro_use] extern crate criterion;
61     ///
62     /// use criterion::*;
63     ///
64     /// // The function to benchmark
65     /// fn foo() {
66     ///     // ...
67     /// }
68     ///
69     /// fn bench(c: &mut Criterion) {
70     ///     c.bench_function("iter", move |b| {
71     ///         b.iter(|| foo())
72     ///     });
73     /// }
74     ///
75     /// criterion_group!(benches, bench);
76     /// criterion_main!(benches);
77     /// ```
78     ///
79     #[inline(never)]
iter<O, R>(&mut self, mut routine: R) where R: FnMut() -> O,80     pub fn iter<O, R>(&mut self, mut routine: R)
81     where
82         R: FnMut() -> O,
83     {
84         self.iterated = true;
85         let time_start = Instant::now();
86         let start = self.measurement.start();
87         for _ in 0..self.iters {
88             black_box(routine());
89         }
90         self.value = self.measurement.end(start);
91         self.elapsed_time = time_start.elapsed();
92     }
93 
94     /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
95     ///
96     /// Prefer this timing loop in cases where `routine` has to do its own measurements to
97     /// get accurate timing information (for example in multi-threaded scenarios where you spawn
98     /// and coordinate with multiple threads).
99     ///
100     /// # Timing model
101     /// Custom, the timing model is whatever is returned as the Duration from `routine`.
102     ///
103     /// # Example
104     /// ```rust
105     /// #[macro_use] extern crate criterion;
106     /// use criterion::*;
107     /// use criterion::black_box;
108     /// use std::time::Instant;
109     ///
110     /// fn foo() {
111     ///     // ...
112     /// }
113     ///
114     /// fn bench(c: &mut Criterion) {
115     ///     c.bench_function("iter", move |b| {
116     ///         b.iter_custom(|iters| {
117     ///             let start = Instant::now();
118     ///             for _i in 0..iters {
119     ///                 black_box(foo());
120     ///             }
121     ///             start.elapsed()
122     ///         })
123     ///     });
124     /// }
125     ///
126     /// criterion_group!(benches, bench);
127     /// criterion_main!(benches);
128     /// ```
129     ///
130     #[inline(never)]
iter_custom<R>(&mut self, mut routine: R) where R: FnMut(u64) -> M::Value,131     pub fn iter_custom<R>(&mut self, mut routine: R)
132     where
133         R: FnMut(u64) -> M::Value,
134     {
135         self.iterated = true;
136         let time_start = Instant::now();
137         self.value = routine(self.iters);
138         self.elapsed_time = time_start.elapsed();
139     }
140 
141     #[doc(hidden)]
iter_with_setup<I, O, S, R>(&mut self, setup: S, routine: R) where S: FnMut() -> I, R: FnMut(I) -> O,142     pub fn iter_with_setup<I, O, S, R>(&mut self, setup: S, routine: R)
143     where
144         S: FnMut() -> I,
145         R: FnMut(I) -> O,
146     {
147         self.iter_batched(setup, routine, BatchSize::PerIteration);
148     }
149 
150     /// Times a `routine` by collecting its output on each iteration. This avoids timing the
151     /// destructor of the value returned by `routine`.
152     ///
153     /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
154     /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
155     ///
156     /// # Timing model
157     ///
158     /// ``` text
159     /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
160     /// ```
161     ///
162     /// # Example
163     ///
164     /// ```rust
165     /// #[macro_use] extern crate criterion;
166     ///
167     /// use criterion::*;
168     ///
169     /// fn create_vector() -> Vec<u64> {
170     ///     # vec![]
171     ///     // ...
172     /// }
173     ///
174     /// fn bench(c: &mut Criterion) {
175     ///     c.bench_function("with_drop", move |b| {
176     ///         // This will avoid timing the Vec::drop.
177     ///         b.iter_with_large_drop(|| create_vector())
178     ///     });
179     /// }
180     ///
181     /// criterion_group!(benches, bench);
182     /// criterion_main!(benches);
183     /// ```
184     ///
iter_with_large_drop<O, R>(&mut self, mut routine: R) where R: FnMut() -> O,185     pub fn iter_with_large_drop<O, R>(&mut self, mut routine: R)
186     where
187         R: FnMut() -> O,
188     {
189         self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
190     }
191 
192     /// Times a `routine` that requires some input by generating a batch of input, then timing the
193     /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
194     /// details on choosing the batch size. Use this when the routine must consume its input.
195     ///
196     /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
197     /// data on each iteration.
198     ///
199     /// # Timing model
200     ///
201     /// ```text
202     /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
203     /// ```
204     ///
205     /// # Example
206     ///
207     /// ```rust
208     /// #[macro_use] extern crate criterion;
209     ///
210     /// use criterion::*;
211     ///
212     /// fn create_scrambled_data() -> Vec<u64> {
213     ///     # vec![]
214     ///     // ...
215     /// }
216     ///
217     /// // The sorting algorithm to test
218     /// fn sort(data: &mut [u64]) {
219     ///     // ...
220     /// }
221     ///
222     /// fn bench(c: &mut Criterion) {
223     ///     let data = create_scrambled_data();
224     ///
225     ///     c.bench_function("with_setup", move |b| {
226     ///         // This will avoid timing the to_vec call.
227     ///         b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput)
228     ///     });
229     /// }
230     ///
231     /// criterion_group!(benches, bench);
232     /// criterion_main!(benches);
233     /// ```
234     ///
235     #[inline(never)]
iter_batched<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize) where S: FnMut() -> I, R: FnMut(I) -> O,236     pub fn iter_batched<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
237     where
238         S: FnMut() -> I,
239         R: FnMut(I) -> O,
240     {
241         self.iterated = true;
242         let batch_size = size.iters_per_batch(self.iters);
243         assert!(batch_size != 0, "Batch size must not be zero.");
244         let time_start = Instant::now();
245         self.value = self.measurement.zero();
246 
247         if batch_size == 1 {
248             for _ in 0..self.iters {
249                 let input = black_box(setup());
250 
251                 let start = self.measurement.start();
252                 let output = routine(input);
253                 let end = self.measurement.end(start);
254                 self.value = self.measurement.add(&self.value, &end);
255 
256                 drop(black_box(output));
257             }
258         } else {
259             let mut iteration_counter = 0;
260 
261             while iteration_counter < self.iters {
262                 let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);
263 
264                 let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
265                 let mut outputs = Vec::with_capacity(batch_size as usize);
266 
267                 let start = self.measurement.start();
268                 outputs.extend(inputs.into_iter().map(&mut routine));
269                 let end = self.measurement.end(start);
270                 self.value = self.measurement.add(&self.value, &end);
271 
272                 black_box(outputs);
273 
274                 iteration_counter += batch_size;
275             }
276         }
277 
278         self.elapsed_time = time_start.elapsed();
279     }
280 
281     /// Times a `routine` that requires some input by generating a batch of input, then timing the
282     /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
283     /// details on choosing the batch size. Use this when the routine should accept the input by
284     /// mutable reference.
285     ///
286     /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
287     /// data on each iteration.
288     ///
289     /// # Timing model
290     ///
291     /// ```text
292     /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
293     /// ```
294     ///
295     /// # Example
296     ///
297     /// ```rust
298     /// #[macro_use] extern crate criterion;
299     ///
300     /// use criterion::*;
301     ///
302     /// fn create_scrambled_data() -> Vec<u64> {
303     ///     # vec![]
304     ///     // ...
305     /// }
306     ///
307     /// // The sorting algorithm to test
308     /// fn sort(data: &mut [u64]) {
309     ///     // ...
310     /// }
311     ///
312     /// fn bench(c: &mut Criterion) {
313     ///     let data = create_scrambled_data();
314     ///
315     ///     c.bench_function("with_setup", move |b| {
316     ///         // This will avoid timing the to_vec call.
    ///         b.iter_batched_ref(|| data.clone(), |data| sort(data), BatchSize::SmallInput)
318     ///     });
319     /// }
320     ///
321     /// criterion_group!(benches, bench);
322     /// criterion_main!(benches);
323     /// ```
324     ///
325     #[inline(never)]
iter_batched_ref<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize) where S: FnMut() -> I, R: FnMut(&mut I) -> O,326     pub fn iter_batched_ref<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
327     where
328         S: FnMut() -> I,
329         R: FnMut(&mut I) -> O,
330     {
331         self.iterated = true;
332         let batch_size = size.iters_per_batch(self.iters);
333         assert!(batch_size != 0, "Batch size must not be zero.");
334         let time_start = Instant::now();
335         self.value = self.measurement.zero();
336 
337         if batch_size == 1 {
338             for _ in 0..self.iters {
339                 let mut input = black_box(setup());
340 
341                 let start = self.measurement.start();
342                 let output = routine(&mut input);
343                 let end = self.measurement.end(start);
344                 self.value = self.measurement.add(&self.value, &end);
345 
346                 drop(black_box(output));
347                 drop(black_box(input));
348             }
349         } else {
350             let mut iteration_counter = 0;
351 
352             while iteration_counter < self.iters {
353                 let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);
354 
355                 let mut inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
356                 let mut outputs = Vec::with_capacity(batch_size as usize);
357 
358                 let start = self.measurement.start();
359                 outputs.extend(inputs.iter_mut().map(&mut routine));
360                 let end = self.measurement.end(start);
361                 self.value = self.measurement.add(&self.value, &end);
362 
363                 black_box(outputs);
364 
365                 iteration_counter += batch_size;
366             }
367         }
368         self.elapsed_time = time_start.elapsed();
369     }
370 
371     // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly
372     // if they don't.
assert_iterated(&mut self)373     pub(crate) fn assert_iterated(&mut self) {
374         assert!(
375             self.iterated,
376             "Benchmark function must call Bencher::iter or related method."
377         );
378         self.iterated = false;
379     }
380 
381     /// Convert this bencher into an AsyncBencher, which enables async/await support.
382     #[cfg(feature = "async")]
to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M>383     pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M> {
384         AsyncBencher { b: self, runner }
385     }
386 }
387 
/// Async/await variant of the Bencher struct.
#[cfg(feature = "async")]
pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> {
    b: &'b mut Bencher<'a, M>, // Underlying synchronous bencher; measured values are written back into it
    runner: A,                 // Executor used to block on the benchmarked futures
}
394 #[cfg(feature = "async")]
395 impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> {
396     /// Times a `routine` by executing it many times and timing the total elapsed time.
397     ///
398     /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
399     ///
400     /// # Timing model
401     ///
402     /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`.
403     /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
404     /// to the runtime of the `routine`.
405     ///
406     /// ```text
407     /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
408     /// ```
409     ///
410     /// # Example
411     ///
412     /// ```rust
413     /// #[macro_use] extern crate criterion;
414     ///
415     /// use criterion::*;
416     /// use criterion::async_executor::FuturesExecutor;
417     ///
418     /// // The function to benchmark
419     /// async fn foo() {
420     ///     // ...
421     /// }
422     ///
423     /// fn bench(c: &mut Criterion) {
424     ///     c.bench_function("iter", move |b| {
425     ///         b.to_async(FuturesExecutor).iter(|| async { foo().await } )
426     ///     });
427     /// }
428     ///
429     /// criterion_group!(benches, bench);
430     /// criterion_main!(benches);
431     /// ```
432     ///
433     #[inline(never)]
iter<O, R, F>(&mut self, mut routine: R) where R: FnMut() -> F, F: Future<Output = O>,434     pub fn iter<O, R, F>(&mut self, mut routine: R)
435     where
436         R: FnMut() -> F,
437         F: Future<Output = O>,
438     {
439         let AsyncBencher { b, runner } = self;
440         runner.block_on(async {
441             b.iterated = true;
442             let time_start = Instant::now();
443             let start = b.measurement.start();
444             for _ in 0..b.iters {
445                 black_box(routine().await);
446             }
447             b.value = b.measurement.end(start);
448             b.elapsed_time = time_start.elapsed();
449         });
450     }
451 
452     /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
453     ///
454     /// Prefer this timing loop in cases where `routine` has to do its own measurements to
455     /// get accurate timing information (for example in multi-threaded scenarios where you spawn
456     /// and coordinate with multiple threads).
457     ///
458     /// # Timing model
459     /// Custom, the timing model is whatever is returned as the Duration from `routine`.
460     ///
461     /// # Example
462     /// ```rust
463     /// #[macro_use] extern crate criterion;
464     /// use criterion::*;
465     /// use criterion::black_box;
466     /// use criterion::async_executor::FuturesExecutor;
467     /// use std::time::Instant;
468     ///
469     /// async fn foo() {
470     ///     // ...
471     /// }
472     ///
473     /// fn bench(c: &mut Criterion) {
474     ///     c.bench_function("iter", move |b| {
475     ///         b.to_async(FuturesExecutor).iter_custom(|iters| {
476     ///             async move {
477     ///                 let start = Instant::now();
478     ///                 for _i in 0..iters {
479     ///                     black_box(foo().await);
480     ///                 }
481     ///                 start.elapsed()
482     ///             }
483     ///         })
484     ///     });
485     /// }
486     ///
487     /// criterion_group!(benches, bench);
488     /// criterion_main!(benches);
489     /// ```
490     ///
491     #[inline(never)]
iter_custom<R, F>(&mut self, mut routine: R) where R: FnMut(u64) -> F, F: Future<Output = M::Value>,492     pub fn iter_custom<R, F>(&mut self, mut routine: R)
493     where
494         R: FnMut(u64) -> F,
495         F: Future<Output = M::Value>,
496     {
497         let AsyncBencher { b, runner } = self;
498         runner.block_on(async {
499             b.iterated = true;
500             let time_start = Instant::now();
501             b.value = routine(b.iters).await;
502             b.elapsed_time = time_start.elapsed();
503         })
504     }
505 
506     #[doc(hidden)]
iter_with_setup<I, O, S, R, F>(&mut self, setup: S, routine: R) where S: FnMut() -> I, R: FnMut(I) -> F, F: Future<Output = O>,507     pub fn iter_with_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
508     where
509         S: FnMut() -> I,
510         R: FnMut(I) -> F,
511         F: Future<Output = O>,
512     {
513         self.iter_batched(setup, routine, BatchSize::PerIteration);
514     }
515 
516     /// Times a `routine` by collecting its output on each iteration. This avoids timing the
517     /// destructor of the value returned by `routine`.
518     ///
519     /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
520     /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
521     ///
522     /// # Timing model
523     ///
524     /// ``` text
525     /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
526     /// ```
527     ///
528     /// # Example
529     ///
530     /// ```rust
531     /// #[macro_use] extern crate criterion;
532     ///
533     /// use criterion::*;
534     /// use criterion::async_executor::FuturesExecutor;
535     ///
536     /// async fn create_vector() -> Vec<u64> {
537     ///     # vec![]
538     ///     // ...
539     /// }
540     ///
541     /// fn bench(c: &mut Criterion) {
542     ///     c.bench_function("with_drop", move |b| {
543     ///         // This will avoid timing the Vec::drop.
544     ///         b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await })
545     ///     });
546     /// }
547     ///
548     /// criterion_group!(benches, bench);
549     /// criterion_main!(benches);
550     /// ```
551     ///
iter_with_large_drop<O, R, F>(&mut self, mut routine: R) where R: FnMut() -> F, F: Future<Output = O>,552     pub fn iter_with_large_drop<O, R, F>(&mut self, mut routine: R)
553     where
554         R: FnMut() -> F,
555         F: Future<Output = O>,
556     {
557         self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
558     }
559 
560     #[doc(hidden)]
iter_with_large_setup<I, O, S, R, F>(&mut self, setup: S, routine: R) where S: FnMut() -> I, R: FnMut(I) -> F, F: Future<Output = O>,561     pub fn iter_with_large_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
562     where
563         S: FnMut() -> I,
564         R: FnMut(I) -> F,
565         F: Future<Output = O>,
566     {
567         self.iter_batched(setup, routine, BatchSize::NumBatches(1));
568     }
569 
570     /// Times a `routine` that requires some input by generating a batch of input, then timing the
571     /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
572     /// details on choosing the batch size. Use this when the routine must consume its input.
573     ///
574     /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
575     /// data on each iteration.
576     ///
577     /// # Timing model
578     ///
579     /// ```text
580     /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
581     /// ```
582     ///
583     /// # Example
584     ///
585     /// ```rust
586     /// #[macro_use] extern crate criterion;
587     ///
588     /// use criterion::*;
589     /// use criterion::async_executor::FuturesExecutor;
590     ///
591     /// fn create_scrambled_data() -> Vec<u64> {
592     ///     # vec![]
593     ///     // ...
594     /// }
595     ///
596     /// // The sorting algorithm to test
597     /// async fn sort(data: &mut [u64]) {
598     ///     // ...
599     /// }
600     ///
601     /// fn bench(c: &mut Criterion) {
602     ///     let data = create_scrambled_data();
603     ///
604     ///     c.bench_function("with_setup", move |b| {
605     ///         // This will avoid timing the to_vec call.
    ///         b.to_async(FuturesExecutor).iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
607     ///     });
608     /// }
609     ///
610     /// criterion_group!(benches, bench);
611     /// criterion_main!(benches);
612     /// ```
613     ///
    #[inline(never)]
    pub fn iter_batched<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        // Destructure so the async block can borrow the inner bencher directly.
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            // Record that a timing loop actually ran; assert_iterated() checks this.
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            // Wall-clock timer used only for warmup bookkeeping, independent of
            // the measurement object.
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Per-iteration batching: time each awaited call individually so
                // setup and the drop of the output stay outside the timed region.
                for _ in 0..b.iters {
                    let input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    drop(black_box(output));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller than the nominal batch size.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    // Build inputs up front (untimed) and preallocate the output
                    // buffer so no reallocation happens inside the timed region.
                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does: each
                    // element requires an .await, so we push in an explicit loop.
                    for input in inputs {
                        outputs.push(routine(input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Outputs are dropped here, after timing has stopped.
                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }

            b.elapsed_time = time_start.elapsed();
        })
    }
666 
667     /// Times a `routine` that requires some input by generating a batch of input, then timing the
668     /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
669     /// details on choosing the batch size. Use this when the routine should accept the input by
670     /// mutable reference.
671     ///
672     /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
673     /// data on each iteration.
674     ///
675     /// # Timing model
676     ///
677     /// ```text
678     /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
679     /// ```
680     ///
681     /// # Example
682     ///
683     /// ```rust
684     /// #[macro_use] extern crate criterion;
685     ///
686     /// use criterion::*;
687     /// use criterion::async_executor::FuturesExecutor;
688     ///
689     /// fn create_scrambled_data() -> Vec<u64> {
690     ///     # vec![]
691     ///     // ...
692     /// }
693     ///
694     /// // The sorting algorithm to test
695     /// async fn sort(data: &mut [u64]) {
696     ///     // ...
697     /// }
698     ///
699     /// fn bench(c: &mut Criterion) {
700     ///     let data = create_scrambled_data();
701     ///
702     ///     c.bench_function("with_setup", move |b| {
703     ///         // This will avoid timing the to_vec call.
    ///         b.to_async(FuturesExecutor).iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
705     ///     });
706     /// }
707     ///
708     /// criterion_group!(benches, bench);
709     /// criterion_main!(benches);
710     /// ```
711     ///
    #[inline(never)]
    pub fn iter_batched_ref<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> F,
        F: Future<Output = O>,
    {
        // Destructure so the async block can borrow the inner bencher directly.
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            // Record that a timing loop actually ran; assert_iterated() checks this.
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            // Wall-clock timer used only for warmup bookkeeping, independent of
            // the measurement object.
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Per-iteration batching: the routine borrows its input, so both
                // the input's and the output's drops happen outside the timed region.
                for _ in 0..b.iters {
                    let mut input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(&mut input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    drop(black_box(output));
                    drop(black_box(input));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller than the nominal batch size.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    // Build inputs up front (untimed) and preallocate the output
                    // buffer so no reallocation happens inside the timed region.
                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does: each
                    // element requires an .await, so we push in an explicit loop.
                    for mut input in inputs {
                        outputs.push(routine(&mut input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Outputs (then inputs) are dropped here, after timing stops.
                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }
            b.elapsed_time = time_start.elapsed();
        });
    }
764 }
765