1 //! A statistics-driven micro-benchmarking library written in Rust.
2 //!
3 //! This crate is a microbenchmarking library which aims to provide strong
4 //! statistical confidence in detecting and estimating the size of performance
5 //! improvements and regressions, while also being easy to use.
6 //!
7 //! See
8 //! [the user guide](https://bheisler.github.io/criterion.rs/book/index.html)
9 //! for examples as well as details on the measurement and analysis process,
10 //! and the output.
11 //!
12 //! ## Features:
13 //! * Collects detailed statistics, providing strong confidence that changes
14 //! to performance are real, not measurement noise.
15 //! * Produces detailed charts, providing thorough understanding of your code's
16 //! performance behavior.
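//!
//! As a quick orientation, a benchmark target might look roughly like the sketch
//! below (`fibonacci` here is just a placeholder for your own code):
//!
//! ```rust,no_run
//! use criterion::{black_box, criterion_group, criterion_main, Criterion};
//!
//! fn fibonacci(n: u64) -> u64 {
//!     match n {
//!         0 | 1 => 1,
//!         n => fibonacci(n - 1) + fibonacci(n - 2),
//!     }
//! }
//!
//! fn criterion_benchmark(c: &mut Criterion) {
//!     // `black_box` keeps the optimizer from constant-folding the argument away.
//!     c.bench_function("fib 20", |b| b.iter(|| fibonacci(black_box(20))));
//! }
//!
//! criterion_group!(benches, criterion_benchmark);
//! criterion_main!(benches);
//! ```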
17
18 #![warn(missing_docs)]
19 #![warn(bare_trait_objects)]
20 #![cfg_attr(feature = "real_blackbox", feature(test))]
21 #![cfg_attr(
22 feature = "cargo-clippy",
23 allow(
24 clippy::just_underscores_and_digits, // Used in the stats code
25 clippy::transmute_ptr_to_ptr, // Used in the stats code
26 clippy::manual_non_exhaustive, // Remove when MSRV bumped above 1.40
27 )
28 )]
29
30 #[cfg(all(feature = "rayon", target_arch = "wasm32"))]
31 compile_error!("Rayon cannot be used when targeting wasm32. Try disabling default features.");
32
33 #[cfg(test)]
34 extern crate approx;
35
36 #[cfg(test)]
37 extern crate quickcheck;
38
39 use regex::Regex;
40
41 #[macro_use]
42 extern crate lazy_static;
43
44 #[cfg(feature = "real_blackbox")]
45 extern crate test;
46
47 #[macro_use]
48 extern crate serde_derive;
49
50 // Needs to be declared before other modules
51 // in order to be usable there.
52 #[macro_use]
53 mod macros_private;
54 #[macro_use]
55 mod analysis;
56 mod benchmark;
57 #[macro_use]
58 mod benchmark_group;
59 pub mod async_executor;
60 mod bencher;
61 mod connection;
62 #[cfg(feature = "csv_output")]
63 mod csv_report;
64 mod error;
65 mod estimate;
66 mod format;
67 mod fs;
68 mod html;
69 mod kde;
70 mod macros;
71 pub mod measurement;
72 mod plot;
73 pub mod profiler;
74 mod report;
75 mod routine;
76 mod stats;
77
78 use std::cell::RefCell;
79 use std::collections::HashSet;
80 use std::default::Default;
81 use std::env;
82 use std::net::TcpStream;
83 use std::path::{Path, PathBuf};
84 use std::process::Command;
85 use std::sync::{Mutex, MutexGuard};
86 use std::time::Duration;
87
88 use criterion_plot::{Version, VersionError};
89
90 use crate::benchmark::BenchmarkConfig;
91 use crate::connection::Connection;
92 use crate::connection::OutgoingMessage;
93 use crate::html::Html;
94 use crate::measurement::{Measurement, WallTime};
95 #[cfg(feature = "plotters")]
96 use crate::plot::PlottersBackend;
97 use crate::plot::{Gnuplot, Plotter};
98 use crate::profiler::{ExternalProfiler, Profiler};
99 use crate::report::{BencherReport, CliReport, CliVerbosity, Report, ReportContext, Reports};
100
101 #[cfg(feature = "async")]
102 pub use crate::bencher::AsyncBencher;
103 pub use crate::bencher::Bencher;
104 pub use crate::benchmark_group::{BenchmarkGroup, BenchmarkId};
105
106 lazy_static! {
107 static ref DEBUG_ENABLED: bool = std::env::var_os("CRITERION_DEBUG").is_some();
108 static ref GNUPLOT_VERSION: Result<Version, VersionError> = criterion_plot::version();
109 static ref DEFAULT_PLOTTING_BACKEND: PlottingBackend = {
110 if cfg!(feature = "html_reports") {
111 match &*GNUPLOT_VERSION {
112 Ok(_) => PlottingBackend::Gnuplot,
113 Err(e) => {
114 match e {
115 VersionError::Exec(_) => eprintln!("Gnuplot not found, using plotters backend"),
116 e => eprintln!(
117 "Gnuplot not found or not usable, using plotters backend\n{}",
118 e
119 ),
120 };
121 PlottingBackend::Plotters
122 }
123 }
124 } else {
125 PlottingBackend::None
126 }
127 };
128 static ref CARGO_CRITERION_CONNECTION: Option<Mutex<Connection>> = {
129 match std::env::var("CARGO_CRITERION_PORT") {
130 Ok(port_str) => {
131 let port: u16 = port_str.parse().ok()?;
132 let stream = TcpStream::connect(("localhost", port)).ok()?;
133 Some(Mutex::new(Connection::new(stream).ok()?))
134 }
135 Err(_) => None,
136 }
137 };
138 static ref DEFAULT_OUTPUT_DIRECTORY: PathBuf = {
139 // Set criterion home to (in descending order of preference):
140 // - $CRITERION_HOME (cargo-criterion sets this, but other users could as well)
141 // - $CARGO_TARGET_DIR/criterion
142 // - the cargo target dir from `cargo metadata`
143 // - ./target/criterion
144 if let Some(value) = env::var_os("CRITERION_HOME") {
145 PathBuf::from(value)
146 } else if let Some(path) = cargo_target_directory() {
147 path.join("criterion")
148 } else {
149 PathBuf::from("target/criterion")
150 }
151 };
152 }
153
154 fn debug_enabled() -> bool {
155 *DEBUG_ENABLED
156 }
157
158 /// A function that is opaque to the optimizer, used to prevent the compiler from
159 /// optimizing away computations in a benchmark.
160 ///
161 /// This variant is backed by the (unstable) test::black_box function.
162 #[cfg(feature = "real_blackbox")]
163 pub fn black_box<T>(dummy: T) -> T {
164 test::black_box(dummy)
165 }
166
167 /// A function that is opaque to the optimizer, used to prevent the compiler from
168 /// optimizing away computations in a benchmark.
169 ///
170 /// This variant is stable-compatible, but it may cause some performance overhead
171 /// or fail to prevent code from being eliminated.
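///
/// A minimal usage sketch (`compute` is a stand-in for the code under test):
///
/// ```rust
/// use criterion::black_box;
///
/// fn compute(x: u64) -> u64 {
///     x * 2
/// }
///
/// // Wrapping both the input and the result hides them from the optimizer.
/// let result = black_box(compute(black_box(21)));
/// assert_eq!(result, 42);
/// ```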
172 #[cfg(not(feature = "real_blackbox"))]
173 pub fn black_box<T>(dummy: T) -> T {
174 unsafe {
175 let ret = std::ptr::read_volatile(&dummy);
176 std::mem::forget(dummy);
177 ret
178 }
179 }
180
181 /// Argument to [`Bencher::iter_batched`](struct.Bencher.html#method.iter_batched) and
182 /// [`Bencher::iter_batched_ref`](struct.Bencher.html#method.iter_batched_ref) which controls the
183 /// batch size.
184 ///
185 /// Generally speaking, almost all benchmarks should use `SmallInput`. If the input or the result
186 /// of the benchmark routine is large enough that `SmallInput` causes out-of-memory errors,
187 /// `LargeInput` can be used to reduce memory usage at the cost of increasing the measurement
188 /// overhead. If the input or the result is extremely large (or if it holds some
189 /// limited external resource like a file handle), `PerIteration` will set the number of iterations
190 /// per batch to exactly one. `PerIteration` can increase the measurement overhead substantially
191 /// and should be avoided wherever possible.
192 ///
193 /// Each value lists an estimate of the measurement overhead. This is intended as a rough guide
194 /// to assist in choosing an option; it should not be relied upon. In particular, it is not valid
195 /// to subtract the listed overhead from the measurement and assume that the result represents the
196 /// true runtime of a function. The actual measurement overhead for your specific benchmark depends
197 /// on the details of the function you're benchmarking and the hardware and operating
198 /// system running the benchmark.
199 ///
200 /// With that said, if the runtime of your function is small relative to the measurement overhead
201 /// it will be difficult to take accurate measurements. In this situation, the best option is to use
202 /// [`Bencher::iter`](struct.Bencher.html#method.iter) which has next-to-zero measurement overhead.
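///
/// As an illustrative sketch, a batched benchmark of an in-place sort (the input
/// vector below is arbitrary) could look like this:
///
/// ```rust,no_run
/// use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
///
/// fn bench_sort(c: &mut Criterion) {
///     c.bench_function("sort_small_input", |b| {
///         b.iter_batched(
///             || (0..1000u64).rev().collect::<Vec<_>>(), // setup: build a fresh input
///             |mut data| data.sort(),                    // routine being measured
///             BatchSize::SmallInput,
///         )
///     });
/// }
///
/// criterion_group!(benches, bench_sort);
/// criterion_main!(benches);
/// ```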
203 #[derive(Debug, Eq, PartialEq, Copy, Hash, Clone)]
204 pub enum BatchSize {
205 /// `SmallInput` indicates that the input to the benchmark routine (the value returned from
206 /// the setup routine) is small enough that millions of values can be safely held in memory.
207 /// Always prefer `SmallInput` unless the benchmark is using too much memory.
208 ///
209 /// In testing, the maximum measurement overhead from benchmarking with `SmallInput` is on the
210 /// order of 500 picoseconds. This is presented as a rough guide; your results may vary.
211 SmallInput,
212
213 /// `LargeInput` indicates that the input to the benchmark routine or the value returned from
214 /// that routine is large. This will reduce the memory usage but increase the measurement
215 /// overhead.
216 ///
217 /// In testing, the maximum measurement overhead from benchmarking with `LargeInput` is on the
218 /// order of 750 picoseconds. This is presented as a rough guide; your results may vary.
219 LargeInput,
220
221 /// `PerIteration` indicates that the input to the benchmark routine or the value returned from
222 /// that routine is extremely large or holds some limited resource, such that holding many values
223 /// in memory at once is infeasible. This provides the worst measurement overhead, but the
224 /// lowest memory usage.
225 ///
226 /// In testing, the maximum measurement overhead from benchmarking with `PerIteration` is on the
227 /// order of 350 nanoseconds or 350,000 picoseconds. This is presented as a rough guide; your
228 /// results may vary.
229 PerIteration,
230
231 /// `NumBatches` will attempt to divide the iterations up into a given number of batches.
232 /// A larger number of batches (and thus smaller batches) will reduce memory usage but increase
233 /// measurement overhead. This allows the user to choose their own tradeoff between memory usage
234 /// and measurement overhead, but care must be taken in tuning the number of batches. Most
235 /// benchmarks should use `SmallInput` or `LargeInput` instead.
236 NumBatches(u64),
237
238 /// `NumIterations` fixes the batch size to a constant number, specified by the user. This
239 /// allows the user to choose their own tradeoff between overhead and memory usage, but care must
240 /// be taken in tuning the batch size. In general, the measurement overhead of `NumIterations`
241 /// will be larger than that of `NumBatches`. Most benchmarks should use `SmallInput` or
242 /// `LargeInput` instead.
243 NumIterations(u64),
244
245 #[doc(hidden)]
246 __NonExhaustive,
247 }
248 impl BatchSize {
249 /// Convert to a number of iterations per batch.
250 ///
251 /// We try to do a constant number of batches regardless of the number of iterations in this
252 /// sample. If the measurement overhead is roughly constant regardless of the number of
253 /// iterations, the analysis of the results later will have an easier time separating the
254 /// measurement overhead from the benchmark time.
255 fn iters_per_batch(self, iters: u64) -> u64 {
256 match self {
257 BatchSize::SmallInput => (iters + 10 - 1) / 10,
258 BatchSize::LargeInput => (iters + 1000 - 1) / 1000,
259 BatchSize::PerIteration => 1,
260 BatchSize::NumBatches(batches) => (iters + batches - 1) / batches,
261 BatchSize::NumIterations(size) => size,
262 BatchSize::__NonExhaustive => panic!("__NonExhaustive is not a valid BatchSize."),
263 }
264 }
265 }
266
267 /// Baseline describes how the baseline_directory is handled.
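///
/// For instance, a sketch of selecting baseline behaviour from code rather than via
/// the `--save-baseline`/`--baseline` CLI flags (the baseline name is arbitrary):
///
/// ```rust,no_run
/// use criterion::Criterion;
///
/// // Record results under a named baseline...
/// let _save: Criterion = Criterion::default().save_baseline("sprint-42".to_owned());
///
/// // ...and later compare against it, panicking if it is missing (strict = true).
/// let _cmp: Criterion = Criterion::default().retain_baseline("sprint-42".to_owned(), true);
/// ```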
268 #[derive(Debug, Clone, Copy)]
269 pub enum Baseline {
270 /// CompareLenient compares against a previous saved version of the baseline.
271 /// If a previous baseline does not exist, the benchmark is run as normal but no comparison occurs.
272 CompareLenient,
273 /// CompareStrict compares against a previous saved version of the baseline.
274 /// If a previous baseline does not exist, a panic occurs.
275 CompareStrict,
276 /// Save writes the benchmark results to the baseline directory,
277 /// overwriting any results that were previously there.
278 Save,
279 /// Discard benchmark results.
280 Discard,
281 }
282
283 /// Enum used to select the plotting backend.
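///
/// A sketch of forcing a particular backend from code (assuming the default
/// `plotters` cargo feature is enabled):
///
/// ```rust,no_run
/// use criterion::{Criterion, PlottingBackend};
///
/// // Requesting Gnuplot panics if the `gnuplot` binary is unavailable, so the
/// // pure-Rust backend is the safer explicit choice here.
/// let c: Criterion = Criterion::default().plotting_backend(PlottingBackend::Plotters);
/// ```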
284 #[derive(Debug, Clone, Copy)]
285 pub enum PlottingBackend {
286 /// Plotting backend which uses the external `gnuplot` command to render plots. This is the
287 /// default if the `gnuplot` command is installed.
288 Gnuplot,
289 /// Plotting backend which uses the rust 'Plotters' library. This is the default if `gnuplot`
290 /// is not installed.
291 Plotters,
292 /// Null plotting backend which outputs nothing.
293 None,
294 }
295 impl PlottingBackend {
296 fn create_plotter(&self) -> Option<Box<dyn Plotter>> {
297 match self {
298 PlottingBackend::Gnuplot => Some(Box::new(Gnuplot::default())),
299 #[cfg(feature = "plotters")]
300 PlottingBackend::Plotters => Some(Box::new(PlottersBackend::default())),
301 #[cfg(not(feature = "plotters"))]
302 PlottingBackend::Plotters => panic!("Criterion was built without plotters support."),
303 PlottingBackend::None => None,
304 }
305 }
306 }
307
308 #[derive(Debug, Clone)]
309 /// Enum representing the execution mode.
310 pub(crate) enum Mode {
311 /// Run benchmarks normally.
312 Benchmark,
313 /// List all benchmarks but do not run them.
314 List,
315 /// Run benchmarks once to verify that they work, but otherwise do not measure them.
316 Test,
317 /// Iterate benchmarks for a given length of time but do not analyze or report on them.
318 Profile(Duration),
319 }
320 impl Mode {
321 pub fn is_benchmark(&self) -> bool {
322 matches!(self, Mode::Benchmark)
323 }
324 }
325
326 /// The benchmark manager
327 ///
328 /// `Criterion` lets you configure and execute benchmarks
329 ///
330 /// Each benchmark consists of four phases:
331 ///
332 /// - **Warm-up**: The routine is repeatedly executed, to let the CPU/OS/JIT/interpreter adapt to
333 /// the new load
334 /// - **Measurement**: The routine is repeatedly executed, and timing information is collected into
335 /// a sample
336 /// - **Analysis**: The sample is analyzed and distilled into meaningful statistics that get
337 /// reported to stdout, stored in files, and plotted
338 /// - **Comparison**: The current sample is compared with the sample obtained in the previous
339 /// benchmark.
340 pub struct Criterion<M: Measurement = WallTime> {
341 config: BenchmarkConfig,
342 filter: Option<Regex>,
343 report: Reports,
344 output_directory: PathBuf,
345 baseline_directory: String,
346 baseline: Baseline,
347 load_baseline: Option<String>,
348 all_directories: HashSet<String>,
349 all_titles: HashSet<String>,
350 measurement: M,
351 profiler: Box<RefCell<dyn Profiler>>,
352 connection: Option<MutexGuard<'static, Connection>>,
353 mode: Mode,
354 }
355
356 /// Returns the Cargo target directory, possibly calling `cargo metadata` to
357 /// figure it out.
358 fn cargo_target_directory() -> Option<PathBuf> {
359 #[derive(Deserialize)]
360 struct Metadata {
361 target_directory: PathBuf,
362 }
363
364 env::var_os("CARGO_TARGET_DIR")
365 .map(PathBuf::from)
366 .or_else(|| {
367 let output = Command::new(env::var_os("CARGO")?)
368 .args(&["metadata", "--format-version", "1"])
369 .output()
370 .ok()?;
371 let metadata: Metadata = serde_json::from_slice(&output.stdout).ok()?;
372 Some(metadata.target_directory)
373 })
374 }
375
376 impl Default for Criterion {
377 /// Creates a benchmark manager with the following default settings:
378 ///
379 /// - Sample size: 100 measurements
380 /// - Warm-up time: 3 s
381 /// - Measurement time: 5 s
382 /// - Bootstrap size: 100 000 resamples
383 /// - Noise threshold: 0.01 (1%)
384 /// - Confidence level: 0.95
385 /// - Significance level: 0.05
386 /// - Plotting: enabled, using gnuplot if available or plotters if gnuplot is not available
387 /// - No filter
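///
/// A sketch of overriding a couple of these defaults from code (the values below
/// are arbitrary):
///
/// ```rust,no_run
/// use std::time::Duration;
/// use criterion::Criterion;
///
/// let c: Criterion = Criterion::default()
///     .sample_size(50)                           // fewer samples per benchmark
///     .measurement_time(Duration::from_secs(10)) // but a longer measurement window
///     .configure_from_args();                    // still honor CLI overrides
/// ```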
388 fn default() -> Criterion {
389 let reports = Reports {
390 cli_enabled: true,
391 cli: CliReport::new(false, false, CliVerbosity::Normal),
392 bencher_enabled: false,
393 bencher: BencherReport,
394 html: DEFAULT_PLOTTING_BACKEND.create_plotter().map(Html::new),
395 csv_enabled: cfg!(feature = "csv_output"),
396 };
397
398 let mut criterion = Criterion {
399 config: BenchmarkConfig {
400 confidence_level: 0.95,
401 measurement_time: Duration::from_secs(5),
402 noise_threshold: 0.01,
403 nresamples: 100_000,
404 sample_size: 100,
405 significance_level: 0.05,
406 warm_up_time: Duration::from_secs(3),
407 sampling_mode: SamplingMode::Auto,
408 quick_mode: false,
409 },
410 filter: None,
411 report: reports,
412 baseline_directory: "base".to_owned(),
413 baseline: Baseline::Save,
414 load_baseline: None,
415 output_directory: DEFAULT_OUTPUT_DIRECTORY.clone(),
416 all_directories: HashSet::new(),
417 all_titles: HashSet::new(),
418 measurement: WallTime,
419 profiler: Box::new(RefCell::new(ExternalProfiler)),
420 connection: CARGO_CRITERION_CONNECTION
421 .as_ref()
422 .map(|mtx| mtx.lock().unwrap()),
423 mode: Mode::Benchmark,
424 };
425
426 if criterion.connection.is_some() {
427 // disable all reports when connected to cargo-criterion; it will do the reporting.
428 criterion.report.cli_enabled = false;
429 criterion.report.bencher_enabled = false;
430 criterion.report.csv_enabled = false;
431 criterion.report.html = None;
432 }
433 criterion
434 }
435 }
436
437 impl<M: Measurement> Criterion<M> {
438 /// Changes the measurement for the benchmarks run with this runner. See the
439 /// Measurement trait for more details
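///
/// A trivial sketch that just re-selects the default wall-clock measurement (a custom
/// `Measurement` implementation would be passed in the same way):
///
/// ```rust,no_run
/// use criterion::measurement::WallTime;
/// use criterion::Criterion;
///
/// let c: Criterion<WallTime> = Criterion::default().with_measurement(WallTime);
/// ```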
440 pub fn with_measurement<M2: Measurement>(self, m: M2) -> Criterion<M2> {
441 // Can't use struct update syntax here because they're technically different types.
442 Criterion {
443 config: self.config,
444 filter: self.filter,
445 report: self.report,
446 baseline_directory: self.baseline_directory,
447 baseline: self.baseline,
448 load_baseline: self.load_baseline,
449 output_directory: self.output_directory,
450 all_directories: self.all_directories,
451 all_titles: self.all_titles,
452 measurement: m,
453 profiler: self.profiler,
454 connection: self.connection,
455 mode: self.mode,
456 }
457 }
458
459 #[must_use]
460 /// Changes the internal profiler for benchmarks run with this runner. See
461 /// the Profiler trait for more details.
462 pub fn with_profiler<P: Profiler + 'static>(self, p: P) -> Criterion<M> {
463 Criterion {
464 profiler: Box::new(RefCell::new(p)),
465 ..self
466 }
467 }
468
469 #[must_use]
470 /// Set the plotting backend. By default, Criterion will use gnuplot if available, or plotters
471 /// if not.
472 ///
473 /// Panics if `backend` is `PlottingBackend::Gnuplot` and gnuplot is not available.
474 pub fn plotting_backend(mut self, backend: PlottingBackend) -> Criterion<M> {
475 if let PlottingBackend::Gnuplot = backend {
476 assert!(
477 !GNUPLOT_VERSION.is_err(),
478 "Gnuplot plotting backend was requested, but gnuplot is not available. \
479 To continue, either install Gnuplot or allow Criterion.rs to fall back \
480 to using plotters."
481 );
482 }
483
484 self.report.html = backend.create_plotter().map(Html::new);
485 self
486 }
487
488 #[must_use]
489 /// Changes the default size of the sample for benchmarks run with this runner.
490 ///
491 /// A bigger sample should yield more accurate results if paired with a sufficiently large
492 /// measurement time.
493 ///
494 /// Sample size must be at least 10.
495 ///
496 /// # Panics
497 ///
498 /// Panics if n < 10
499 pub fn sample_size(mut self, n: usize) -> Criterion<M> {
500 assert!(n >= 10);
501
502 self.config.sample_size = n;
503 self
504 }
505
506 #[must_use]
507 /// Changes the default warm up time for benchmarks run with this runner.
508 ///
509 /// # Panics
510 ///
511 /// Panics if the input duration is zero
512 pub fn warm_up_time(mut self, dur: Duration) -> Criterion<M> {
513 assert!(dur.as_nanos() > 0);
514
515 self.config.warm_up_time = dur;
516 self
517 }
518
519 #[must_use]
520 /// Changes the default measurement time for benchmarks run with this runner.
521 ///
522 /// With a longer time, the measurement will become more resilient to transitory peak loads
523 /// caused by external programs
524 ///
525 /// **Note**: If the measurement time is too "low", Criterion will automatically increase it
526 ///
527 /// # Panics
528 ///
529 /// Panics if the input duration is zero
530 pub fn measurement_time(mut self, dur: Duration) -> Criterion<M> {
531 assert!(dur.as_nanos() > 0);
532
533 self.config.measurement_time = dur;
534 self
535 }
536
537 #[must_use]
538 /// Changes the default number of resamples for benchmarks run with this runner.
539 ///
540 /// Number of resamples to use for the
541 /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
542 ///
543 /// A larger number of resamples reduces the random sampling errors, which are inherent to the
544 /// bootstrap method, but also increases the analysis time
545 ///
546 /// # Panics
547 ///
548 /// Panics if the number of resamples is set to zero
549 pub fn nresamples(mut self, n: usize) -> Criterion<M> {
550 assert!(n > 0);
551 if n <= 1000 {
552 eprintln!("\nWarning: It is not recommended to reduce nresamples below 1000.");
553 }
554
555 self.config.nresamples = n;
556 self
557 }
558
559 #[must_use]
560 /// Changes the default noise threshold for benchmarks run with this runner. The noise threshold
561 /// is used to filter out small changes in performance, even if they are statistically
562 /// significant. Sometimes benchmarking the same code twice will result in small but
563 /// statistically significant differences solely because of noise. This provides a way to filter
564 /// out some of these false positives at the cost of making it harder to detect small changes
565 /// to the true performance of the benchmark.
566 ///
567 /// The default is 0.01, meaning that changes smaller than 1% will be ignored.
568 ///
569 /// # Panics
570 ///
571 /// Panics if the threshold is set to a negative value
572 pub fn noise_threshold(mut self, threshold: f64) -> Criterion<M> {
573 assert!(threshold >= 0.0);
574
575 self.config.noise_threshold = threshold;
576 self
577 }
578
579 #[must_use]
580 /// Changes the default confidence level for benchmarks run with this runner. The confidence
581 /// level is the desired probability that the true runtime lies within the estimated
582 /// [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is
583 /// 0.95, meaning that the confidence interval should capture the true value 95% of the time.
584 ///
585 /// # Panics
586 ///
587 /// Panics if the confidence level is set to a value outside the `(0, 1)` range
588 pub fn confidence_level(mut self, cl: f64) -> Criterion<M> {
589 assert!(cl > 0.0 && cl < 1.0);
590 if cl < 0.5 {
591 eprintln!("\nWarning: It is not recommended to reduce confidence level below 0.5.");
592 }
593
594 self.config.confidence_level = cl;
595 self
596 }
597
598 #[must_use]
599 /// Changes the default [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
600 /// for benchmarks run with this runner. This is used to perform a
601 /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if
602 /// the measurements from this run are different from the measured performance of the last run.
603 /// The significance level is the desired probability that two measurements of identical code
604 /// will be considered 'different' due to noise in the measurements. The default value is 0.05,
605 /// meaning that approximately 5% of identical benchmarks will register as different due to
606 /// noise.
607 ///
608 /// This presents a trade-off. By setting the significance level closer to 0.0, you can increase
609 /// the statistical robustness against noise, but it also weakens Criterion.rs' ability to
610 /// detect small but real changes in the performance. By setting the significance level
611 /// closer to 1.0, Criterion.rs will be more able to detect small true changes, but will also
612 /// report more spurious differences.
613 ///
614 /// See also the noise threshold setting.
615 ///
616 /// # Panics
617 ///
618 /// Panics if the significance level is set to a value outside the `(0, 1)` range
619 pub fn significance_level(mut self, sl: f64) -> Criterion<M> {
620 assert!(sl > 0.0 && sl < 1.0);
621
622 self.config.significance_level = sl;
623 self
624 }
625
626 #[must_use]
627 /// Enables plotting
628 pub fn with_plots(mut self) -> Criterion<M> {
629 // If running under cargo-criterion then don't re-enable the reports; let it do the reporting.
630 if self.connection.is_none() && self.report.html.is_none() {
631 let default_backend = DEFAULT_PLOTTING_BACKEND.create_plotter();
632 if let Some(backend) = default_backend {
633 self.report.html = Some(Html::new(backend));
634 } else {
635 panic!("Cannot find a default plotting backend!");
636 }
637 }
638 self
639 }
640
641 #[must_use]
642 /// Disables plotting
643 pub fn without_plots(mut self) -> Criterion<M> {
644 self.report.html = None;
645 self
646 }
647
648 #[must_use]
649 /// Names an explicit baseline and enables overwriting the previous results.
650 pub fn save_baseline(mut self, baseline: String) -> Criterion<M> {
651 self.baseline_directory = baseline;
652 self.baseline = Baseline::Save;
653 self
654 }
655
656 #[must_use]
657 /// Names an explicit baseline and disables overwriting the previous results.
658 pub fn retain_baseline(mut self, baseline: String, strict: bool) -> Criterion<M> {
659 self.baseline_directory = baseline;
660 self.baseline = if strict {
661 Baseline::CompareStrict
662 } else {
663 Baseline::CompareLenient
664 };
665 self
666 }
667
668 #[must_use]
669 /// Filters the benchmarks. Only benchmarks with names that contain a match for
670 /// the given regular expression will be executed.
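///
/// A small sketch (the pattern is compiled as a regular expression, so this
/// hypothetical filter keeps only benchmarks whose names contain `fib_`):
///
/// ```rust,no_run
/// use criterion::Criterion;
///
/// let c: Criterion = Criterion::default().with_filter("fib_");
/// ```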
671 pub fn with_filter<S: Into<String>>(mut self, filter: S) -> Criterion<M> {
672 let filter_text = filter.into();
673 let filter = Regex::new(&filter_text).unwrap_or_else(|err| {
674 panic!(
675 "Unable to parse '{}' as a regular expression: {}",
676 filter_text, err
677 )
678 });
679 self.filter = Some(filter);
680
681 self
682 }
683
684 #[must_use]
685 /// Override whether the CLI output will be colored or not. Usually you would use the `--color`
686 /// CLI argument, but this is available for programmatic use as well.
687 pub fn with_output_color(mut self, enabled: bool) -> Criterion<M> {
688 self.report.cli.enable_text_coloring = enabled;
689 self
690 }
691
692 /// Set the output directory (currently for testing only)
693 #[must_use]
694 #[doc(hidden)]
695 pub fn output_directory(mut self, path: &Path) -> Criterion<M> {
696 self.output_directory = path.to_owned();
697
698 self
699 }
700
701 /// Set the profile time (currently for testing only)
702 #[must_use]
703 #[doc(hidden)]
704 pub fn profile_time(mut self, profile_time: Option<Duration>) -> Criterion<M> {
705 match profile_time {
706 Some(time) => self.mode = Mode::Profile(time),
707 None => self.mode = Mode::Benchmark,
708 }
709
710 self
711 }
712
713 /// Generate the final summary at the end of a run.
714 #[doc(hidden)]
715 pub fn final_summary(&self) {
716 if !self.mode.is_benchmark() {
717 return;
718 }
719
720 let report_context = ReportContext {
721 output_directory: self.output_directory.clone(),
722 plot_config: PlotConfiguration::default(),
723 };
724
725 self.report.final_summary(&report_context);
726 }
727
728 /// Configure this criterion struct based on the command-line arguments to
729 /// this process.
730 #[must_use]
731 #[cfg_attr(feature = "cargo-clippy", allow(clippy::cognitive_complexity))]
732 pub fn configure_from_args(mut self) -> Criterion<M> {
733 use clap::{Arg, Command};
734 let matches = Command::new("Criterion Benchmark")
735 .arg(Arg::new("FILTER")
736 .help("Skip benchmarks whose names do not contain FILTER.")
737 .index(1))
738 .arg(Arg::new("color")
739 .short('c')
740 .long("color")
741 .alias("colour")
742 .takes_value(true)
743 .possible_values(&["auto", "always", "never"])
744 .default_value("auto")
745 .help("Configure coloring of output. always = always colorize output, never = never colorize output, auto = colorize output if output is a tty and compiled for unix."))
746 .arg(Arg::new("verbose")
747 .short('v')
748 .long("verbose")
749 .help("Print additional statistical information."))
750 .arg(Arg::new("quiet")
751 .long("quiet")
752 .conflicts_with("verbose")
753 .help("Print only the benchmark results."))
754 .arg(Arg::new("noplot")
755 .short('n')
756 .long("noplot")
757 .help("Disable plot and HTML generation."))
758 .arg(Arg::new("save-baseline")
759 .short('s')
760 .long("save-baseline")
761 .default_value("base")
762 .help("Save results under a named baseline."))
763 .arg(Arg::new("discard-baseline")
764 .long("discard-baseline")
765 .conflicts_with_all(&["save-baseline", "baseline", "baseline-lenient"])
766 .help("Discard benchmark results."))
767 .arg(Arg::new("baseline")
768 .short('b')
769 .long("baseline")
770 .takes_value(true)
771 .conflicts_with_all(&["save-baseline", "baseline-lenient"])
772 .help("Compare to a named baseline. If any benchmarks do not have the specified baseline this command fails."))
773 .arg(Arg::new("baseline-lenient")
774 .long("baseline-lenient")
775 .takes_value(true)
776 .conflicts_with_all(&["save-baseline", "baseline"])
777 .help("Compare to a named baseline. If any benchmarks do not have the specified baseline then just those benchmarks are not compared against the baseline while every other benchmark is compared against the baseline."))
778 .arg(Arg::new("list")
779 .long("list")
780 .help("List all benchmarks")
781 .conflicts_with_all(&["test", "profile-time"]))
782 .arg(Arg::new("profile-time")
783 .long("profile-time")
784 .takes_value(true)
785 .help("Iterate each benchmark for approximately the given number of seconds, doing no analysis and without storing the results. Useful for running the benchmarks in a profiler.")
786 .conflicts_with_all(&["test", "list"]))
787 .arg(Arg::new("load-baseline")
788 .long("load-baseline")
789 .takes_value(true)
790 .conflicts_with("profile-time")
791 .requires("baseline")
792 .help("Load a previous baseline instead of sampling new data."))
793 .arg(Arg::new("sample-size")
794 .long("sample-size")
795 .takes_value(true)
796 .help(&*format!("Changes the default size of the sample for this run. [default: {}]", self.config.sample_size)))
797 .arg(Arg::new("warm-up-time")
798 .long("warm-up-time")
799 .takes_value(true)
800 .help(&*format!("Changes the default warm up time for this run. [default: {}]", self.config.warm_up_time.as_secs())))
801 .arg(Arg::new("measurement-time")
802 .long("measurement-time")
803 .takes_value(true)
804 .help(&*format!("Changes the default measurement time for this run. [default: {}]", self.config.measurement_time.as_secs())))
805 .arg(Arg::new("nresamples")
806 .long("nresamples")
807 .takes_value(true)
808 .help(&*format!("Changes the default number of resamples for this run. [default: {}]", self.config.nresamples)))
809 .arg(Arg::new("noise-threshold")
810 .long("noise-threshold")
811 .takes_value(true)
812 .help(&*format!("Changes the default noise threshold for this run. [default: {}]", self.config.noise_threshold)))
813 .arg(Arg::new("confidence-level")
814 .long("confidence-level")
815 .takes_value(true)
816 .help(&*format!("Changes the default confidence level for this run. [default: {}]", self.config.confidence_level)))
817 .arg(Arg::new("significance-level")
818 .long("significance-level")
819 .takes_value(true)
820 .help(&*format!("Changes the default significance level for this run. [default: {}]", self.config.significance_level)))
821 .arg(Arg::new("quick")
822 .long("quick")
823 .conflicts_with("sample-size")
824 .help(&*format!("Benchmark only until the significance level has been reached [default: {}]", self.config.quick_mode)))
825 .arg(Arg::new("test")
826 .hide(true)
827 .long("test")
828 .help("Run the benchmarks once, to verify that they execute successfully, but do not measure or report the results.")
829 .conflicts_with_all(&["list", "profile-time"]))
830 .arg(Arg::new("bench")
831 .hide(true)
832 .long("bench"))
833 .arg(Arg::new("plotting-backend")
834 .long("plotting-backend")
835 .takes_value(true)
836 .possible_values(&["gnuplot", "plotters"])
837 .help("Set the plotting backend. By default, Criterion.rs will use the gnuplot backend if gnuplot is available, or the plotters backend if it isn't."))
838 .arg(Arg::new("output-format")
839 .long("output-format")
840 .takes_value(true)
841 .possible_values(&["criterion", "bencher"])
842 .default_value("criterion")
843 .help("Change the CLI output format. By default, Criterion.rs will use its own format. If output format is set to 'bencher', Criterion.rs will print output in a format that resembles the 'bencher' crate."))
844 .arg(Arg::new("nocapture")
845 .long("nocapture")
846 .hide(true)
847 .help("Ignored, but added for compatibility with libtest."))
848 .arg(Arg::new("show-output")
849 .long("show-output")
850 .hide(true)
851 .help("Ignored, but added for compatibility with libtest."))
852 .arg(Arg::new("version")
853 .hide(true)
854 .short('V')
855 .long("version"))
856 .after_help("
857 This executable is a Criterion.rs benchmark.
858 See https://github.com/bheisler/criterion.rs for more details.
859
860 To enable debug output, define the environment variable CRITERION_DEBUG.
861 Criterion.rs will output more debug information and will save the gnuplot
862 scripts alongside the generated plots.
863
864 To test that the benchmarks work, run `cargo test --benches`
865
866 NOTE: If you see an 'unrecognized option' error using any of the options above, see:
867 https://bheisler.github.io/criterion.rs/book/faq.html
868 ")
869 .get_matches();
870
871 if self.connection.is_some() {
872 if let Some(color) = matches.value_of("color") {
873 if color != "auto" {
874 eprintln!("Warning: --color will be ignored when running with cargo-criterion. Use `cargo criterion --color {} -- <args>` instead.", color);
875 }
876 }
877 if matches.is_present("verbose") {
878 eprintln!("Warning: --verbose will be ignored when running with cargo-criterion. Use `cargo criterion --output-format verbose -- <args>` instead.");
879 }
880 if matches.is_present("noplot") {
881 eprintln!("Warning: --noplot will be ignored when running with cargo-criterion. Use `cargo criterion --plotting-backend disabled -- <args>` instead.");
882 }
883 if let Some(backend) = matches.value_of("plotting-backend") {
884 eprintln!("Warning: --plotting-backend will be ignored when running with cargo-criterion. Use `cargo criterion --plotting-backend {} -- <args>` instead.", backend);
885 }
886 if let Some(format) = matches.value_of("output-format") {
887 if format != "criterion" {
888 eprintln!("Warning: --output-format will be ignored when running with cargo-criterion. Use `cargo criterion --output-format {} -- <args>` instead.", format);
889 }
890 }
891
892 if matches.is_present("baseline")
893 || matches
894 .value_of("save-baseline")
895 .map(|base| base != "base")
896 .unwrap_or(false)
897 || matches.is_present("load-baseline")
898 {
899 eprintln!("Error: baselines are not supported when running with cargo-criterion.");
900 std::process::exit(1);
901 }
902 }
903
904 let bench = matches.is_present("bench");
905 let test = matches.is_present("test");
906 let test_mode = match (bench, test) {
907 (true, true) => true, // cargo bench -- --test should run tests
908 (true, false) => false, // cargo bench should run benchmarks
909 (false, _) => true, // cargo test --benches should run tests
910 };
911
912 self.mode = if test_mode {
913 Mode::Test
914 } else if matches.is_present("list") {
915 Mode::List
916 } else if matches.is_present("profile-time") {
917 let num_seconds = matches.value_of_t_or_exit("profile-time");
918
919 if num_seconds < 1.0 {
920 eprintln!("Profile time must be at least one second.");
921 std::process::exit(1);
922 }
923
924 Mode::Profile(Duration::from_secs_f64(num_seconds))
925 } else {
926 Mode::Benchmark
927 };
928
929 // This is kind of a hack, but disable the connection to the runner if we're not benchmarking.
930 if !self.mode.is_benchmark() {
931 self.connection = None;
932 }
933
934 if let Some(filter) = matches.value_of("FILTER") {
935 self = self.with_filter(filter);
936 }
937
938 match matches.value_of("plotting-backend") {
939 // Use plotting_backend() here to re-use the panic behavior if Gnuplot is not available.
940 Some("gnuplot") => self = self.plotting_backend(PlottingBackend::Gnuplot),
941 Some("plotters") => self = self.plotting_backend(PlottingBackend::Plotters),
942 Some(val) => panic!("Unexpected plotting backend '{}'", val),
943 None => {}
944 }
945
946 if matches.is_present("noplot") {
947 self = self.without_plots();
948 }
949
950 if let Some(dir) = matches.value_of("save-baseline") {
951 self.baseline = Baseline::Save;
952 self.baseline_directory = dir.to_owned()
953 }
954 if matches.is_present("discard-baseline") {
955 self.baseline = Baseline::Discard;
956 }
957 if let Some(dir) = matches.value_of("baseline") {
958 self.baseline = Baseline::CompareStrict;
959 self.baseline_directory = dir.to_owned();
960 }
961 if let Some(dir) = matches.value_of("baseline-lenient") {
962 self.baseline = Baseline::CompareLenient;
963 self.baseline_directory = dir.to_owned();
964 }
965
966 if self.connection.is_some() {
967 // disable all reports when connected to cargo-criterion; it will do the reporting.
968 self.report.cli_enabled = false;
969 self.report.bencher_enabled = false;
970 self.report.csv_enabled = false;
971 self.report.html = None;
972 } else {
973 match matches.value_of("output-format") {
974 Some("bencher") => {
975 self.report.bencher_enabled = true;
976 self.report.cli_enabled = false;
977 }
978 _ => {
979 let verbose = matches.is_present("verbose");
980 let verbosity = if verbose {
981 CliVerbosity::Verbose
982 } else if matches.is_present("quiet") {
983 CliVerbosity::Quiet
984 } else {
985 CliVerbosity::Normal
986 };
987 let stdout_isatty = atty::is(atty::Stream::Stdout);
988 let mut enable_text_overwrite = stdout_isatty && !verbose && !debug_enabled();
989 let enable_text_coloring;
990 match matches.value_of("color") {
991 Some("always") => {
992 enable_text_coloring = true;
993 }
994 Some("never") => {
995 enable_text_coloring = false;
996 enable_text_overwrite = false;
997 }
998 _ => enable_text_coloring = stdout_isatty,
999 };
1000 self.report.bencher_enabled = false;
1001 self.report.cli_enabled = true;
1002 self.report.cli =
1003 CliReport::new(enable_text_overwrite, enable_text_coloring, verbosity);
1004 }
1005 };
1006 }
1007
1008 if let Some(dir) = matches.value_of("load-baseline") {
1009 self.load_baseline = Some(dir.to_owned());
1010 }
1011
1012 if matches.is_present("sample-size") {
1013 let num_size = matches.value_of_t_or_exit("sample-size");
1014
1015 assert!(num_size >= 10);
1016 self.config.sample_size = num_size;
1017 }
1018 if matches.is_present("warm-up-time") {
1019 let num_seconds = matches.value_of_t_or_exit("warm-up-time");
1020
1021 let dur = std::time::Duration::from_secs_f64(num_seconds);
1022 assert!(dur.as_nanos() > 0);
1023
1024 self.config.warm_up_time = dur;
1025 }
1026 if matches.is_present("measurement-time") {
1027 let num_seconds = matches.value_of_t_or_exit("measurement-time");
1028
1029 let dur = std::time::Duration::from_secs_f64(num_seconds);
1030 assert!(dur.as_nanos() > 0);
1031
1032 self.config.measurement_time = dur;
1033 }
1034 if matches.is_present("nresamples") {
1035 let num_resamples = matches.value_of_t_or_exit("nresamples");
1036
1037 assert!(num_resamples > 0);
1038
1039 self.config.nresamples = num_resamples;
1040 }
1041 if matches.is_present("noise-threshold") {
1042 let num_noise_threshold = matches.value_of_t_or_exit("noise-threshold");
1043
1044 assert!(num_noise_threshold > 0.0);
1045
1046 self.config.noise_threshold = num_noise_threshold;
1047 }
1048 if matches.is_present("confidence-level") {
1049 let num_confidence_level = matches.value_of_t_or_exit("confidence-level");
1050
1051 assert!(num_confidence_level > 0.0 && num_confidence_level < 1.0);
1052
1053 self.config.confidence_level = num_confidence_level;
1054 }
1055 if matches.is_present("significance-level") {
1056 let num_significance_level = matches.value_of_t_or_exit("significance-level");
1057
1058 assert!(num_significance_level > 0.0 && num_significance_level < 1.0);
1059
1060 self.config.significance_level = num_significance_level;
1061 }
1062
1063 if matches.is_present("quick") {
1064 self.config.quick_mode = true;
1065 }
1066
1067 self
1068 }
1069
1070 fn filter_matches(&self, id: &str) -> bool {
1071 match &self.filter {
1072 Some(regex) => regex.is_match(id),
1073 None => true,
1074 }
1075 }
1076
1077 /// Returns true iff we should save the benchmark results in
1078 /// json files on the local disk.
1079 fn should_save_baseline(&self) -> bool {
1080 self.connection.is_none()
1081 && self.load_baseline.is_none()
1082 && !matches!(self.baseline, Baseline::Discard)
1083 }
1084
1085 /// Return a benchmark group. All benchmarks performed using a benchmark group will be
1086 /// grouped together in the final report.
1087 ///
1088 /// # Examples:
1089 ///
1090 /// ```rust
1091 /// #[macro_use] extern crate criterion;
1092 /// use self::criterion::*;
1093 ///
1094 /// fn bench_simple(c: &mut Criterion) {
1095 /// let mut group = c.benchmark_group("My Group");
1096 ///
1097 /// // Now we can perform benchmarks with this group
1098 /// group.bench_function("Bench 1", |b| b.iter(|| 1 ));
1099 /// group.bench_function("Bench 2", |b| b.iter(|| 2 ));
1100 ///
1101 /// group.finish();
1102 /// }
1103 /// criterion_group!(benches, bench_simple);
1104 /// criterion_main!(benches);
1105 /// ```
1106 /// # Panics:
1107 /// Panics if the group name is empty
1108 pub fn benchmark_group<S: Into<String>>(&mut self, group_name: S) -> BenchmarkGroup<'_, M> {
1109 let group_name = group_name.into();
1110 assert!(!group_name.is_empty(), "Group name must not be empty.");
1111
1112 if let Some(conn) = &self.connection {
1113 conn.send(&OutgoingMessage::BeginningBenchmarkGroup { group: &group_name })
1114 .unwrap();
1115 }
1116
1117 BenchmarkGroup::new(self, group_name)
1118 }
1119 }
1120 impl<M> Criterion<M>
1121 where
1122 M: Measurement + 'static,
1123 {
1124 /// Benchmarks a function. For comparing multiple functions, see `benchmark_group`.
1125 ///
1126 /// # Example
1127 ///
1128 /// ```rust
1129 /// #[macro_use] extern crate criterion;
1130 /// use self::criterion::*;
1131 ///
1132 /// fn bench(c: &mut Criterion) {
1133 /// // Setup (construct data, allocate memory, etc)
1134 /// c.bench_function(
1135 /// "function_name",
1136 /// |b| b.iter(|| {
1137 /// // Code to benchmark goes here
1138 /// }),
1139 /// );
1140 /// }
1141 ///
1142 /// criterion_group!(benches, bench);
1143 /// criterion_main!(benches);
1144 /// ```
1145 pub fn bench_function<F>(&mut self, id: &str, f: F) -> &mut Criterion<M>
1146 where
1147 F: FnMut(&mut Bencher<'_, M>),
1148 {
1149 self.benchmark_group(id)
1150 .bench_function(BenchmarkId::no_function(), f);
1151 self
1152 }
1153
1154 /// Benchmarks a function with an input. For comparing multiple functions or multiple inputs,
1155 /// see `benchmark_group`.
1156 ///
1157 /// # Example
1158 ///
1159 /// ```rust
1160 /// #[macro_use] extern crate criterion;
1161 /// use self::criterion::*;
1162 ///
1163 /// fn bench(c: &mut Criterion) {
1164 /// // Setup (construct data, allocate memory, etc)
1165 /// let input = 5u64;
1166 /// c.bench_with_input(
1167 /// BenchmarkId::new("function_name", input), &input,
1168 /// |b, i| b.iter(|| {
1169 /// // Code to benchmark using input `i` goes here
1170 /// }),
1171 /// );
1172 /// }
1173 ///
1174 /// criterion_group!(benches, bench);
1175 /// criterion_main!(benches);
1176 /// ```
1177 pub fn bench_with_input<F, I>(&mut self, id: BenchmarkId, input: &I, f: F) -> &mut Criterion<M>
1178 where
1179 F: FnMut(&mut Bencher<'_, M>, &I),
1180 {
1181 // It's possible to use BenchmarkId::from_parameter to create a benchmark ID with no function
1182 // name. That's intended for use with BenchmarkGroups where the function name isn't necessary,
1183 // but here it is.
1184 let group_name = id.function_name.expect(
1185 "Cannot use BenchmarkId::from_parameter with Criterion::bench_with_input. \
1186 Consider using a BenchmarkGroup or BenchmarkId::new instead.",
1187 );
1188 // Guaranteed safe because external callers can't create benchmark IDs without a parameter
1189 let parameter = id.parameter.unwrap();
1190 self.benchmark_group(group_name).bench_with_input(
1191 BenchmarkId::no_function_with_input(parameter),
1192 input,
1193 f,
1194 );
1195 self
1196 }
1197 }
1198
1199 /// Enum representing different ways of measuring the throughput of benchmarked code.
1200 /// If the throughput setting is configured for a benchmark then the estimated throughput will
1201 /// be reported as well as the time per iteration.
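///
/// As a sketch, throughput is configured on a benchmark group; the 1 KiB buffer
/// below is an arbitrary example input:
///
/// ```rust,no_run
/// use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
///
/// fn bench_sum(c: &mut Criterion) {
///     let data = vec![1u8; 1024];
///     let mut group = c.benchmark_group("sum_bytes");
///     // Report bytes/second alongside the time per iteration.
///     group.throughput(Throughput::Bytes(data.len() as u64));
///     group.bench_with_input(BenchmarkId::from_parameter(data.len()), &data, |b, d| {
///         b.iter(|| d.iter().map(|&x| u64::from(x)).sum::<u64>())
///     });
///     group.finish();
/// }
///
/// criterion_group!(benches, bench_sum);
/// criterion_main!(benches);
/// ```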
1202 // TODO: Remove serialize/deserialize from the public API.
1203 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
1204 pub enum Throughput {
1205 /// Measure throughput in terms of bytes/second. The value should be the number of bytes
1206 /// processed by one iteration of the benchmarked code. Typically, this would be the length of
1207 /// an input string or `&[u8]`.
1208 Bytes(u64),
1209
1210 /// Equivalent to Bytes, but the value will be reported in terms of
1211 /// kilobytes (1000 bytes) per second instead of kibibytes (1024 bytes) per
1212 /// second, megabytes instead of mebibytes, and gigabytes instead of gibibytes.
1213 BytesDecimal(u64),
1214
1215 /// Measure throughput in terms of elements/second. The value should be the number of elements
1216 /// processed by one iteration of the benchmarked code. Typically, this would be the size of a
1217 /// collection, but could also be the number of lines of input text or the number of values to
1218 /// parse.
1219 Elements(u64),
1220 }
1221
1222 /// Axis scaling type
1223 #[derive(Debug, Clone, Copy)]
1224 pub enum AxisScale {
1225 /// Axes scale linearly
1226 Linear,
1227
1228 /// Axes scale logarithmically
1229 Logarithmic,
1230 }
1231
1232 /// Contains the configuration options for the plots generated by a particular benchmark
1233 /// or benchmark group.
1234 ///
1235 /// ```rust
1236 /// use self::criterion::{Bencher, Criterion, PlotConfiguration, AxisScale};
1237 ///
1238 /// let plot_config = PlotConfiguration::default()
1239 /// .summary_scale(AxisScale::Logarithmic);
1240 ///
1241 /// // Using Criterion::default() for simplicity; normally you'd use the macros.
1242 /// let mut criterion = Criterion::default();
1243 /// let mut benchmark_group = criterion.benchmark_group("Group name");
1244 /// benchmark_group.plot_config(plot_config);
1245 /// // Use benchmark group
1246 /// ```
1247 #[derive(Debug, Clone)]
1248 pub struct PlotConfiguration {
1249 summary_scale: AxisScale,
1250 }
1251
1252 impl Default for PlotConfiguration {
1253 fn default() -> PlotConfiguration {
1254 PlotConfiguration {
1255 summary_scale: AxisScale::Linear,
1256 }
1257 }
1258 }
1259
1260 impl PlotConfiguration {
1261 #[must_use]
1262 /// Set the axis scale (linear or logarithmic) for the summary plots. Typically, you would
1263 /// set this to logarithmic if benchmarking over a range of inputs which scale exponentially.
1264 /// Defaults to linear.
1265 pub fn summary_scale(mut self, new_scale: AxisScale) -> PlotConfiguration {
1266 self.summary_scale = new_scale;
1267 self
1268 }
1269 }
1270
1271 /// This enum allows the user to control how Criterion.rs chooses the iteration count when sampling.
1272 /// The default is Auto, which will choose a method automatically based on the iteration time during
1273 /// the warm-up phase.
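///
/// A sketch of opting into flat sampling for a slow routine (the sleep below simply
/// stands in for an expensive computation):
///
/// ```rust,no_run
/// use std::time::Duration;
/// use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
///
/// fn bench_slow(c: &mut Criterion) {
///     let mut group = c.benchmark_group("slow_routine");
///     group.sampling_mode(SamplingMode::Flat);
///     group.bench_function("sleep 10ms", |b| {
///         b.iter(|| std::thread::sleep(Duration::from_millis(10)))
///     });
///     group.finish();
/// }
///
/// criterion_group!(benches, bench_slow);
/// criterion_main!(benches);
/// ```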
1274 #[derive(Debug, Clone, Copy)]
1275 pub enum SamplingMode {
1276 /// Criterion.rs should choose a sampling method automatically. This is the default, and is
1277 /// recommended for most users and most benchmarks.
1278 Auto,
1279
1280 /// Scale the iteration count in each sample linearly. This is suitable for most benchmarks,
1281 /// but it tends to require many iterations which can make it very slow for very long benchmarks.
1282 Linear,
1283
1284 /// Keep the iteration count the same for all samples. This is not recommended, as it affects
1285 /// the statistics that Criterion.rs can compute. However, it requires fewer iterations than
1286 /// the Linear method and therefore is more suitable for very long-running benchmarks where
1287 /// benchmark execution time is more of a problem and statistical precision is less important.
1288 Flat,
1289 }
1290 impl SamplingMode {
1291 pub(crate) fn choose_sampling_mode(
1292 &self,
1293 warmup_mean_execution_time: f64,
1294 sample_count: u64,
1295 target_time: f64,
1296 ) -> ActualSamplingMode {
1297 match self {
1298 SamplingMode::Linear => ActualSamplingMode::Linear,
1299 SamplingMode::Flat => ActualSamplingMode::Flat,
1300 SamplingMode::Auto => {
1301 // Estimate execution time with linear sampling
1302 let total_runs = sample_count * (sample_count + 1) / 2;
1303 let d =
1304 (target_time / warmup_mean_execution_time / total_runs as f64).ceil() as u64;
1305 let expected_ns = total_runs as f64 * d as f64 * warmup_mean_execution_time;
1306
1307 if expected_ns > (2.0 * target_time) {
1308 ActualSamplingMode::Flat
1309 } else {
1310 ActualSamplingMode::Linear
1311 }
1312 }
1313 }
1314 }
1315 }
1316
1317 /// Enum to represent the sampling mode without Auto.
1318 #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1319 pub(crate) enum ActualSamplingMode {
1320 Linear,
1321 Flat,
1322 }
1323 impl ActualSamplingMode {
1324 pub(crate) fn iteration_counts(
1325 &self,
1326 warmup_mean_execution_time: f64,
1327 sample_count: u64,
1328 target_time: &Duration,
1329 ) -> Vec<u64> {
1330 match self {
1331 ActualSamplingMode::Linear => {
1332 let n = sample_count;
1333 let met = warmup_mean_execution_time;
1334 let m_ns = target_time.as_nanos();
1335 // Solve: [d + 2*d + 3*d + ... + n*d] * met = m_ns
1336 let total_runs = n * (n + 1) / 2;
1337 let d = ((m_ns as f64 / met / total_runs as f64).ceil() as u64).max(1);
1338 let expected_ns = total_runs as f64 * d as f64 * met;
1339
1340 if d == 1 {
1341 let recommended_sample_size =
1342 ActualSamplingMode::recommend_linear_sample_size(m_ns as f64, met);
1343 let actual_time = Duration::from_nanos(expected_ns as u64);
1344 eprint!("\nWarning: Unable to complete {} samples in {:.1?}. You may wish to increase target time to {:.1?}",
1345 n, target_time, actual_time);
1346
1347 if recommended_sample_size != n {
1348 eprintln!(
1349 ", enable flat sampling, or reduce sample count to {}.",
1350 recommended_sample_size
1351 );
1352 } else {
1353 eprintln!(" or enable flat sampling.");
1354 }
1355 }
1356
1357 (1..(n + 1) as u64).map(|a| a * d).collect::<Vec<u64>>()
1358 }
1359 ActualSamplingMode::Flat => {
1360 let n = sample_count;
1361 let met = warmup_mean_execution_time;
1362 let m_ns = target_time.as_nanos() as f64;
1363 let time_per_sample = m_ns / (n as f64);
1364 // This is pretty simplistic; we could do something smarter to fit into the allotted time.
1365 let iterations_per_sample = ((time_per_sample / met).ceil() as u64).max(1);
1366
1367 let expected_ns = met * (iterations_per_sample * n) as f64;
1368
1369 if iterations_per_sample == 1 {
1370 let recommended_sample_size =
1371 ActualSamplingMode::recommend_flat_sample_size(m_ns, met);
1372 let actual_time = Duration::from_nanos(expected_ns as u64);
1373 eprint!("\nWarning: Unable to complete {} samples in {:.1?}. You may wish to increase target time to {:.1?}",
1374 n, target_time, actual_time);
1375
1376 if recommended_sample_size != n {
1377 eprintln!(", or reduce sample count to {}.", recommended_sample_size);
1378 } else {
1379 eprintln!(".");
1380 }
1381 }
1382
1383 vec![iterations_per_sample; n as usize]
1384 }
1385 }
1386 }
1387
1388 fn is_linear(&self) -> bool {
1389 matches!(self, ActualSamplingMode::Linear)
1390 }
1391
1392 fn recommend_linear_sample_size(target_time: f64, met: f64) -> u64 {
1393 // Some math shows that n(n+1)/2 * d * met = target_time. d = 1, so it can be ignored.
1394 // This leaves n(n+1) = (2*target_time)/met, or n^2 + n - (2*target_time)/met = 0
1395 // Which can be solved with the quadratic formula. Since A and B are constant 1,
1396 // this simplifies to sample_size = (-1 +- sqrt(1 + 4C))/2, where C = (2*target_time)/met.
1397 // We don't care about the negative solution. Experimentation shows that this actually tends to
1398 // result in twice the desired execution time (probably because of the ceil used to calculate
1399 // d), so instead I use c = target_time/met and drop the negligible +1 under the square root.
1400 let c = target_time / met;
1401 let sample_size = (-1.0 + (4.0 * c).sqrt()) / 2.0;
1402 let sample_size = sample_size as u64;
1403
1404 // Round down to the nearest 10 to give a margin and avoid excessive precision
1405 let sample_size = (sample_size / 10) * 10;
1406
1407 // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes smaller than 10.
1408 if sample_size < 10 {
1409 10
1410 } else {
1411 sample_size
1412 }
1413 }
1414
1415 fn recommend_flat_sample_size(target_time: f64, met: f64) -> u64 {
1416 let sample_size = (target_time / met) as u64;
1417
1418 // Round down to the nearest 10 to give a margin and avoid excessive precision
1419 let sample_size = (sample_size / 10) * 10;
1420
1421 // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes smaller than 10.
1422 if sample_size < 10 {
1423 10
1424 } else {
1425 sample_size
1426 }
1427 }
1428 }
1429
1430 #[derive(Debug, Serialize, Deserialize)]
1431 pub(crate) struct SavedSample {
1432 sampling_mode: ActualSamplingMode,
1433 iters: Vec<f64>,
1434 times: Vec<f64>,
1435 }
1436
1437 /// Custom-test-framework runner. Should not be called directly.
1438 #[doc(hidden)]
1439 pub fn runner(benches: &[&dyn Fn()]) {
1440 for bench in benches {
1441 bench();
1442 }
1443 Criterion::default().configure_from_args().final_summary();
1444 }
1445