1 use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
2 use itertools::free::cloned;
3 use itertools::iproduct;
4 use itertools::Itertools;
5
6 use std::cmp;
7 use std::iter::repeat;
8 use std::ops::{Add, Range};
9
slice_iter(c: &mut Criterion)10 fn slice_iter(c: &mut Criterion) {
11 let xs: Vec<_> = repeat(1i32).take(20).collect();
12
13 c.bench_function("slice iter", move |b| {
14 b.iter(|| {
15 for elt in xs.iter() {
16 black_box(elt);
17 }
18 })
19 });
20 }
21
slice_iter_rev(c: &mut Criterion)22 fn slice_iter_rev(c: &mut Criterion) {
23 let xs: Vec<_> = repeat(1i32).take(20).collect();
24
25 c.bench_function("slice iter rev", move |b| {
26 b.iter(|| {
27 for elt in xs.iter().rev() {
28 black_box(elt);
29 }
30 })
31 });
32 }
33
zip_default_zip(c: &mut Criterion)34 fn zip_default_zip(c: &mut Criterion) {
35 let xs = vec![0; 1024];
36 let ys = vec![0; 768];
37 let xs = black_box(xs);
38 let ys = black_box(ys);
39
40 c.bench_function("zip default zip", move |b| {
41 b.iter(|| {
42 for (&x, &y) in xs.iter().zip(&ys) {
43 black_box(x);
44 black_box(y);
45 }
46 })
47 });
48 }
49
zipdot_i32_default_zip(c: &mut Criterion)50 fn zipdot_i32_default_zip(c: &mut Criterion) {
51 let xs = vec![2; 1024];
52 let ys = vec![2; 768];
53 let xs = black_box(xs);
54 let ys = black_box(ys);
55
56 c.bench_function("zipdot i32 default zip", move |b| {
57 b.iter(|| {
58 let mut s = 0;
59 for (&x, &y) in xs.iter().zip(&ys) {
60 s += x * y;
61 }
62 s
63 })
64 });
65 }
66
zipdot_f32_default_zip(c: &mut Criterion)67 fn zipdot_f32_default_zip(c: &mut Criterion) {
68 let xs = vec![2f32; 1024];
69 let ys = vec![2f32; 768];
70 let xs = black_box(xs);
71 let ys = black_box(ys);
72
73 c.bench_function("zipdot f32 default zip", move |b| {
74 b.iter(|| {
75 let mut s = 0.;
76 for (&x, &y) in xs.iter().zip(&ys) {
77 s += x * y;
78 }
79 s
80 })
81 });
82 }
83
zip_default_zip3(c: &mut Criterion)84 fn zip_default_zip3(c: &mut Criterion) {
85 let xs = vec![0; 1024];
86 let ys = vec![0; 768];
87 let zs = vec![0; 766];
88 let xs = black_box(xs);
89 let ys = black_box(ys);
90 let zs = black_box(zs);
91
92 c.bench_function("zip default zip3", move |b| {
93 b.iter(|| {
94 for ((&x, &y), &z) in xs.iter().zip(&ys).zip(&zs) {
95 black_box(x);
96 black_box(y);
97 black_box(z);
98 }
99 })
100 });
101 }
102
zip_slices_ziptuple(c: &mut Criterion)103 fn zip_slices_ziptuple(c: &mut Criterion) {
104 let xs = vec![0; 1024];
105 let ys = vec![0; 768];
106
107 c.bench_function("zip slices ziptuple", move |b| {
108 b.iter(|| {
109 let xs = black_box(&xs);
110 let ys = black_box(&ys);
111 for (&x, &y) in itertools::multizip((xs, ys)) {
112 black_box(x);
113 black_box(y);
114 }
115 })
116 });
117 }
118
zip_checked_counted_loop(c: &mut Criterion)119 fn zip_checked_counted_loop(c: &mut Criterion) {
120 let xs = vec![0; 1024];
121 let ys = vec![0; 768];
122 let xs = black_box(xs);
123 let ys = black_box(ys);
124
125 c.bench_function("zip checked counted loop", move |b| {
126 b.iter(|| {
127 // Must slice to equal lengths, and then bounds checks are eliminated!
128 let len = cmp::min(xs.len(), ys.len());
129 let xs = &xs[..len];
130 let ys = &ys[..len];
131
132 for i in 0..len {
133 let x = xs[i];
134 let y = ys[i];
135 black_box(x);
136 black_box(y);
137 }
138 })
139 });
140 }
141
zipdot_i32_checked_counted_loop(c: &mut Criterion)142 fn zipdot_i32_checked_counted_loop(c: &mut Criterion) {
143 let xs = vec![2; 1024];
144 let ys = vec![2; 768];
145 let xs = black_box(xs);
146 let ys = black_box(ys);
147
148 c.bench_function("zipdot i32 checked counted loop", move |b| {
149 b.iter(|| {
150 // Must slice to equal lengths, and then bounds checks are eliminated!
151 let len = cmp::min(xs.len(), ys.len());
152 let xs = &xs[..len];
153 let ys = &ys[..len];
154
155 let mut s = 0i32;
156
157 for i in 0..len {
158 s += xs[i] * ys[i];
159 }
160 s
161 })
162 });
163 }
164
zipdot_f32_checked_counted_loop(c: &mut Criterion)165 fn zipdot_f32_checked_counted_loop(c: &mut Criterion) {
166 let xs = vec![2f32; 1024];
167 let ys = vec![2f32; 768];
168 let xs = black_box(xs);
169 let ys = black_box(ys);
170
171 c.bench_function("zipdot f32 checked counted loop", move |b| {
172 b.iter(|| {
173 // Must slice to equal lengths, and then bounds checks are eliminated!
174 let len = cmp::min(xs.len(), ys.len());
175 let xs = &xs[..len];
176 let ys = &ys[..len];
177
178 let mut s = 0.;
179
180 for i in 0..len {
181 s += xs[i] * ys[i];
182 }
183 s
184 })
185 });
186 }
187
zipdot_f32_checked_counted_unrolled_loop(c: &mut Criterion)188 fn zipdot_f32_checked_counted_unrolled_loop(c: &mut Criterion) {
189 let xs = vec![2f32; 1024];
190 let ys = vec![2f32; 768];
191 let xs = black_box(xs);
192 let ys = black_box(ys);
193
194 c.bench_function("zipdot f32 checked counted unrolled loop", move |b| {
195 b.iter(|| {
196 // Must slice to equal lengths, and then bounds checks are eliminated!
197 let len = cmp::min(xs.len(), ys.len());
198 let mut xs = &xs[..len];
199 let mut ys = &ys[..len];
200
201 let mut s = 0.;
202 let (mut p0, mut p1, mut p2, mut p3, mut p4, mut p5, mut p6, mut p7) =
203 (0., 0., 0., 0., 0., 0., 0., 0.);
204
205 // how to unroll and have bounds checks eliminated (by cristicbz)
206 // split sum into eight parts to enable vectorization (by bluss)
207 while xs.len() >= 8 {
208 p0 += xs[0] * ys[0];
209 p1 += xs[1] * ys[1];
210 p2 += xs[2] * ys[2];
211 p3 += xs[3] * ys[3];
212 p4 += xs[4] * ys[4];
213 p5 += xs[5] * ys[5];
214 p6 += xs[6] * ys[6];
215 p7 += xs[7] * ys[7];
216
217 xs = &xs[8..];
218 ys = &ys[8..];
219 }
220 s += p0 + p4;
221 s += p1 + p5;
222 s += p2 + p6;
223 s += p3 + p7;
224
225 for i in 0..xs.len() {
226 s += xs[i] * ys[i];
227 }
228 s
229 })
230 });
231 }
232
zip_unchecked_counted_loop(c: &mut Criterion)233 fn zip_unchecked_counted_loop(c: &mut Criterion) {
234 let xs = vec![0; 1024];
235 let ys = vec![0; 768];
236 let xs = black_box(xs);
237 let ys = black_box(ys);
238
239 c.bench_function("zip unchecked counted loop", move |b| {
240 b.iter(|| {
241 let len = cmp::min(xs.len(), ys.len());
242 for i in 0..len {
243 unsafe {
244 let x = *xs.get_unchecked(i);
245 let y = *ys.get_unchecked(i);
246 black_box(x);
247 black_box(y);
248 }
249 }
250 })
251 });
252 }
253
zipdot_i32_unchecked_counted_loop(c: &mut Criterion)254 fn zipdot_i32_unchecked_counted_loop(c: &mut Criterion) {
255 let xs = vec![2; 1024];
256 let ys = vec![2; 768];
257 let xs = black_box(xs);
258 let ys = black_box(ys);
259
260 c.bench_function("zipdot i32 unchecked counted loop", move |b| {
261 b.iter(|| {
262 let len = cmp::min(xs.len(), ys.len());
263 let mut s = 0i32;
264 for i in 0..len {
265 unsafe {
266 let x = *xs.get_unchecked(i);
267 let y = *ys.get_unchecked(i);
268 s += x * y;
269 }
270 }
271 s
272 })
273 });
274 }
275
zipdot_f32_unchecked_counted_loop(c: &mut Criterion)276 fn zipdot_f32_unchecked_counted_loop(c: &mut Criterion) {
277 let xs = vec![2.; 1024];
278 let ys = vec![2.; 768];
279 let xs = black_box(xs);
280 let ys = black_box(ys);
281
282 c.bench_function("zipdot f32 unchecked counted loop", move |b| {
283 b.iter(|| {
284 let len = cmp::min(xs.len(), ys.len());
285 let mut s = 0f32;
286 for i in 0..len {
287 unsafe {
288 let x = *xs.get_unchecked(i);
289 let y = *ys.get_unchecked(i);
290 s += x * y;
291 }
292 }
293 s
294 })
295 });
296 }
297
zip_unchecked_counted_loop3(c: &mut Criterion)298 fn zip_unchecked_counted_loop3(c: &mut Criterion) {
299 let xs = vec![0; 1024];
300 let ys = vec![0; 768];
301 let zs = vec![0; 766];
302 let xs = black_box(xs);
303 let ys = black_box(ys);
304 let zs = black_box(zs);
305
306 c.bench_function("zip unchecked counted loop3", move |b| {
307 b.iter(|| {
308 let len = cmp::min(xs.len(), cmp::min(ys.len(), zs.len()));
309 for i in 0..len {
310 unsafe {
311 let x = *xs.get_unchecked(i);
312 let y = *ys.get_unchecked(i);
313 let z = *zs.get_unchecked(i);
314 black_box(x);
315 black_box(y);
316 black_box(z);
317 }
318 }
319 })
320 });
321 }
322
chunk_by_lazy_1(c: &mut Criterion)323 fn chunk_by_lazy_1(c: &mut Criterion) {
324 let mut data = vec![0; 1024];
325 for (index, elt) in data.iter_mut().enumerate() {
326 *elt = index / 10;
327 }
328
329 let data = black_box(data);
330
331 c.bench_function("chunk by lazy 1", move |b| {
332 b.iter(|| {
333 for (_key, chunk) in &data.iter().chunk_by(|elt| **elt) {
334 for elt in chunk {
335 black_box(elt);
336 }
337 }
338 })
339 });
340 }
341
chunk_by_lazy_2(c: &mut Criterion)342 fn chunk_by_lazy_2(c: &mut Criterion) {
343 let mut data = vec![0; 1024];
344 for (index, elt) in data.iter_mut().enumerate() {
345 *elt = index / 2;
346 }
347
348 let data = black_box(data);
349
350 c.bench_function("chunk by lazy 2", move |b| {
351 b.iter(|| {
352 for (_key, chunk) in &data.iter().chunk_by(|elt| **elt) {
353 for elt in chunk {
354 black_box(elt);
355 }
356 }
357 })
358 });
359 }
360
slice_chunks(c: &mut Criterion)361 fn slice_chunks(c: &mut Criterion) {
362 let data = vec![0; 1024];
363
364 let data = black_box(data);
365 let sz = black_box(10);
366
367 c.bench_function("slice chunks", move |b| {
368 b.iter(|| {
369 for chunk in data.chunks(sz) {
370 for elt in chunk {
371 black_box(elt);
372 }
373 }
374 })
375 });
376 }
377
chunks_lazy_1(c: &mut Criterion)378 fn chunks_lazy_1(c: &mut Criterion) {
379 let data = vec![0; 1024];
380
381 let data = black_box(data);
382 let sz = black_box(10);
383
384 c.bench_function("chunks lazy 1", move |b| {
385 b.iter(|| {
386 for chunk in &data.iter().chunks(sz) {
387 for elt in chunk {
388 black_box(elt);
389 }
390 }
391 })
392 });
393 }
394
equal(c: &mut Criterion)395 fn equal(c: &mut Criterion) {
396 let data = vec![7; 1024];
397 let l = data.len();
398 let alpha = black_box(&data[1..]);
399 let beta = black_box(&data[..l - 1]);
400
401 c.bench_function("equal", move |b| b.iter(|| itertools::equal(alpha, beta)));
402 }
403
merge_default(c: &mut Criterion)404 fn merge_default(c: &mut Criterion) {
405 let mut data1 = vec![0; 1024];
406 let mut data2 = vec![0; 800];
407 let mut x = 0;
408
409 #[allow(clippy::explicit_counter_loop, clippy::unused_enumerate_index)]
410 for (_, elt) in data1.iter_mut().enumerate() {
411 *elt = x;
412 x += 1;
413 }
414
415 let mut y = 0;
416 for (i, elt) in data2.iter_mut().enumerate() {
417 *elt += y;
418 if i % 3 == 0 {
419 y += 3;
420 } else {
421 y += 0;
422 }
423 }
424 let data1 = black_box(data1);
425 let data2 = black_box(data2);
426
427 c.bench_function("merge default", move |b| {
428 b.iter(|| data1.iter().merge(&data2).count())
429 });
430 }
431
merge_by_cmp(c: &mut Criterion)432 fn merge_by_cmp(c: &mut Criterion) {
433 let mut data1 = vec![0; 1024];
434 let mut data2 = vec![0; 800];
435 let mut x = 0;
436
437 #[allow(clippy::explicit_counter_loop, clippy::unused_enumerate_index)]
438 for (_, elt) in data1.iter_mut().enumerate() {
439 *elt = x;
440 x += 1;
441 }
442
443 let mut y = 0;
444 for (i, elt) in data2.iter_mut().enumerate() {
445 *elt += y;
446 if i % 3 == 0 {
447 y += 3;
448 } else {
449 y += 0;
450 }
451 }
452 let data1 = black_box(data1);
453 let data2 = black_box(data2);
454
455 c.bench_function("merge by cmp", move |b| {
456 b.iter(|| data1.iter().merge_by(&data2, PartialOrd::le).count())
457 });
458 }
459
merge_by_lt(c: &mut Criterion)460 fn merge_by_lt(c: &mut Criterion) {
461 let mut data1 = vec![0; 1024];
462 let mut data2 = vec![0; 800];
463 let mut x = 0;
464
465 #[allow(clippy::explicit_counter_loop, clippy::unused_enumerate_index)]
466 for (_, elt) in data1.iter_mut().enumerate() {
467 *elt = x;
468 x += 1;
469 }
470
471 let mut y = 0;
472 for (i, elt) in data2.iter_mut().enumerate() {
473 *elt += y;
474 if i % 3 == 0 {
475 y += 3;
476 } else {
477 y += 0;
478 }
479 }
480 let data1 = black_box(data1);
481 let data2 = black_box(data2);
482
483 c.bench_function("merge by lt", move |b| {
484 b.iter(|| data1.iter().merge_by(&data2, |a, b| a <= b).count())
485 });
486 }
487
kmerge_default(c: &mut Criterion)488 fn kmerge_default(c: &mut Criterion) {
489 let mut data1 = vec![0; 1024];
490 let mut data2 = vec![0; 800];
491 let mut x = 0;
492
493 #[allow(clippy::explicit_counter_loop, clippy::unused_enumerate_index)]
494 for (_, elt) in data1.iter_mut().enumerate() {
495 *elt = x;
496 x += 1;
497 }
498
499 let mut y = 0;
500 for (i, elt) in data2.iter_mut().enumerate() {
501 *elt += y;
502 if i % 3 == 0 {
503 y += 3;
504 } else {
505 y += 0;
506 }
507 }
508 let data1 = black_box(data1);
509 let data2 = black_box(data2);
510 let its = &[data1.iter(), data2.iter()];
511
512 c.bench_function("kmerge default", move |b| {
513 b.iter(|| its.iter().cloned().kmerge().count())
514 });
515 }
516
kmerge_tenway(c: &mut Criterion)517 fn kmerge_tenway(c: &mut Criterion) {
518 let mut data = vec![0; 10240];
519
520 let mut state = 1729u16;
521 fn rng(state: &mut u16) -> u16 {
522 let new = state.wrapping_mul(31421).wrapping_add(6927);
523 *state = new;
524 new
525 }
526
527 for elt in &mut data {
528 *elt = rng(&mut state);
529 }
530
531 let mut chunks = Vec::new();
532 let mut rest = &mut data[..];
533 while !rest.is_empty() {
534 let chunk_len = 1 + rng(&mut state) % 512;
535 let chunk_len = cmp::min(rest.len(), chunk_len as usize);
536 let (fst, tail) = { rest }.split_at_mut(chunk_len);
537 fst.sort();
538 chunks.push(fst.iter().cloned());
539 rest = tail;
540 }
541
542 // println!("Chunk lengths: {}", chunks.iter().format_with(", ", |elt, f| f(&elt.len())));
543
544 c.bench_function("kmerge tenway", move |b| {
545 b.iter(|| chunks.iter().cloned().kmerge().count())
546 });
547 }
548
fast_integer_sum<I>(iter: I) -> I::Item where I: IntoIterator, I::Item: Default + Add<Output = I::Item>,549 fn fast_integer_sum<I>(iter: I) -> I::Item
550 where
551 I: IntoIterator,
552 I::Item: Default + Add<Output = I::Item>,
553 {
554 iter.into_iter().fold(<_>::default(), |x, y| x + y)
555 }
556
step_vec_2(c: &mut Criterion)557 fn step_vec_2(c: &mut Criterion) {
558 let v = vec![0; 1024];
559
560 c.bench_function("step vec 2", move |b| {
561 b.iter(|| fast_integer_sum(cloned(v.iter().step_by(2))))
562 });
563 }
564
step_vec_10(c: &mut Criterion)565 fn step_vec_10(c: &mut Criterion) {
566 let v = vec![0; 1024];
567
568 c.bench_function("step vec 10", move |b| {
569 b.iter(|| fast_integer_sum(cloned(v.iter().step_by(10))))
570 });
571 }
572
step_range_2(c: &mut Criterion)573 fn step_range_2(c: &mut Criterion) {
574 let v = black_box(0..1024);
575
576 c.bench_function("step range 2", move |b| {
577 b.iter(|| fast_integer_sum(v.clone().step_by(2)))
578 });
579 }
580
step_range_10(c: &mut Criterion)581 fn step_range_10(c: &mut Criterion) {
582 let v = black_box(0..1024);
583
584 c.bench_function("step range 10", move |b| {
585 b.iter(|| fast_integer_sum(v.clone().step_by(10)))
586 });
587 }
588
vec_iter_mut_partition(c: &mut Criterion)589 fn vec_iter_mut_partition(c: &mut Criterion) {
590 let data = std::iter::repeat(-1024i32..1024)
591 .take(256)
592 .flatten()
593 .collect_vec();
594 c.bench_function("vec iter mut partition", move |b| {
595 b.iter_batched(
596 || data.clone(),
597 |mut data| {
598 black_box(itertools::partition(black_box(&mut data), |n| *n >= 0));
599 },
600 BatchSize::LargeInput,
601 )
602 });
603 }
604
cartesian_product_iterator(c: &mut Criterion)605 fn cartesian_product_iterator(c: &mut Criterion) {
606 let xs = vec![0; 16];
607
608 c.bench_function("cartesian product iterator", move |b| {
609 b.iter(|| {
610 let mut sum = 0;
611 for (&x, &y, &z) in iproduct!(&xs, &xs, &xs) {
612 sum += x;
613 sum += y;
614 sum += z;
615 }
616 sum
617 })
618 });
619 }
620
multi_cartesian_product_iterator(c: &mut Criterion)621 fn multi_cartesian_product_iterator(c: &mut Criterion) {
622 let xs = [vec![0; 16], vec![0; 16], vec![0; 16]];
623
624 c.bench_function("multi cartesian product iterator", move |b| {
625 b.iter(|| {
626 let mut sum = 0;
627 for x in xs.iter().multi_cartesian_product() {
628 sum += x[0];
629 sum += x[1];
630 sum += x[2];
631 }
632 sum
633 })
634 });
635 }
636
cartesian_product_nested_for(c: &mut Criterion)637 fn cartesian_product_nested_for(c: &mut Criterion) {
638 let xs = vec![0; 16];
639
640 c.bench_function("cartesian product nested for", move |b| {
641 b.iter(|| {
642 let mut sum = 0;
643 for &x in &xs {
644 for &y in &xs {
645 for &z in &xs {
646 sum += x;
647 sum += y;
648 sum += z;
649 }
650 }
651 }
652 sum
653 })
654 });
655 }
656
all_equal(c: &mut Criterion)657 fn all_equal(c: &mut Criterion) {
658 let mut xs = vec![0; 5_000_000];
659 xs.extend(vec![1; 5_000_000]);
660
661 c.bench_function("all equal", move |b| b.iter(|| xs.iter().all_equal()));
662 }
663
all_equal_for(c: &mut Criterion)664 fn all_equal_for(c: &mut Criterion) {
665 let mut xs = vec![0; 5_000_000];
666 xs.extend(vec![1; 5_000_000]);
667
668 c.bench_function("all equal for", move |b| {
669 b.iter(|| {
670 for &x in &xs {
671 if x != xs[0] {
672 return false;
673 }
674 }
675 true
676 })
677 });
678 }
679
all_equal_default(c: &mut Criterion)680 fn all_equal_default(c: &mut Criterion) {
681 let mut xs = vec![0; 5_000_000];
682 xs.extend(vec![1; 5_000_000]);
683
684 c.bench_function("all equal default", move |b| {
685 b.iter(|| xs.iter().dedup().nth(1).is_none())
686 });
687 }
688
689 const PERM_COUNT: usize = 6;
690
permutations_iter(c: &mut Criterion)691 fn permutations_iter(c: &mut Criterion) {
692 struct NewIterator(Range<usize>);
693
694 impl Iterator for NewIterator {
695 type Item = usize;
696
697 fn next(&mut self) -> Option<Self::Item> {
698 self.0.next()
699 }
700 }
701
702 c.bench_function("permutations iter", move |b| {
703 b.iter(
704 || {
705 for _ in NewIterator(0..PERM_COUNT).permutations(PERM_COUNT) {}
706 },
707 )
708 });
709 }
710
permutations_range(c: &mut Criterion)711 fn permutations_range(c: &mut Criterion) {
712 c.bench_function("permutations range", move |b| {
713 b.iter(|| for _ in (0..PERM_COUNT).permutations(PERM_COUNT) {})
714 });
715 }
716
permutations_slice(c: &mut Criterion)717 fn permutations_slice(c: &mut Criterion) {
718 let v = (0..PERM_COUNT).collect_vec();
719
720 c.bench_function("permutations slice", move |b| {
721 b.iter(|| for _ in v.as_slice().iter().permutations(PERM_COUNT) {})
722 });
723 }
724
725 criterion_group!(
726 benches,
727 slice_iter,
728 slice_iter_rev,
729 zip_default_zip,
730 zipdot_i32_default_zip,
731 zipdot_f32_default_zip,
732 zip_default_zip3,
733 zip_slices_ziptuple,
734 zip_checked_counted_loop,
735 zipdot_i32_checked_counted_loop,
736 zipdot_f32_checked_counted_loop,
737 zipdot_f32_checked_counted_unrolled_loop,
738 zip_unchecked_counted_loop,
739 zipdot_i32_unchecked_counted_loop,
740 zipdot_f32_unchecked_counted_loop,
741 zip_unchecked_counted_loop3,
742 chunk_by_lazy_1,
743 chunk_by_lazy_2,
744 slice_chunks,
745 chunks_lazy_1,
746 equal,
747 merge_default,
748 merge_by_cmp,
749 merge_by_lt,
750 kmerge_default,
751 kmerge_tenway,
752 step_vec_2,
753 step_vec_10,
754 step_range_2,
755 step_range_10,
756 vec_iter_mut_partition,
757 cartesian_product_iterator,
758 multi_cartesian_product_iterator,
759 cartesian_product_nested_for,
760 all_equal,
761 all_equal_for,
762 all_equal_default,
763 permutations_iter,
764 permutations_range,
765 permutations_slice,
766 );
767 criterion_main!(benches);
768