/*!
Provides architecture independent implementations of `memchr` and friends.

The main types in this module are [`One`], [`Two`] and [`Three`]. They are for
searching for one, two or three distinct bytes, respectively, in a haystack.
Each type also has corresponding double ended iterators. These searchers
are typically slower than hand-coded vector routines accomplishing the same
task, but are also typically faster than naive scalar code. These routines
effectively work by treating a `usize` as a vector of 8-bit lanes, and thus
achieve some level of data parallelism even without explicit vector support.

The `One` searcher also provides a [`One::count`] routine for efficiently
counting the number of times a single byte occurs in a haystack. This is
useful, for example, for counting the number of lines in a haystack. This
routine exists because it is usually faster, especially with a high match
count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its
`Iterator::count` implementation to use this routine.)

Only one, two and three bytes are supported because three bytes is about
the point where one sees diminishing returns. Beyond this point, it's
probably (but not necessarily) better to just use a simple `[bool; 256]` array
or similar. However, it depends mightily on the specific workload and the
expected match frequency.
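
As a quick orientation, here is a brief usage sketch (assuming this module is
reachable as `memchr::arch::all::memchr`, as in the `memchr` crate):

```
use memchr::arch::all::memchr::One;

let searcher = One::new(b'\n');
let haystack = b"foo\nbar\nbaz";
assert_eq!(Some(3), searcher.find(haystack));
assert_eq!(Some(7), searcher.rfind(haystack));
assert_eq!(2, searcher.count(haystack));
```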
*/

use crate::{arch::generic::memchr as generic, ext::Pointer};

/// The number of bytes in a single `usize` value.
const USIZE_BYTES: usize = (usize::BITS / 8) as usize;
/// The bits that must be zero for a `*const usize` to be properly aligned.
const USIZE_ALIGN: usize = USIZE_BYTES - 1;

/// Finds all occurrences of a single byte in a haystack.
#[derive(Clone, Copy, Debug)]
pub struct One {
    s1: u8,
    v1: usize,
}

impl One {
    /// The number of bytes we examine per each iteration of our search loop.
    const LOOP_BYTES: usize = 2 * USIZE_BYTES;

    /// Create a new searcher that finds occurrences of the byte given.
    #[inline]
    pub fn new(needle: u8) -> One {
        One { s1: needle, v1: splat(needle) }
    }

    /// A test-only routine so that we can bundle a bunch of quickcheck
    /// properties into a single macro. Basically, this provides a constructor
    /// that makes it identical to most other memchr implementations, which
    /// have fallible constructors.
    #[cfg(test)]
    pub(crate) fn try_new(needle: u8) -> Option<One> {
        Some(One::new(needle))
    }

    /// Return the first occurrence of the needle in the given haystack. If no
    /// such occurrence exists, then `None` is returned.
    ///
    /// The occurrence is reported as an offset into `haystack`. Its maximum
    /// value for a non-empty haystack is `haystack.len() - 1`.
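    ///
    /// # Example
    ///
    /// A brief sketch of the expected behavior:
    ///
    /// ```
    /// use memchr::arch::all::memchr::One;
    ///
    /// let searcher = One::new(b'z');
    /// assert_eq!(Some(2), searcher.find(b"xyz"));
    /// assert_eq!(None, searcher.find(b"xyx"));
    /// ```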
    #[inline]
    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
        // falls within the bounds of the start and end pointers.
        unsafe {
            generic::search_slice_with_raw(haystack, |s, e| {
                self.find_raw(s, e)
            })
        }
    }

    /// Return the last occurrence of the needle in the given haystack. If no
    /// such occurrence exists, then `None` is returned.
    ///
    /// The occurrence is reported as an offset into `haystack`. Its maximum
    /// value for a non-empty haystack is `haystack.len() - 1`.
    #[inline]
    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
        // falls within the bounds of the start and end pointers.
        unsafe {
            generic::search_slice_with_raw(haystack, |s, e| {
                self.rfind_raw(s, e)
            })
        }
    }

    /// Counts all occurrences of this byte in the given haystack.
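    ///
    /// # Example
    ///
    /// A short sketch, counting line terminators:
    ///
    /// ```
    /// use memchr::arch::all::memchr::One;
    ///
    /// let searcher = One::new(b'\n');
    /// assert_eq!(3, searcher.count(b"a\nb\nc\n"));
    /// ```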
    #[inline]
    pub fn count(&self, haystack: &[u8]) -> usize {
        // SAFETY: All of our pointers are derived directly from a borrowed
        // slice, which is guaranteed to be valid.
        unsafe {
            let start = haystack.as_ptr();
            let end = start.add(haystack.len());
            self.count_raw(start, end)
        }
    }

    /// Like `find`, but accepts and returns raw pointers.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    ///
    /// Note that callers may pass a pair of pointers such that `start >= end`.
    /// In that case, `None` will always be returned.
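    ///
    /// # Example
    ///
    /// A minimal sketch of driving this routine with pointers derived from a
    /// slice:
    ///
    /// ```
    /// use memchr::arch::all::memchr::One;
    ///
    /// let haystack = b"xxbxx";
    /// let searcher = One::new(b'b');
    /// // SAFETY: Both pointers are derived from, and in bounds of, the
    /// // same slice.
    /// let found = unsafe {
    ///     let start = haystack.as_ptr();
    ///     let end = start.add(haystack.len());
    ///     searcher.find_raw(start, end)
    /// };
    /// // SAFETY: Any returned pointer is within the bounds of the slice.
    /// let offset = found.map(|p| unsafe { p.offset_from(haystack.as_ptr()) });
    /// assert_eq!(Some(2), offset);
    /// ```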
    #[inline]
    pub unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        if start >= end {
            return None;
        }
        let confirm = |b| self.confirm(b);
        let len = end.distance(start);
        if len < USIZE_BYTES {
            return generic::fwd_byte_by_byte(start, end, confirm);
        }

        // The start of the search may not be aligned to `*const usize`,
        // so we do an unaligned load here.
        let chunk = start.cast::<usize>().read_unaligned();
        if self.has_needle(chunk) {
            return generic::fwd_byte_by_byte(start, end, confirm);
        }

        // And now we start our search at a guaranteed aligned position.
        // The first iteration of the loop below will overlap with the
        // unaligned chunk above in cases where the search starts at an
        // unaligned offset, but that's okay since we're only here if the
        // read above didn't find a match.
        let mut cur =
            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
        debug_assert!(cur > start);
        if len <= One::LOOP_BYTES {
            return generic::fwd_byte_by_byte(cur, end, confirm);
        }
        debug_assert!(end.sub(One::LOOP_BYTES) >= start);
        while cur <= end.sub(One::LOOP_BYTES) {
            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);

            let a = cur.cast::<usize>().read();
            let b = cur.add(USIZE_BYTES).cast::<usize>().read();
            if self.has_needle(a) || self.has_needle(b) {
                break;
            }
            cur = cur.add(One::LOOP_BYTES);
        }
        generic::fwd_byte_by_byte(cur, end, confirm)
    }

    /// Like `rfind`, but accepts and returns raw pointers.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    ///
    /// Note that callers may pass a pair of pointers such that `start >= end`.
    /// In that case, `None` will always be returned.
    #[inline]
    pub unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        if start >= end {
            return None;
        }
        let confirm = |b| self.confirm(b);
        let len = end.distance(start);
        if len < USIZE_BYTES {
            return generic::rev_byte_by_byte(start, end, confirm);
        }

        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
        if self.has_needle(chunk) {
            return generic::rev_byte_by_byte(start, end, confirm);
        }

        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
        debug_assert!(start <= cur && cur <= end);
        if len <= One::LOOP_BYTES {
            return generic::rev_byte_by_byte(start, cur, confirm);
        }
        while cur >= start.add(One::LOOP_BYTES) {
            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);

            let a = cur.sub(2 * USIZE_BYTES).cast::<usize>().read();
            let b = cur.sub(1 * USIZE_BYTES).cast::<usize>().read();
            if self.has_needle(a) || self.has_needle(b) {
                break;
            }
            cur = cur.sub(One::LOOP_BYTES);
        }
        generic::rev_byte_by_byte(start, cur, confirm)
    }

    /// Counts all occurrences of this byte in the given haystack represented
    /// by raw pointers.
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    ///
    /// Note that callers may pass a pair of pointers such that `start >= end`.
    /// In that case, `0` will always be returned.
    #[inline]
    pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize {
        if start >= end {
            return 0;
        }
        // Sadly I couldn't get the SWAR approach to work here, so we just do
        // one byte at a time for now. PRs to improve this are welcome.
        let mut ptr = start;
        let mut count = 0;
        while ptr < end {
            count += (ptr.read() == self.s1) as usize;
            ptr = ptr.offset(1);
        }
        count
    }

    /// Returns an iterator over all occurrences of the needle byte in the
    /// given haystack.
    ///
    /// The iterator returned implements `DoubleEndedIterator`. This means it
    /// can also be used to find occurrences in reverse order.
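    ///
    /// # Example
    ///
    /// A small sketch of forward and reverse iteration:
    ///
    /// ```
    /// use memchr::arch::all::memchr::One;
    ///
    /// let searcher = One::new(b'a');
    /// let offsets: Vec<usize> = searcher.iter(b"banana").collect();
    /// assert_eq!(offsets, vec![1, 3, 5]);
    /// let reversed: Vec<usize> = searcher.iter(b"banana").rev().collect();
    /// assert_eq!(reversed, vec![5, 3, 1]);
    /// ```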
    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> {
        OneIter { searcher: self, it: generic::Iter::new(haystack) }
    }

    #[inline(always)]
    fn has_needle(&self, chunk: usize) -> bool {
        has_zero_byte(self.v1 ^ chunk)
    }

    #[inline(always)]
    fn confirm(&self, haystack_byte: u8) -> bool {
        self.s1 == haystack_byte
    }
}

/// An iterator over all occurrences of a single byte in a haystack.
///
/// This iterator implements `DoubleEndedIterator`, which means it can also be
/// used to find occurrences in reverse order.
///
/// This iterator is created by the [`One::iter`] method.
///
/// The lifetime parameters are as follows:
///
/// * `'a` refers to the lifetime of the underlying [`One`] searcher.
/// * `'h` refers to the lifetime of the haystack being searched.
#[derive(Clone, Debug)]
pub struct OneIter<'a, 'h> {
    /// The underlying memchr searcher.
    searcher: &'a One,
    /// Generic iterator implementation.
    it: generic::Iter<'h>,
}

impl<'a, 'h> Iterator for OneIter<'a, 'h> {
    type Item = usize;

    #[inline]
    fn next(&mut self) -> Option<usize> {
        // SAFETY: We rely on the generic iterator to provide valid start
        // and end pointers, but we guarantee that any pointer returned by
        // 'find_raw' falls within the bounds of the start and end pointer.
        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
    }

    #[inline]
    fn count(self) -> usize {
        self.it.count(|s, e| {
            // SAFETY: We rely on our generic iterator to return valid start
            // and end pointers.
            unsafe { self.searcher.count_raw(s, e) }
        })
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> {
    #[inline]
    fn next_back(&mut self) -> Option<usize> {
        // SAFETY: We rely on the generic iterator to provide valid start
        // and end pointers, but we guarantee that any pointer returned by
        // 'rfind_raw' falls within the bounds of the start and end pointer.
        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
    }
}

/// Finds all occurrences of two bytes in a haystack.
///
/// That is, this reports matches of one of two possible bytes. For example,
/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`,
/// `4` and `5`.
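///
/// # Example
///
/// A short sketch confirming the offsets described above:
///
/// ```
/// use memchr::arch::all::memchr::Two;
///
/// let searcher = Two::new(b'a', b'b');
/// let offsets: Vec<usize> = searcher.iter(b"afoobar").collect();
/// assert_eq!(offsets, vec![0, 4, 5]);
/// ```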
#[derive(Clone, Copy, Debug)]
pub struct Two {
    s1: u8,
    s2: u8,
    v1: usize,
    v2: usize,
}

impl Two {
    /// Create a new searcher that finds occurrences of the two needle bytes
    /// given.
    #[inline]
    pub fn new(needle1: u8, needle2: u8) -> Two {
        Two {
            s1: needle1,
            s2: needle2,
            v1: splat(needle1),
            v2: splat(needle2),
        }
    }

    /// A test-only routine so that we can bundle a bunch of quickcheck
    /// properties into a single macro. Basically, this provides a constructor
    /// that makes it identical to most other memchr implementations, which
    /// have fallible constructors.
    #[cfg(test)]
    pub(crate) fn try_new(needle1: u8, needle2: u8) -> Option<Two> {
        Some(Two::new(needle1, needle2))
    }

    /// Return the first occurrence of one of the needle bytes in the given
    /// haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// The occurrence is reported as an offset into `haystack`. Its maximum
    /// value for a non-empty haystack is `haystack.len() - 1`.
    #[inline]
    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
        // falls within the bounds of the start and end pointers.
        unsafe {
            generic::search_slice_with_raw(haystack, |s, e| {
                self.find_raw(s, e)
            })
        }
    }

    /// Return the last occurrence of one of the needle bytes in the given
    /// haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// The occurrence is reported as an offset into `haystack`. Its maximum
    /// value for a non-empty haystack is `haystack.len() - 1`.
    #[inline]
    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
        // falls within the bounds of the start and end pointers.
        unsafe {
            generic::search_slice_with_raw(haystack, |s, e| {
                self.rfind_raw(s, e)
            })
        }
    }

    /// Like `find`, but accepts and returns raw pointers.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    ///
    /// Note that callers may pass a pair of pointers such that `start >= end`.
    /// In that case, `None` will always be returned.
    #[inline]
    pub unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        if start >= end {
            return None;
        }
        let confirm = |b| self.confirm(b);
        let len = end.distance(start);
        if len < USIZE_BYTES {
            return generic::fwd_byte_by_byte(start, end, confirm);
        }

        // The start of the search may not be aligned to `*const usize`,
        // so we do an unaligned load here.
        let chunk = start.cast::<usize>().read_unaligned();
        if self.has_needle(chunk) {
            return generic::fwd_byte_by_byte(start, end, confirm);
        }

        // And now we start our search at a guaranteed aligned position.
        // The first iteration of the loop below will overlap with the
        // unaligned chunk above in cases where the search starts at an
        // unaligned offset, but that's okay since we're only here if the
        // read above didn't find a match.
        let mut cur =
            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
        debug_assert!(cur > start);
        debug_assert!(end.sub(USIZE_BYTES) >= start);
        while cur <= end.sub(USIZE_BYTES) {
            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);

            let chunk = cur.cast::<usize>().read();
            if self.has_needle(chunk) {
                break;
            }
            cur = cur.add(USIZE_BYTES);
        }
        generic::fwd_byte_by_byte(cur, end, confirm)
    }

    /// Like `rfind`, but accepts and returns raw pointers.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    ///
    /// Note that callers may pass a pair of pointers such that `start >= end`.
    /// In that case, `None` will always be returned.
    #[inline]
    pub unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        if start >= end {
            return None;
        }
        let confirm = |b| self.confirm(b);
        let len = end.distance(start);
        if len < USIZE_BYTES {
            return generic::rev_byte_by_byte(start, end, confirm);
        }

        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
        if self.has_needle(chunk) {
            return generic::rev_byte_by_byte(start, end, confirm);
        }

        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
        debug_assert!(start <= cur && cur <= end);
        while cur >= start.add(USIZE_BYTES) {
            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);

            let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read();
            if self.has_needle(chunk) {
                break;
            }
            cur = cur.sub(USIZE_BYTES);
        }
        generic::rev_byte_by_byte(start, cur, confirm)
    }

    /// Returns an iterator over all occurrences of one of the needle bytes in
    /// the given haystack.
    ///
    /// The iterator returned implements `DoubleEndedIterator`. This means it
    /// can also be used to find occurrences in reverse order.
    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> {
        TwoIter { searcher: self, it: generic::Iter::new(haystack) }
    }

    #[inline(always)]
    fn has_needle(&self, chunk: usize) -> bool {
        has_zero_byte(self.v1 ^ chunk) || has_zero_byte(self.v2 ^ chunk)
    }

    #[inline(always)]
    fn confirm(&self, haystack_byte: u8) -> bool {
        self.s1 == haystack_byte || self.s2 == haystack_byte
    }
}

/// An iterator over all occurrences of two possible bytes in a haystack.
///
/// This iterator implements `DoubleEndedIterator`, which means it can also be
/// used to find occurrences in reverse order.
///
/// This iterator is created by the [`Two::iter`] method.
///
/// The lifetime parameters are as follows:
///
/// * `'a` refers to the lifetime of the underlying [`Two`] searcher.
/// * `'h` refers to the lifetime of the haystack being searched.
#[derive(Clone, Debug)]
pub struct TwoIter<'a, 'h> {
    /// The underlying memchr searcher.
    searcher: &'a Two,
    /// Generic iterator implementation.
    it: generic::Iter<'h>,
}

impl<'a, 'h> Iterator for TwoIter<'a, 'h> {
    type Item = usize;

    #[inline]
    fn next(&mut self) -> Option<usize> {
        // SAFETY: We rely on the generic iterator to provide valid start
        // and end pointers, but we guarantee that any pointer returned by
        // 'find_raw' falls within the bounds of the start and end pointer.
        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> {
    #[inline]
    fn next_back(&mut self) -> Option<usize> {
        // SAFETY: We rely on the generic iterator to provide valid start
        // and end pointers, but we guarantee that any pointer returned by
        // 'rfind_raw' falls within the bounds of the start and end pointer.
        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
    }
}

/// Finds all occurrences of three bytes in a haystack.
///
/// That is, this reports matches of one of three possible bytes. For example,
/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets
/// `0`, `2`, `3`, `4` and `5`.
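///
/// # Example
///
/// A short sketch confirming the offsets described above:
///
/// ```
/// use memchr::arch::all::memchr::Three;
///
/// let searcher = Three::new(b'a', b'b', b'o');
/// let offsets: Vec<usize> = searcher.iter(b"afoobar").collect();
/// assert_eq!(offsets, vec![0, 2, 3, 4, 5]);
/// ```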
#[derive(Clone, Copy, Debug)]
pub struct Three {
    s1: u8,
    s2: u8,
    s3: u8,
    v1: usize,
    v2: usize,
    v3: usize,
}

impl Three {
    /// Create a new searcher that finds occurrences of the three needle bytes
    /// given.
    #[inline]
    pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Three {
        Three {
            s1: needle1,
            s2: needle2,
            s3: needle3,
            v1: splat(needle1),
            v2: splat(needle2),
            v3: splat(needle3),
        }
    }

    /// A test-only routine so that we can bundle a bunch of quickcheck
    /// properties into a single macro. Basically, this provides a constructor
    /// that makes it identical to most other memchr implementations, which
    /// have fallible constructors.
    #[cfg(test)]
    pub(crate) fn try_new(
        needle1: u8,
        needle2: u8,
        needle3: u8,
    ) -> Option<Three> {
        Some(Three::new(needle1, needle2, needle3))
    }

    /// Return the first occurrence of one of the needle bytes in the given
    /// haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// The occurrence is reported as an offset into `haystack`. Its maximum
    /// value for a non-empty haystack is `haystack.len() - 1`.
    #[inline]
    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `find_raw` guarantees that if a pointer is returned, it
        // falls within the bounds of the start and end pointers.
        unsafe {
            generic::search_slice_with_raw(haystack, |s, e| {
                self.find_raw(s, e)
            })
        }
    }

    /// Return the last occurrence of one of the needle bytes in the given
    /// haystack. If no such occurrence exists, then `None` is returned.
    ///
    /// The occurrence is reported as an offset into `haystack`. Its maximum
    /// value for a non-empty haystack is `haystack.len() - 1`.
    #[inline]
    pub fn rfind(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it
        // falls within the bounds of the start and end pointers.
        unsafe {
            generic::search_slice_with_raw(haystack, |s, e| {
                self.rfind_raw(s, e)
            })
        }
    }

    /// Like `find`, but accepts and returns raw pointers.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    ///
    /// Note that callers may pass a pair of pointers such that `start >= end`.
    /// In that case, `None` will always be returned.
    #[inline]
    pub unsafe fn find_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        if start >= end {
            return None;
        }
        let confirm = |b| self.confirm(b);
        let len = end.distance(start);
        if len < USIZE_BYTES {
            return generic::fwd_byte_by_byte(start, end, confirm);
        }

        // The start of the search may not be aligned to `*const usize`,
        // so we do an unaligned load here.
        let chunk = start.cast::<usize>().read_unaligned();
        if self.has_needle(chunk) {
            return generic::fwd_byte_by_byte(start, end, confirm);
        }

        // And now we start our search at a guaranteed aligned position.
        // The first iteration of the loop below will overlap with the
        // unaligned chunk above in cases where the search starts at an
        // unaligned offset, but that's okay since we're only here if the
        // read above didn't find a match.
        let mut cur =
            start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN));
        debug_assert!(cur > start);
        debug_assert!(end.sub(USIZE_BYTES) >= start);
        while cur <= end.sub(USIZE_BYTES) {
            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);

            let chunk = cur.cast::<usize>().read();
            if self.has_needle(chunk) {
                break;
            }
            cur = cur.add(USIZE_BYTES);
        }
        generic::fwd_byte_by_byte(cur, end, confirm)
    }

    /// Like `rfind`, but accepts and returns raw pointers.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `< end`.
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    /// must either be in bounds or at most one byte past the end of the
    /// allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    /// object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    /// address space.
    ///
    /// Note that callers may pass a pair of pointers such that `start >= end`.
    /// In that case, `None` will always be returned.
    #[inline]
    pub unsafe fn rfind_raw(
        &self,
        start: *const u8,
        end: *const u8,
    ) -> Option<*const u8> {
        if start >= end {
            return None;
        }
        let confirm = |b| self.confirm(b);
        let len = end.distance(start);
        if len < USIZE_BYTES {
            return generic::rev_byte_by_byte(start, end, confirm);
        }

        let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned();
        if self.has_needle(chunk) {
            return generic::rev_byte_by_byte(start, end, confirm);
        }

        let mut cur = end.sub(end.as_usize() & USIZE_ALIGN);
        debug_assert!(start <= cur && cur <= end);
        while cur >= start.add(USIZE_BYTES) {
            debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES);

            let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read();
            if self.has_needle(chunk) {
                break;
            }
            cur = cur.sub(USIZE_BYTES);
        }
        generic::rev_byte_by_byte(start, cur, confirm)
    }

    /// Returns an iterator over all occurrences of one of the needle bytes in
    /// the given haystack.
    ///
    /// The iterator returned implements `DoubleEndedIterator`. This means it
    /// can also be used to find occurrences in reverse order.
    pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> {
        ThreeIter { searcher: self, it: generic::Iter::new(haystack) }
    }

    #[inline(always)]
    fn has_needle(&self, chunk: usize) -> bool {
        has_zero_byte(self.v1 ^ chunk)
            || has_zero_byte(self.v2 ^ chunk)
            || has_zero_byte(self.v3 ^ chunk)
    }

    #[inline(always)]
    fn confirm(&self, haystack_byte: u8) -> bool {
        self.s1 == haystack_byte
            || self.s2 == haystack_byte
            || self.s3 == haystack_byte
    }
}

/// An iterator over all occurrences of three possible bytes in a haystack.
///
/// This iterator implements `DoubleEndedIterator`, which means it can also be
/// used to find occurrences in reverse order.
///
/// This iterator is created by the [`Three::iter`] method.
///
/// The lifetime parameters are as follows:
///
/// * `'a` refers to the lifetime of the underlying [`Three`] searcher.
/// * `'h` refers to the lifetime of the haystack being searched.
#[derive(Clone, Debug)]
pub struct ThreeIter<'a, 'h> {
    /// The underlying memchr searcher.
    searcher: &'a Three,
    /// Generic iterator implementation.
    it: generic::Iter<'h>,
}

impl<'a, 'h> Iterator for ThreeIter<'a, 'h> {
    type Item = usize;

    #[inline]
    fn next(&mut self) -> Option<usize> {
        // SAFETY: We rely on the generic iterator to provide valid start
        // and end pointers, but we guarantee that any pointer returned by
        // 'find_raw' falls within the bounds of the start and end pointer.
        unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> {
    #[inline]
    fn next_back(&mut self) -> Option<usize> {
        // SAFETY: We rely on the generic iterator to provide valid start
        // and end pointers, but we guarantee that any pointer returned by
        // 'rfind_raw' falls within the bounds of the start and end pointer.
        unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) }
    }
}

/// Return `true` if `x` contains any zero byte.
///
/// That is, this routine treats `x` as a register of 8-bit lanes and returns
/// true when any of those lanes is `0`.
///
/// From "Matters Computational" by J. Arndt.
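///
/// As a worked illustration (using a hypothetical 32-bit `usize` for
/// brevity): for `x = 0x41004141`, we get `x - 0x01010101 = 0x3FFF4040`, and
/// then `0x3FFF4040 & !0x41004141 & 0x80808080 = 0x00800000`, which is
/// non-zero precisely because the second-highest lane of `x` is zero.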
#[inline(always)]
fn has_zero_byte(x: usize) -> bool {
    // "The idea is to subtract one from each of the bytes and then look for
    // bytes where the borrow propagated all the way to the most significant
    // bit."
    const LO: usize = splat(0x01);
    const HI: usize = splat(0x80);

    (x.wrapping_sub(LO) & !x & HI) != 0
}

/// Repeat the given byte into a word size number. That is, every 8 bits
/// is equivalent to the given byte. For example, if `b` is `\x4E` or
/// `01001110` in binary, then the returned value on a 32-bit system would be:
/// `01001110_01001110_01001110_01001110`.
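///
/// Note that `usize::MAX / 255` is the constant with every 8-bit lane set to
/// `0x01` (e.g., `0x01010101` on a 32-bit system), so multiplying it by `b`
/// copies `b` into every lane.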
#[inline(always)]
const fn splat(b: u8) -> usize {
    // TODO: use `usize::from` once it can be used in const context.
    (b as usize) * (usize::MAX / 255)
}

#[cfg(test)]
mod tests {
    use super::*;

    define_memchr_quickcheck!(super, try_new);

    #[test]
    fn forward_one() {
        crate::tests::memchr::Runner::new(1).forward_iter(
            |haystack, needles| {
                Some(One::new(needles[0]).iter(haystack).collect())
            },
        )
    }

    #[test]
    fn reverse_one() {
        crate::tests::memchr::Runner::new(1).reverse_iter(
            |haystack, needles| {
                Some(One::new(needles[0]).iter(haystack).rev().collect())
            },
        )
    }

    #[test]
    fn count_one() {
        crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| {
            Some(One::new(needles[0]).iter(haystack).count())
        })
    }

    #[test]
    fn forward_two() {
        crate::tests::memchr::Runner::new(2).forward_iter(
            |haystack, needles| {
                let n1 = needles.get(0).copied()?;
                let n2 = needles.get(1).copied()?;
                Some(Two::new(n1, n2).iter(haystack).collect())
            },
        )
    }

    #[test]
    fn reverse_two() {
        crate::tests::memchr::Runner::new(2).reverse_iter(
            |haystack, needles| {
                let n1 = needles.get(0).copied()?;
                let n2 = needles.get(1).copied()?;
                Some(Two::new(n1, n2).iter(haystack).rev().collect())
            },
        )
    }

    #[test]
    fn forward_three() {
        crate::tests::memchr::Runner::new(3).forward_iter(
            |haystack, needles| {
                let n1 = needles.get(0).copied()?;
                let n2 = needles.get(1).copied()?;
                let n3 = needles.get(2).copied()?;
                Some(Three::new(n1, n2, n3).iter(haystack).collect())
            },
        )
    }

    #[test]
    fn reverse_three() {
        crate::tests::memchr::Runner::new(3).reverse_iter(
            |haystack, needles| {
                let n1 = needles.get(0).copied()?;
                let n2 = needles.get(1).copied()?;
                let n3 = needles.get(2).copied()?;
                Some(Three::new(n1, n2, n3).iter(haystack).rev().collect())
            },
        )
    }

    // This was found by quickcheck in the course of refactoring this crate
    // after memchr 2.5.0.
    #[test]
    fn regression_double_ended_iterator() {
        let finder = One::new(b'a');
        let haystack = "a";
        let mut it = finder.iter(haystack.as_bytes());
        assert_eq!(Some(0), it.next());
        assert_eq!(None, it.next_back());
    }

    // This regression test was caught by ripgrep's test suite on i686 when
    // upgrading to memchr 2.6. Namely, something about the \x0B bytes here
    // screws with the SWAR counting approach I was using. This regression test
    // prompted me to remove the SWAR counting approach and just replace it
    // with a byte-at-a-time loop.
    #[test]
    fn regression_count_new_lines() {
        let haystack = "01234567\x0b\n\x0b\n\x0b\n\x0b\nx";
        let count = One::new(b'\n').count(haystack.as_bytes());
        assert_eq!(4, count);
    }
}