1 /*!
2 This module provides forward and reverse substring search routines.
3 
4 Unlike the standard library's substring search routines, these work on
5 arbitrary bytes. For all non-empty needles, these routines will report exactly
6 the same values as the corresponding routines in the standard library. For
7 the empty needle, the standard library reports matches only at valid UTF-8
8 boundaries, where as these routines will report matches at every position.
9 
10 Other than being able to work on arbitrary bytes, the primary reason to prefer
11 these routines over the standard library routines is that these will generally
12 be faster. In some cases, significantly so.
13 
14 # Example: iterating over substring matches
15 
16 This example shows how to use [`find_iter`] to find occurrences of a substring
17 in a haystack.
18 
19 ```
20 use memchr::memmem;
21 
22 let haystack = b"foo bar foo baz foo";
23 
24 let mut it = memmem::find_iter(haystack, "foo");
25 assert_eq!(Some(0), it.next());
26 assert_eq!(Some(8), it.next());
27 assert_eq!(Some(16), it.next());
28 assert_eq!(None, it.next());
29 ```
30 
31 # Example: iterating over substring matches in reverse
32 
33 This example shows how to use [`rfind_iter`] to find occurrences of a substring
34 in a haystack starting from the end of the haystack.
35 
36 **NOTE:** This module does not implement double ended iterators, so reverse
37 searches aren't done by calling `rev` on a forward iterator.
38 
39 ```
40 use memchr::memmem;
41 
42 let haystack = b"foo bar foo baz foo";
43 
44 let mut it = memmem::rfind_iter(haystack, "foo");
45 assert_eq!(Some(16), it.next());
46 assert_eq!(Some(8), it.next());
47 assert_eq!(Some(0), it.next());
48 assert_eq!(None, it.next());
49 ```
50 
51 # Example: repeating a search for the same needle
52 
53 It may be possible for the overhead of constructing a substring searcher to be
54 measurable in some workloads. In cases where the same needle is used to search
55 many haystacks, it is possible to do construction once and thus to avoid it for
56 subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for
57 reverse searches).
58 
59 ```
60 use memchr::memmem;
61 
62 let finder = memmem::Finder::new("foo");
63 
64 assert_eq!(Some(4), finder.find(b"baz foo quux"));
65 assert_eq!(None, finder.find(b"quux baz bar"));
66 ```
67 */
68 
69 pub use crate::memmem::searcher::PrefilterConfig as Prefilter;
70 
71 // This is exported here for use in the crate::arch::all::twoway
72 // implementation. This is essentially an abstraction breaker. Namely, the
73 // public API of twoway doesn't support providing a prefilter, but its crate
74 // internal API does. The main reason for this is that I didn't want to do the
75 // API design required to support it without a concrete use case.
76 pub(crate) use crate::memmem::searcher::Pre;
77 
78 use crate::{
79     arch::all::{
80         packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank},
81         rabinkarp,
82     },
83     cow::CowBytes,
84     memmem::searcher::{PrefilterState, Searcher, SearcherRev},
85 };
86 
87 mod searcher;
88 
89 /// Returns an iterator over all non-overlapping occurrences of a substring in
90 /// a haystack.
91 ///
92 /// # Complexity
93 ///
94 /// This routine is guaranteed to have worst case linear time complexity
95 /// with respect to both the needle and the haystack. That is, this runs
96 /// in `O(needle.len() + haystack.len())` time.
97 ///
98 /// This routine is also guaranteed to have worst case constant space
99 /// complexity.
100 ///
101 /// # Examples
102 ///
103 /// Basic usage:
104 ///
105 /// ```
106 /// use memchr::memmem;
107 ///
108 /// let haystack = b"foo bar foo baz foo";
109 /// let mut it = memmem::find_iter(haystack, b"foo");
110 /// assert_eq!(Some(0), it.next());
111 /// assert_eq!(Some(8), it.next());
112 /// assert_eq!(Some(16), it.next());
113 /// assert_eq!(None, it.next());
114 /// ```
115 #[inline]
find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( haystack: &'h [u8], needle: &'n N, ) -> FindIter<'h, 'n>116 pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
117     haystack: &'h [u8],
118     needle: &'n N,
119 ) -> FindIter<'h, 'n> {
120     FindIter::new(haystack, Finder::new(needle))
121 }
122 
123 /// Returns a reverse iterator over all non-overlapping occurrences of a
124 /// substring in a haystack.
125 ///
126 /// # Complexity
127 ///
128 /// This routine is guaranteed to have worst case linear time complexity
129 /// with respect to both the needle and the haystack. That is, this runs
130 /// in `O(needle.len() + haystack.len())` time.
131 ///
132 /// This routine is also guaranteed to have worst case constant space
133 /// complexity.
134 ///
135 /// # Examples
136 ///
137 /// Basic usage:
138 ///
139 /// ```
140 /// use memchr::memmem;
141 ///
142 /// let haystack = b"foo bar foo baz foo";
143 /// let mut it = memmem::rfind_iter(haystack, b"foo");
144 /// assert_eq!(Some(16), it.next());
145 /// assert_eq!(Some(8), it.next());
146 /// assert_eq!(Some(0), it.next());
147 /// assert_eq!(None, it.next());
148 /// ```
149 #[inline]
rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( haystack: &'h [u8], needle: &'n N, ) -> FindRevIter<'h, 'n>150 pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
151     haystack: &'h [u8],
152     needle: &'n N,
153 ) -> FindRevIter<'h, 'n> {
154     FindRevIter::new(haystack, FinderRev::new(needle))
155 }
156 
157 /// Returns the index of the first occurrence of the given needle.
158 ///
159 /// Note that if you're are searching for the same needle in many different
160 /// small haystacks, it may be faster to initialize a [`Finder`] once,
161 /// and reuse it for each search.
162 ///
163 /// # Complexity
164 ///
165 /// This routine is guaranteed to have worst case linear time complexity
166 /// with respect to both the needle and the haystack. That is, this runs
167 /// in `O(needle.len() + haystack.len())` time.
168 ///
169 /// This routine is also guaranteed to have worst case constant space
170 /// complexity.
171 ///
172 /// # Examples
173 ///
174 /// Basic usage:
175 ///
176 /// ```
177 /// use memchr::memmem;
178 ///
179 /// let haystack = b"foo bar baz";
180 /// assert_eq!(Some(0), memmem::find(haystack, b"foo"));
181 /// assert_eq!(Some(4), memmem::find(haystack, b"bar"));
182 /// assert_eq!(None, memmem::find(haystack, b"quux"));
183 /// ```
184 #[inline]
find(haystack: &[u8], needle: &[u8]) -> Option<usize>185 pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
186     if haystack.len() < 64 {
187         rabinkarp::Finder::new(needle).find(haystack, needle)
188     } else {
189         Finder::new(needle).find(haystack)
190     }
191 }
192 
193 /// Returns the index of the last occurrence of the given needle.
194 ///
195 /// Note that if you're are searching for the same needle in many different
196 /// small haystacks, it may be faster to initialize a [`FinderRev`] once,
197 /// and reuse it for each search.
198 ///
199 /// # Complexity
200 ///
201 /// This routine is guaranteed to have worst case linear time complexity
202 /// with respect to both the needle and the haystack. That is, this runs
203 /// in `O(needle.len() + haystack.len())` time.
204 ///
205 /// This routine is also guaranteed to have worst case constant space
206 /// complexity.
207 ///
208 /// # Examples
209 ///
210 /// Basic usage:
211 ///
212 /// ```
213 /// use memchr::memmem;
214 ///
215 /// let haystack = b"foo bar baz";
216 /// assert_eq!(Some(0), memmem::rfind(haystack, b"foo"));
217 /// assert_eq!(Some(4), memmem::rfind(haystack, b"bar"));
218 /// assert_eq!(Some(8), memmem::rfind(haystack, b"ba"));
219 /// assert_eq!(None, memmem::rfind(haystack, b"quux"));
220 /// ```
221 #[inline]
rfind(haystack: &[u8], needle: &[u8]) -> Option<usize>222 pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
223     if haystack.len() < 64 {
224         rabinkarp::FinderRev::new(needle).rfind(haystack, needle)
225     } else {
226         FinderRev::new(needle).rfind(haystack)
227     }
228 }
229 
230 /// An iterator over non-overlapping substring matches.
231 ///
232 /// Matches are reported by the byte offset at which they begin.
233 ///
234 /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
235 /// needle.
236 #[derive(Debug, Clone)]
237 pub struct FindIter<'h, 'n> {
238     haystack: &'h [u8],
239     prestate: PrefilterState,
240     finder: Finder<'n>,
241     pos: usize,
242 }
243 
244 impl<'h, 'n> FindIter<'h, 'n> {
245     #[inline(always)]
new( haystack: &'h [u8], finder: Finder<'n>, ) -> FindIter<'h, 'n>246     pub(crate) fn new(
247         haystack: &'h [u8],
248         finder: Finder<'n>,
249     ) -> FindIter<'h, 'n> {
250         let prestate = PrefilterState::new();
251         FindIter { haystack, prestate, finder, pos: 0 }
252     }
253 
254     /// Convert this iterator into its owned variant, such that it no longer
255     /// borrows the finder and needle.
256     ///
257     /// If this is already an owned iterator, then this is a no-op. Otherwise,
258     /// this copies the needle.
259     ///
260     /// This is only available when the `alloc` feature is enabled.
261     #[cfg(feature = "alloc")]
262     #[inline]
into_owned(self) -> FindIter<'h, 'static>263     pub fn into_owned(self) -> FindIter<'h, 'static> {
264         FindIter {
265             haystack: self.haystack,
266             prestate: self.prestate,
267             finder: self.finder.into_owned(),
268             pos: self.pos,
269         }
270     }
271 }
272 
273 impl<'h, 'n> Iterator for FindIter<'h, 'n> {
274     type Item = usize;
275 
next(&mut self) -> Option<usize>276     fn next(&mut self) -> Option<usize> {
277         let needle = self.finder.needle();
278         let haystack = self.haystack.get(self.pos..)?;
279         let idx =
280             self.finder.searcher.find(&mut self.prestate, haystack, needle)?;
281 
282         let pos = self.pos + idx;
283         self.pos = pos + needle.len().max(1);
284 
285         Some(pos)
286     }
287 
size_hint(&self) -> (usize, Option<usize>)288     fn size_hint(&self) -> (usize, Option<usize>) {
289         // The largest possible number of non-overlapping matches is the
290         // quotient of the haystack and the needle (or the length of the
291         // haystack, if the needle is empty)
292         match self.haystack.len().checked_sub(self.pos) {
293             None => (0, Some(0)),
294             Some(haystack_len) => match self.finder.needle().len() {
295                 // Empty needles always succeed and match at every point
296                 // (including the very end)
297                 0 => (
298                     haystack_len.saturating_add(1),
299                     haystack_len.checked_add(1),
300                 ),
301                 needle_len => (0, Some(haystack_len / needle_len)),
302             },
303         }
304     }
305 }
306 
307 /// An iterator over non-overlapping substring matches in reverse.
308 ///
309 /// Matches are reported by the byte offset at which they begin.
310 ///
311 /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
312 /// needle.
313 #[derive(Clone, Debug)]
314 pub struct FindRevIter<'h, 'n> {
315     haystack: &'h [u8],
316     finder: FinderRev<'n>,
317     /// When searching with an empty needle, this gets set to `None` after
318     /// we've yielded the last element at `0`.
319     pos: Option<usize>,
320 }
321 
322 impl<'h, 'n> FindRevIter<'h, 'n> {
323     #[inline(always)]
new( haystack: &'h [u8], finder: FinderRev<'n>, ) -> FindRevIter<'h, 'n>324     pub(crate) fn new(
325         haystack: &'h [u8],
326         finder: FinderRev<'n>,
327     ) -> FindRevIter<'h, 'n> {
328         let pos = Some(haystack.len());
329         FindRevIter { haystack, finder, pos }
330     }
331 
332     /// Convert this iterator into its owned variant, such that it no longer
333     /// borrows the finder and needle.
334     ///
335     /// If this is already an owned iterator, then this is a no-op. Otherwise,
336     /// this copies the needle.
337     ///
338     /// This is only available when the `std` feature is enabled.
339     #[cfg(feature = "alloc")]
340     #[inline]
into_owned(self) -> FindRevIter<'h, 'static>341     pub fn into_owned(self) -> FindRevIter<'h, 'static> {
342         FindRevIter {
343             haystack: self.haystack,
344             finder: self.finder.into_owned(),
345             pos: self.pos,
346         }
347     }
348 }
349 
350 impl<'h, 'n> Iterator for FindRevIter<'h, 'n> {
351     type Item = usize;
352 
next(&mut self) -> Option<usize>353     fn next(&mut self) -> Option<usize> {
354         let pos = match self.pos {
355             None => return None,
356             Some(pos) => pos,
357         };
358         let result = self.finder.rfind(&self.haystack[..pos]);
359         match result {
360             None => None,
361             Some(i) => {
362                 if pos == i {
363                     self.pos = pos.checked_sub(1);
364                 } else {
365                     self.pos = Some(i);
366                 }
367                 Some(i)
368             }
369         }
370     }
371 }
372 
373 /// A single substring searcher fixed to a particular needle.
374 ///
375 /// The purpose of this type is to permit callers to construct a substring
376 /// searcher that can be used to search haystacks without the overhead of
377 /// constructing the searcher in the first place. This is a somewhat niche
378 /// concern when it's necessary to re-use the same needle to search multiple
379 /// different haystacks with as little overhead as possible. In general, using
380 /// [`find`] is good enough, but `Finder` is useful when you can meaningfully
381 /// observe searcher construction time in a profile.
382 ///
383 /// When the `std` feature is enabled, then this type has an `into_owned`
384 /// version which permits building a `Finder` that is not connected to
385 /// the lifetime of its needle.
386 #[derive(Clone, Debug)]
387 pub struct Finder<'n> {
388     needle: CowBytes<'n>,
389     searcher: Searcher,
390 }
391 
392 impl<'n> Finder<'n> {
393     /// Create a new finder for the given needle.
394     #[inline]
new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n>395     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> {
396         FinderBuilder::new().build_forward(needle)
397     }
398 
399     /// Returns the index of the first occurrence of this needle in the given
400     /// haystack.
401     ///
402     /// # Complexity
403     ///
404     /// This routine is guaranteed to have worst case linear time complexity
405     /// with respect to both the needle and the haystack. That is, this runs
406     /// in `O(needle.len() + haystack.len())` time.
407     ///
408     /// This routine is also guaranteed to have worst case constant space
409     /// complexity.
410     ///
411     /// # Examples
412     ///
413     /// Basic usage:
414     ///
415     /// ```
416     /// use memchr::memmem::Finder;
417     ///
418     /// let haystack = b"foo bar baz";
419     /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
420     /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
421     /// assert_eq!(None, Finder::new("quux").find(haystack));
422     /// ```
423     #[inline]
find(&self, haystack: &[u8]) -> Option<usize>424     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
425         let mut prestate = PrefilterState::new();
426         let needle = self.needle.as_slice();
427         self.searcher.find(&mut prestate, haystack, needle)
428     }
429 
430     /// Returns an iterator over all occurrences of a substring in a haystack.
431     ///
432     /// # Complexity
433     ///
434     /// This routine is guaranteed to have worst case linear time complexity
435     /// with respect to both the needle and the haystack. That is, this runs
436     /// in `O(needle.len() + haystack.len())` time.
437     ///
438     /// This routine is also guaranteed to have worst case constant space
439     /// complexity.
440     ///
441     /// # Examples
442     ///
443     /// Basic usage:
444     ///
445     /// ```
446     /// use memchr::memmem::Finder;
447     ///
448     /// let haystack = b"foo bar foo baz foo";
449     /// let finder = Finder::new(b"foo");
450     /// let mut it = finder.find_iter(haystack);
451     /// assert_eq!(Some(0), it.next());
452     /// assert_eq!(Some(8), it.next());
453     /// assert_eq!(Some(16), it.next());
454     /// assert_eq!(None, it.next());
455     /// ```
456     #[inline]
find_iter<'a, 'h>( &'a self, haystack: &'h [u8], ) -> FindIter<'h, 'a>457     pub fn find_iter<'a, 'h>(
458         &'a self,
459         haystack: &'h [u8],
460     ) -> FindIter<'h, 'a> {
461         FindIter::new(haystack, self.as_ref())
462     }
463 
464     /// Convert this finder into its owned variant, such that it no longer
465     /// borrows the needle.
466     ///
467     /// If this is already an owned finder, then this is a no-op. Otherwise,
468     /// this copies the needle.
469     ///
470     /// This is only available when the `alloc` feature is enabled.
471     #[cfg(feature = "alloc")]
472     #[inline]
into_owned(self) -> Finder<'static>473     pub fn into_owned(self) -> Finder<'static> {
474         Finder {
475             needle: self.needle.into_owned(),
476             searcher: self.searcher.clone(),
477         }
478     }
479 
480     /// Convert this finder into its borrowed variant.
481     ///
482     /// This is primarily useful if your finder is owned and you'd like to
483     /// store its borrowed variant in some intermediate data structure.
484     ///
485     /// Note that the lifetime parameter of the returned finder is tied to the
486     /// lifetime of `self`, and may be shorter than the `'n` lifetime of the
487     /// needle itself. Namely, a finder's needle can be either borrowed or
488     /// owned, so the lifetime of the needle returned must necessarily be the
489     /// shorter of the two.
490     #[inline]
as_ref(&self) -> Finder<'_>491     pub fn as_ref(&self) -> Finder<'_> {
492         Finder {
493             needle: CowBytes::new(self.needle()),
494             searcher: self.searcher.clone(),
495         }
496     }
497 
498     /// Returns the needle that this finder searches for.
499     ///
500     /// Note that the lifetime of the needle returned is tied to the lifetime
501     /// of the finder, and may be shorter than the `'n` lifetime. Namely, a
502     /// finder's needle can be either borrowed or owned, so the lifetime of the
503     /// needle returned must necessarily be the shorter of the two.
504     #[inline]
needle(&self) -> &[u8]505     pub fn needle(&self) -> &[u8] {
506         self.needle.as_slice()
507     }
508 }
509 
510 /// A single substring reverse searcher fixed to a particular needle.
511 ///
512 /// The purpose of this type is to permit callers to construct a substring
513 /// searcher that can be used to search haystacks without the overhead of
514 /// constructing the searcher in the first place. This is a somewhat niche
515 /// concern when it's necessary to re-use the same needle to search multiple
516 /// different haystacks with as little overhead as possible. In general,
517 /// using [`rfind`] is good enough, but `FinderRev` is useful when you can
518 /// meaningfully observe searcher construction time in a profile.
519 ///
520 /// When the `std` feature is enabled, then this type has an `into_owned`
521 /// version which permits building a `FinderRev` that is not connected to
522 /// the lifetime of its needle.
523 #[derive(Clone, Debug)]
524 pub struct FinderRev<'n> {
525     needle: CowBytes<'n>,
526     searcher: SearcherRev,
527 }
528 
529 impl<'n> FinderRev<'n> {
530     /// Create a new reverse finder for the given needle.
531     #[inline]
new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n>532     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> {
533         FinderBuilder::new().build_reverse(needle)
534     }
535 
536     /// Returns the index of the last occurrence of this needle in the given
537     /// haystack.
538     ///
539     /// The haystack may be any type that can be cheaply converted into a
540     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
541     ///
542     /// # Complexity
543     ///
544     /// This routine is guaranteed to have worst case linear time complexity
545     /// with respect to both the needle and the haystack. That is, this runs
546     /// in `O(needle.len() + haystack.len())` time.
547     ///
548     /// This routine is also guaranteed to have worst case constant space
549     /// complexity.
550     ///
551     /// # Examples
552     ///
553     /// Basic usage:
554     ///
555     /// ```
556     /// use memchr::memmem::FinderRev;
557     ///
558     /// let haystack = b"foo bar baz";
559     /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack));
560     /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack));
561     /// assert_eq!(None, FinderRev::new("quux").rfind(haystack));
562     /// ```
rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize>563     pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
564         self.searcher.rfind(haystack.as_ref(), self.needle.as_slice())
565     }
566 
567     /// Returns a reverse iterator over all occurrences of a substring in a
568     /// haystack.
569     ///
570     /// # Complexity
571     ///
572     /// This routine is guaranteed to have worst case linear time complexity
573     /// with respect to both the needle and the haystack. That is, this runs
574     /// in `O(needle.len() + haystack.len())` time.
575     ///
576     /// This routine is also guaranteed to have worst case constant space
577     /// complexity.
578     ///
579     /// # Examples
580     ///
581     /// Basic usage:
582     ///
583     /// ```
584     /// use memchr::memmem::FinderRev;
585     ///
586     /// let haystack = b"foo bar foo baz foo";
587     /// let finder = FinderRev::new(b"foo");
588     /// let mut it = finder.rfind_iter(haystack);
589     /// assert_eq!(Some(16), it.next());
590     /// assert_eq!(Some(8), it.next());
591     /// assert_eq!(Some(0), it.next());
592     /// assert_eq!(None, it.next());
593     /// ```
594     #[inline]
rfind_iter<'a, 'h>( &'a self, haystack: &'h [u8], ) -> FindRevIter<'h, 'a>595     pub fn rfind_iter<'a, 'h>(
596         &'a self,
597         haystack: &'h [u8],
598     ) -> FindRevIter<'h, 'a> {
599         FindRevIter::new(haystack, self.as_ref())
600     }
601 
602     /// Convert this finder into its owned variant, such that it no longer
603     /// borrows the needle.
604     ///
605     /// If this is already an owned finder, then this is a no-op. Otherwise,
606     /// this copies the needle.
607     ///
608     /// This is only available when the `std` feature is enabled.
609     #[cfg(feature = "alloc")]
610     #[inline]
into_owned(self) -> FinderRev<'static>611     pub fn into_owned(self) -> FinderRev<'static> {
612         FinderRev {
613             needle: self.needle.into_owned(),
614             searcher: self.searcher.clone(),
615         }
616     }
617 
618     /// Convert this finder into its borrowed variant.
619     ///
620     /// This is primarily useful if your finder is owned and you'd like to
621     /// store its borrowed variant in some intermediate data structure.
622     ///
623     /// Note that the lifetime parameter of the returned finder is tied to the
624     /// lifetime of `self`, and may be shorter than the `'n` lifetime of the
625     /// needle itself. Namely, a finder's needle can be either borrowed or
626     /// owned, so the lifetime of the needle returned must necessarily be the
627     /// shorter of the two.
628     #[inline]
as_ref(&self) -> FinderRev<'_>629     pub fn as_ref(&self) -> FinderRev<'_> {
630         FinderRev {
631             needle: CowBytes::new(self.needle()),
632             searcher: self.searcher.clone(),
633         }
634     }
635 
636     /// Returns the needle that this finder searches for.
637     ///
638     /// Note that the lifetime of the needle returned is tied to the lifetime
639     /// of the finder, and may be shorter than the `'n` lifetime. Namely, a
640     /// finder's needle can be either borrowed or owned, so the lifetime of the
641     /// needle returned must necessarily be the shorter of the two.
642     #[inline]
needle(&self) -> &[u8]643     pub fn needle(&self) -> &[u8] {
644         self.needle.as_slice()
645     }
646 }
647 
648 /// A builder for constructing non-default forward or reverse memmem finders.
649 ///
650 /// A builder is primarily useful for configuring a substring searcher.
651 /// Currently, the only configuration exposed is the ability to disable
652 /// heuristic prefilters used to speed up certain searches.
653 #[derive(Clone, Debug, Default)]
654 pub struct FinderBuilder {
655     prefilter: Prefilter,
656 }
657 
658 impl FinderBuilder {
659     /// Create a new finder builder with default settings.
new() -> FinderBuilder660     pub fn new() -> FinderBuilder {
661         FinderBuilder::default()
662     }
663 
664     /// Build a forward finder using the given needle from the current
665     /// settings.
build_forward<'n, B: ?Sized + AsRef<[u8]>>( &self, needle: &'n B, ) -> Finder<'n>666     pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>(
667         &self,
668         needle: &'n B,
669     ) -> Finder<'n> {
670         self.build_forward_with_ranker(DefaultFrequencyRank, needle)
671     }
672 
673     /// Build a forward finder using the given needle and a custom heuristic for
674     /// determining the frequency of a given byte in the dataset.
675     /// See [`HeuristicFrequencyRank`] for more details.
build_forward_with_ranker< 'n, R: HeuristicFrequencyRank, B: ?Sized + AsRef<[u8]>, >( &self, ranker: R, needle: &'n B, ) -> Finder<'n>676     pub fn build_forward_with_ranker<
677         'n,
678         R: HeuristicFrequencyRank,
679         B: ?Sized + AsRef<[u8]>,
680     >(
681         &self,
682         ranker: R,
683         needle: &'n B,
684     ) -> Finder<'n> {
685         let needle = needle.as_ref();
686         Finder {
687             needle: CowBytes::new(needle),
688             searcher: Searcher::new(self.prefilter, ranker, needle),
689         }
690     }
691 
692     /// Build a reverse finder using the given needle from the current
693     /// settings.
build_reverse<'n, B: ?Sized + AsRef<[u8]>>( &self, needle: &'n B, ) -> FinderRev<'n>694     pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>(
695         &self,
696         needle: &'n B,
697     ) -> FinderRev<'n> {
698         let needle = needle.as_ref();
699         FinderRev {
700             needle: CowBytes::new(needle),
701             searcher: SearcherRev::new(needle),
702         }
703     }
704 
705     /// Configure the prefilter setting for the finder.
706     ///
707     /// See the documentation for [`Prefilter`] for more discussion on why
708     /// you might want to configure this.
prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder709     pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder {
710         self.prefilter = prefilter;
711         self
712     }
713 }
714 
715 #[cfg(test)]
716 mod tests {
717     use super::*;
718 
719     define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h)));
720     define_substring_reverse_quickcheck!(|h, n| Some(
721         FinderRev::new(n).rfind(h)
722     ));
723 
724     #[test]
forward()725     fn forward() {
726         crate::tests::substring::Runner::new()
727             .fwd(|h, n| Some(Finder::new(n).find(h)))
728             .run();
729     }
730 
731     #[test]
reverse()732     fn reverse() {
733         crate::tests::substring::Runner::new()
734             .rev(|h, n| Some(FinderRev::new(n).rfind(h)))
735             .run();
736     }
737 }
738