1 use core::{
2     fmt::Debug,
3     panic::{RefUnwindSafe, UnwindSafe},
4 };
5 
6 use alloc::sync::Arc;
7 
8 use regex_syntax::hir::{literal, Hir};
9 
10 use crate::{
11     meta::{
12         error::{BuildError, RetryError, RetryFailError, RetryQuadraticError},
13         regex::{Cache, RegexInfo},
14         reverse_inner, wrappers,
15     },
16     nfa::thompson::{self, WhichCaptures, NFA},
17     util::{
18         captures::{Captures, GroupInfo},
19         look::LookMatcher,
20         prefilter::{self, Prefilter, PrefilterI},
21         primitives::{NonMaxUsize, PatternID},
22         search::{Anchored, HalfMatch, Input, Match, MatchKind, PatternSet},
23     },
24 };
25 
26 /// A trait that represents a single meta strategy. Its main utility is in
27 /// providing a way to do dynamic dispatch over a few choices.
28 ///
29 /// Why dynamic dispatch? I actually don't have a super compelling reason, and
30 /// importantly, I have not benchmarked it with the main alternative: an enum.
31 /// I went with dynamic dispatch initially because the regex engine search code
32 /// really can't be inlined into caller code in most cases because it's just
33 /// too big. In other words, it is already expected that every regex search
34 /// will entail at least the cost of a function call.
35 ///
36 /// I do wonder whether using enums would result in better codegen overall
37 /// though. It's a worthwhile experiment to try. Probably the most interesting
38 /// benchmark to run in such a case would be one with a high match count. That
39 /// is, a benchmark to test the overall latency of a search call.
40 pub(super) trait Strategy:
41     Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
42 {
group_info(&self) -> &GroupInfo43     fn group_info(&self) -> &GroupInfo;
44 
create_cache(&self) -> Cache45     fn create_cache(&self) -> Cache;
46 
reset_cache(&self, cache: &mut Cache)47     fn reset_cache(&self, cache: &mut Cache);
48 
is_accelerated(&self) -> bool49     fn is_accelerated(&self) -> bool;
50 
memory_usage(&self) -> usize51     fn memory_usage(&self) -> usize;
52 
search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>53     fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>;
54 
search_half( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>55     fn search_half(
56         &self,
57         cache: &mut Cache,
58         input: &Input<'_>,
59     ) -> Option<HalfMatch>;
60 
is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool61     fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool;
62 
search_slots( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>63     fn search_slots(
64         &self,
65         cache: &mut Cache,
66         input: &Input<'_>,
67         slots: &mut [Option<NonMaxUsize>],
68     ) -> Option<PatternID>;
69 
which_overlapping_matches( &self, cache: &mut Cache, input: &Input<'_>, patset: &mut PatternSet, )70     fn which_overlapping_matches(
71         &self,
72         cache: &mut Cache,
73         input: &Input<'_>,
74         patset: &mut PatternSet,
75     );
76 }
77 
new( info: &RegexInfo, hirs: &[&Hir], ) -> Result<Arc<dyn Strategy>, BuildError>78 pub(super) fn new(
79     info: &RegexInfo,
80     hirs: &[&Hir],
81 ) -> Result<Arc<dyn Strategy>, BuildError> {
82     // At this point, we're committed to a regex engine of some kind. So pull
83     // out a prefilter if we can, which will feed to each of the constituent
84     // regex engines.
85     let pre = if info.is_always_anchored_start() {
86         // PERF: I'm not sure we necessarily want to do this... We may want to
87         // run a prefilter for quickly rejecting in some cases. The problem
88         // is that anchored searches overlap quite a bit with the use case
89         // of "run a regex on every line to extract data." In that case, the
90         // regex always matches, so running a prefilter doesn't really help us
91         // there. The main place where a prefilter helps in an anchored search
92         // is if the anchored search is not expected to match frequently. That
93         // is, the prefilter gives us a way to possibly reject a haystack very
94         // quickly.
95         //
96         // Maybe we should do use a prefilter, but only for longer haystacks?
97         // Or maybe we should only use a prefilter when we think it's "fast"?
98         //
99         // Interestingly, I think we currently lack the infrastructure for
100         // disabling a prefilter based on haystack length. That would probably
101         // need to be a new 'Input' option. (Interestingly, an 'Input' used to
102         // carry a 'Prefilter' with it, but I moved away from that.)
103         debug!("skipping literal extraction since regex is anchored");
104         None
105     } else if let Some(pre) = info.config().get_prefilter() {
106         debug!(
107             "skipping literal extraction since the caller provided a prefilter"
108         );
109         Some(pre.clone())
110     } else if info.config().get_auto_prefilter() {
111         let kind = info.config().get_match_kind();
112         let prefixes = crate::util::prefilter::prefixes(kind, hirs);
113         // If we can build a full `Strategy` from just the extracted prefixes,
114         // then we can short-circuit and avoid building a regex engine at all.
115         if let Some(pre) = Pre::from_prefixes(info, &prefixes) {
116             debug!(
117                 "found that the regex can be broken down to a literal \
118                  search, avoiding the regex engine entirely",
119             );
120             return Ok(pre);
121         }
122         // This now attempts another short-circuit of the regex engine: if we
123         // have a huge alternation of just plain literals, then we can just use
124         // Aho-Corasick for that and avoid the regex engine entirely.
125         //
126         // You might think this case would just be handled by
127         // `Pre::from_prefixes`, but that technique relies on heuristic literal
128         // extraction from the corresponding `Hir`. That works, but part of
129         // heuristics limit the size and number of literals returned. This case
130         // will specifically handle patterns with very large alternations.
131         //
132         // One wonders if we should just roll this our heuristic literal
133         // extraction, and then I think this case could disappear entirely.
134         if let Some(pre) = Pre::from_alternation_literals(info, hirs) {
135             debug!(
136                 "found plain alternation of literals, \
137                  avoiding regex engine entirely and using Aho-Corasick"
138             );
139             return Ok(pre);
140         }
141         prefixes.literals().and_then(|strings| {
142             debug!(
143                 "creating prefilter from {} literals: {:?}",
144                 strings.len(),
145                 strings,
146             );
147             Prefilter::new(kind, strings)
148         })
149     } else {
150         debug!("skipping literal extraction since prefilters were disabled");
151         None
152     };
153     let mut core = Core::new(info.clone(), pre.clone(), hirs)?;
154     // Now that we have our core regex engines built, there are a few cases
155     // where we can do a little bit better than just a normal "search forward
156     // and maybe use a prefilter when in a start state." However, these cases
157     // may not always work or otherwise build on top of the Core searcher.
158     // For example, the reverse anchored optimization seems like it might
159     // always work, but only the DFAs support reverse searching and the DFAs
160     // might give up or quit for reasons. If we had, e.g., a PikeVM that
161     // supported reverse searching, then we could avoid building a full Core
162     // engine for this case.
163     core = match ReverseAnchored::new(core) {
164         Err(core) => core,
165         Ok(ra) => {
166             debug!("using reverse anchored strategy");
167             return Ok(Arc::new(ra));
168         }
169     };
170     core = match ReverseSuffix::new(core, hirs) {
171         Err(core) => core,
172         Ok(rs) => {
173             debug!("using reverse suffix strategy");
174             return Ok(Arc::new(rs));
175         }
176     };
177     core = match ReverseInner::new(core, hirs) {
178         Err(core) => core,
179         Ok(ri) => {
180             debug!("using reverse inner strategy");
181             return Ok(Arc::new(ri));
182         }
183     };
184     debug!("using core strategy");
185     Ok(Arc::new(core))
186 }
187 
188 #[derive(Clone, Debug)]
189 struct Pre<P> {
190     pre: P,
191     group_info: GroupInfo,
192 }
193 
194 impl<P: PrefilterI> Pre<P> {
new(pre: P) -> Arc<dyn Strategy>195     fn new(pre: P) -> Arc<dyn Strategy> {
196         // The only thing we support when we use prefilters directly as a
197         // strategy is the start and end of the overall match for a single
198         // pattern. In other words, exactly one implicit capturing group. Which
199         // is exactly what we use here for a GroupInfo.
200         let group_info = GroupInfo::new([[None::<&str>]]).unwrap();
201         Arc::new(Pre { pre, group_info })
202     }
203 }
204 
205 // This is a little weird, but we don't actually care about the type parameter
206 // here because we're selecting which underlying prefilter to use. So we just
207 // define it on an arbitrary type.
208 impl Pre<()> {
209     /// Given a sequence of prefixes, attempt to return a full `Strategy` using
210     /// just the prefixes.
211     ///
212     /// Basically, this occurs when the prefixes given not just prefixes,
213     /// but an enumeration of the entire language matched by the regular
214     /// expression.
215     ///
216     /// A number of other conditions need to be true too. For example, there
217     /// can be only one pattern, the number of explicit capture groups is 0, no
218     /// look-around assertions and so on.
219     ///
220     /// Note that this ignores `Config::get_auto_prefilter` because if this
221     /// returns something, then it isn't a prefilter but a matcher itself.
222     /// Therefore, it shouldn't suffer from the problems typical to prefilters
223     /// (such as a high false positive rate).
from_prefixes( info: &RegexInfo, prefixes: &literal::Seq, ) -> Option<Arc<dyn Strategy>>224     fn from_prefixes(
225         info: &RegexInfo,
226         prefixes: &literal::Seq,
227     ) -> Option<Arc<dyn Strategy>> {
228         let kind = info.config().get_match_kind();
229         // Check to see if our prefixes are exact, which means we might be
230         // able to bypass the regex engine entirely and just rely on literal
231         // searches.
232         if !prefixes.is_exact() {
233             return None;
234         }
235         // We also require that we have a single regex pattern. Namely,
236         // we reuse the prefilter infrastructure to implement search and
237         // prefilters only report spans. Prefilters don't know about pattern
238         // IDs. The multi-regex case isn't a lost cause, we might still use
239         // Aho-Corasick and we might still just use a regular prefilter, but
240         // that's done below.
241         if info.pattern_len() != 1 {
242             return None;
243         }
244         // We can't have any capture groups either. The literal engines don't
245         // know how to deal with things like '(foo)(bar)'. In that case, a
246         // prefilter will just be used and then the regex engine will resolve
247         // the capture groups.
248         if info.props()[0].explicit_captures_len() != 0 {
249             return None;
250         }
251         // We also require that it has zero look-around assertions. Namely,
252         // literal extraction treats look-around assertions as if they match
253         // *every* empty string. But of course, that isn't true. So for
254         // example, 'foo\bquux' never matches anything, but 'fooquux' is
255         // extracted from that as an exact literal. Such cases should just run
256         // the regex engine. 'fooquux' will be used as a normal prefilter, and
257         // then the regex engine will try to look for an actual match.
258         if !info.props()[0].look_set().is_empty() {
259             return None;
260         }
261         // Finally, currently, our prefilters are all oriented around
262         // leftmost-first match semantics, so don't try to use them if the
263         // caller asked for anything else.
264         if kind != MatchKind::LeftmostFirst {
265             return None;
266         }
267         // The above seems like a lot of requirements to meet, but it applies
268         // to a lot of cases. 'foo', '[abc][123]' and 'foo|bar|quux' all meet
269         // the above criteria, for example.
270         //
271         // Note that this is effectively a latency optimization. If we didn't
272         // do this, then the extracted literals would still get bundled into
273         // a prefilter, and every regex engine capable of running unanchored
274         // searches supports prefilters. So this optimization merely sidesteps
275         // having to run the regex engine at all to confirm the match. Thus, it
276         // decreases the latency of a match.
277 
278         // OK because we know the set is exact and thus finite.
279         let prefixes = prefixes.literals().unwrap();
280         debug!(
281             "trying to bypass regex engine by creating \
282              prefilter from {} literals: {:?}",
283             prefixes.len(),
284             prefixes,
285         );
286         let choice = match prefilter::Choice::new(kind, prefixes) {
287             Some(choice) => choice,
288             None => {
289                 debug!(
290                     "regex bypass failed because no prefilter could be built"
291                 );
292                 return None;
293             }
294         };
295         let strat: Arc<dyn Strategy> = match choice {
296             prefilter::Choice::Memchr(pre) => Pre::new(pre),
297             prefilter::Choice::Memchr2(pre) => Pre::new(pre),
298             prefilter::Choice::Memchr3(pre) => Pre::new(pre),
299             prefilter::Choice::Memmem(pre) => Pre::new(pre),
300             prefilter::Choice::Teddy(pre) => Pre::new(pre),
301             prefilter::Choice::ByteSet(pre) => Pre::new(pre),
302             prefilter::Choice::AhoCorasick(pre) => Pre::new(pre),
303         };
304         Some(strat)
305     }
306 
307     /// Attempts to extract an alternation of literals, and if it's deemed
308     /// worth doing, returns an Aho-Corasick prefilter as a strategy.
309     ///
310     /// And currently, this only returns something when 'hirs.len() == 1'. This
311     /// could in theory do something if there are multiple HIRs where all of
312     /// them are alternation of literals, but I haven't had the time to go down
313     /// that path yet.
from_alternation_literals( info: &RegexInfo, hirs: &[&Hir], ) -> Option<Arc<dyn Strategy>>314     fn from_alternation_literals(
315         info: &RegexInfo,
316         hirs: &[&Hir],
317     ) -> Option<Arc<dyn Strategy>> {
318         use crate::util::prefilter::AhoCorasick;
319 
320         let lits = crate::meta::literal::alternation_literals(info, hirs)?;
321         let ac = AhoCorasick::new(MatchKind::LeftmostFirst, &lits)?;
322         Some(Pre::new(ac))
323     }
324 }
325 
326 // This implements Strategy for anything that implements PrefilterI.
327 //
328 // Note that this must only be used for regexes of length 1. Multi-regexes
329 // don't work here. The prefilter interface only provides the span of a match
330 // and not the pattern ID. (I did consider making it more expressive, but I
331 // couldn't figure out how to tie everything together elegantly.) Thus, so long
332 // as the regex only contains one pattern, we can simply assume that a match
333 // corresponds to PatternID::ZERO. And indeed, that's what we do here.
334 //
335 // In practice, since this impl is used to report matches directly and thus
336 // completely bypasses the regex engine, we only wind up using this under the
337 // following restrictions:
338 //
339 // * There must be only one pattern. As explained above.
340 // * The literal sequence must be finite and only contain exact literals.
341 // * There must not be any look-around assertions. If there are, the literals
342 // extracted might be exact, but a match doesn't necessarily imply an overall
343 // match. As a trivial example, 'foo\bbar' does not match 'foobar'.
344 // * The pattern must not have any explicit capturing groups. If it does, the
345 // caller might expect them to be resolved. e.g., 'foo(bar)'.
346 //
347 // So when all of those things are true, we use a prefilter directly as a
348 // strategy.
349 //
350 // In the case where the number of patterns is more than 1, we don't use this
351 // but do use a special Aho-Corasick strategy if all of the regexes are just
352 // simple literals or alternations of literals. (We also use the Aho-Corasick
353 // strategy when len(patterns)==1 if the number of literals is large. In that
354 // case, literal extraction gives up and will return an infinite set.)
355 impl<P: PrefilterI> Strategy for Pre<P> {
356     #[cfg_attr(feature = "perf-inline", inline(always))]
group_info(&self) -> &GroupInfo357     fn group_info(&self) -> &GroupInfo {
358         &self.group_info
359     }
360 
create_cache(&self) -> Cache361     fn create_cache(&self) -> Cache {
362         Cache {
363             capmatches: Captures::all(self.group_info().clone()),
364             pikevm: wrappers::PikeVMCache::none(),
365             backtrack: wrappers::BoundedBacktrackerCache::none(),
366             onepass: wrappers::OnePassCache::none(),
367             hybrid: wrappers::HybridCache::none(),
368             revhybrid: wrappers::ReverseHybridCache::none(),
369         }
370     }
371 
reset_cache(&self, _cache: &mut Cache)372     fn reset_cache(&self, _cache: &mut Cache) {}
373 
is_accelerated(&self) -> bool374     fn is_accelerated(&self) -> bool {
375         self.pre.is_fast()
376     }
377 
memory_usage(&self) -> usize378     fn memory_usage(&self) -> usize {
379         self.pre.memory_usage()
380     }
381 
382     #[cfg_attr(feature = "perf-inline", inline(always))]
search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match>383     fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
384         if input.is_done() {
385             return None;
386         }
387         if input.get_anchored().is_anchored() {
388             return self
389                 .pre
390                 .prefix(input.haystack(), input.get_span())
391                 .map(|sp| Match::new(PatternID::ZERO, sp));
392         }
393         self.pre
394             .find(input.haystack(), input.get_span())
395             .map(|sp| Match::new(PatternID::ZERO, sp))
396     }
397 
398     #[cfg_attr(feature = "perf-inline", inline(always))]
search_half( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>399     fn search_half(
400         &self,
401         cache: &mut Cache,
402         input: &Input<'_>,
403     ) -> Option<HalfMatch> {
404         self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end()))
405     }
406 
407     #[cfg_attr(feature = "perf-inline", inline(always))]
is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool408     fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
409         self.search(cache, input).is_some()
410     }
411 
412     #[cfg_attr(feature = "perf-inline", inline(always))]
search_slots( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>413     fn search_slots(
414         &self,
415         cache: &mut Cache,
416         input: &Input<'_>,
417         slots: &mut [Option<NonMaxUsize>],
418     ) -> Option<PatternID> {
419         let m = self.search(cache, input)?;
420         if let Some(slot) = slots.get_mut(0) {
421             *slot = NonMaxUsize::new(m.start());
422         }
423         if let Some(slot) = slots.get_mut(1) {
424             *slot = NonMaxUsize::new(m.end());
425         }
426         Some(m.pattern())
427     }
428 
429     #[cfg_attr(feature = "perf-inline", inline(always))]
which_overlapping_matches( &self, cache: &mut Cache, input: &Input<'_>, patset: &mut PatternSet, )430     fn which_overlapping_matches(
431         &self,
432         cache: &mut Cache,
433         input: &Input<'_>,
434         patset: &mut PatternSet,
435     ) {
436         if self.search(cache, input).is_some() {
437             patset.insert(PatternID::ZERO);
438         }
439     }
440 }
441 
442 #[derive(Debug)]
443 struct Core {
444     info: RegexInfo,
445     pre: Option<Prefilter>,
446     nfa: NFA,
447     nfarev: Option<NFA>,
448     pikevm: wrappers::PikeVM,
449     backtrack: wrappers::BoundedBacktracker,
450     onepass: wrappers::OnePass,
451     hybrid: wrappers::Hybrid,
452     dfa: wrappers::DFA,
453 }
454 
455 impl Core {
new( info: RegexInfo, pre: Option<Prefilter>, hirs: &[&Hir], ) -> Result<Core, BuildError>456     fn new(
457         info: RegexInfo,
458         pre: Option<Prefilter>,
459         hirs: &[&Hir],
460     ) -> Result<Core, BuildError> {
461         let mut lookm = LookMatcher::new();
462         lookm.set_line_terminator(info.config().get_line_terminator());
463         let thompson_config = thompson::Config::new()
464             .utf8(info.config().get_utf8_empty())
465             .nfa_size_limit(info.config().get_nfa_size_limit())
466             .shrink(false)
467             .which_captures(info.config().get_which_captures())
468             .look_matcher(lookm);
469         let nfa = thompson::Compiler::new()
470             .configure(thompson_config.clone())
471             .build_many_from_hir(hirs)
472             .map_err(BuildError::nfa)?;
473         // It's possible for the PikeVM or the BB to fail to build, even though
474         // at this point, we already have a full NFA in hand. They can fail
475         // when a Unicode word boundary is used but where Unicode word boundary
476         // support is disabled at compile time, thus making it impossible to
477         // match. (Construction can also fail if the NFA was compiled without
478         // captures, but we always enable that above.)
479         let pikevm = wrappers::PikeVM::new(&info, pre.clone(), &nfa)?;
480         let backtrack =
481             wrappers::BoundedBacktracker::new(&info, pre.clone(), &nfa)?;
482         // The onepass engine can of course fail to build, but we expect it to
483         // fail in many cases because it is an optimization that doesn't apply
484         // to all regexes. The 'OnePass' wrapper encapsulates this failure (and
485         // logs a message if it occurs).
486         let onepass = wrappers::OnePass::new(&info, &nfa);
487         // We try to encapsulate whether a particular regex engine should be
488         // used within each respective wrapper, but the DFAs need a reverse NFA
489         // to build itself, and we really do not want to build a reverse NFA if
490         // we know we aren't going to use the lazy DFA. So we do a config check
491         // up front, which is in practice the only way we won't try to use the
492         // DFA.
493         let (nfarev, hybrid, dfa) =
494             if !info.config().get_hybrid() && !info.config().get_dfa() {
495                 (None, wrappers::Hybrid::none(), wrappers::DFA::none())
496             } else {
497                 // FIXME: Technically, we don't quite yet KNOW that we need
498                 // a reverse NFA. It's possible for the DFAs below to both
499                 // fail to build just based on the forward NFA. In which case,
500                 // building the reverse NFA was totally wasted work. But...
501                 // fixing this requires breaking DFA construction apart into
502                 // two pieces: one for the forward part and another for the
503                 // reverse part. Quite annoying. Making it worse, when building
504                 // both DFAs fails, it's quite likely that the NFA is large and
505                 // that it will take quite some time to build the reverse NFA
506                 // too. So... it's really probably worth it to do this!
507                 let nfarev = thompson::Compiler::new()
508                     // Currently, reverse NFAs don't support capturing groups,
509                     // so we MUST disable them. But even if we didn't have to,
510                     // we would, because nothing in this crate does anything
511                     // useful with capturing groups in reverse. And of course,
512                     // the lazy DFA ignores capturing groups in all cases.
513                     .configure(
514                         thompson_config
515                             .clone()
516                             .which_captures(WhichCaptures::None)
517                             .reverse(true),
518                     )
519                     .build_many_from_hir(hirs)
520                     .map_err(BuildError::nfa)?;
521                 let dfa = if !info.config().get_dfa() {
522                     wrappers::DFA::none()
523                 } else {
524                     wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
525                 };
526                 let hybrid = if !info.config().get_hybrid() {
527                     wrappers::Hybrid::none()
528                 } else if dfa.is_some() {
529                     debug!("skipping lazy DFA because we have a full DFA");
530                     wrappers::Hybrid::none()
531                 } else {
532                     wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
533                 };
534                 (Some(nfarev), hybrid, dfa)
535             };
536         Ok(Core {
537             info,
538             pre,
539             nfa,
540             nfarev,
541             pikevm,
542             backtrack,
543             onepass,
544             hybrid,
545             dfa,
546         })
547     }
548 
549     #[cfg_attr(feature = "perf-inline", inline(always))]
try_search_mayfail( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<Result<Option<Match>, RetryFailError>>550     fn try_search_mayfail(
551         &self,
552         cache: &mut Cache,
553         input: &Input<'_>,
554     ) -> Option<Result<Option<Match>, RetryFailError>> {
555         if let Some(e) = self.dfa.get(input) {
556             trace!("using full DFA for search at {:?}", input.get_span());
557             Some(e.try_search(input))
558         } else if let Some(e) = self.hybrid.get(input) {
559             trace!("using lazy DFA for search at {:?}", input.get_span());
560             Some(e.try_search(&mut cache.hybrid, input))
561         } else {
562             None
563         }
564     }
565 
search_nofail( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<Match>566     fn search_nofail(
567         &self,
568         cache: &mut Cache,
569         input: &Input<'_>,
570     ) -> Option<Match> {
571         let caps = &mut cache.capmatches;
572         caps.set_pattern(None);
573         // We manually inline 'try_search_slots_nofail' here because we need to
574         // borrow from 'cache.capmatches' in this method, but if we do, then
575         // we can't pass 'cache' wholesale to to 'try_slots_no_hybrid'. It's a
576         // classic example of how the borrow checker inhibits decomposition.
577         // There are of course work-arounds (more types and/or interior
578         // mutability), but that's more annoying than this IMO.
579         let pid = if let Some(ref e) = self.onepass.get(input) {
580             trace!("using OnePass for search at {:?}", input.get_span());
581             e.search_slots(&mut cache.onepass, input, caps.slots_mut())
582         } else if let Some(ref e) = self.backtrack.get(input) {
583             trace!(
584                 "using BoundedBacktracker for search at {:?}",
585                 input.get_span()
586             );
587             e.search_slots(&mut cache.backtrack, input, caps.slots_mut())
588         } else {
589             trace!("using PikeVM for search at {:?}", input.get_span());
590             let e = self.pikevm.get();
591             e.search_slots(&mut cache.pikevm, input, caps.slots_mut())
592         };
593         caps.set_pattern(pid);
594         caps.get_match()
595     }
596 
search_half_nofail( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>597     fn search_half_nofail(
598         &self,
599         cache: &mut Cache,
600         input: &Input<'_>,
601     ) -> Option<HalfMatch> {
602         // Only the lazy/full DFA returns half-matches, since the DFA requires
603         // a reverse scan to find the start position. These fallback regex
604         // engines can find the start and end in a single pass, so we just do
605         // that and throw away the start offset to conform to the API.
606         let m = self.search_nofail(cache, input)?;
607         Some(HalfMatch::new(m.pattern(), m.end()))
608     }
609 
search_slots_nofail( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>610     fn search_slots_nofail(
611         &self,
612         cache: &mut Cache,
613         input: &Input<'_>,
614         slots: &mut [Option<NonMaxUsize>],
615     ) -> Option<PatternID> {
616         if let Some(ref e) = self.onepass.get(input) {
617             trace!(
618                 "using OnePass for capture search at {:?}",
619                 input.get_span()
620             );
621             e.search_slots(&mut cache.onepass, input, slots)
622         } else if let Some(ref e) = self.backtrack.get(input) {
623             trace!(
624                 "using BoundedBacktracker for capture search at {:?}",
625                 input.get_span()
626             );
627             e.search_slots(&mut cache.backtrack, input, slots)
628         } else {
629             trace!(
630                 "using PikeVM for capture search at {:?}",
631                 input.get_span()
632             );
633             let e = self.pikevm.get();
634             e.search_slots(&mut cache.pikevm, input, slots)
635         }
636     }
637 
is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool638     fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
639         if let Some(ref e) = self.onepass.get(input) {
640             trace!(
641                 "using OnePass for is-match search at {:?}",
642                 input.get_span()
643             );
644             e.search_slots(&mut cache.onepass, input, &mut []).is_some()
645         } else if let Some(ref e) = self.backtrack.get(input) {
646             trace!(
647                 "using BoundedBacktracker for is-match search at {:?}",
648                 input.get_span()
649             );
650             e.is_match(&mut cache.backtrack, input)
651         } else {
652             trace!(
653                 "using PikeVM for is-match search at {:?}",
654                 input.get_span()
655             );
656             let e = self.pikevm.get();
657             e.is_match(&mut cache.pikevm, input)
658         }
659     }
660 
is_capture_search_needed(&self, slots_len: usize) -> bool661     fn is_capture_search_needed(&self, slots_len: usize) -> bool {
662         slots_len > self.nfa.group_info().implicit_slot_len()
663     }
664 }
665 
666 impl Strategy for Core {
667     #[cfg_attr(feature = "perf-inline", inline(always))]
group_info(&self) -> &GroupInfo668     fn group_info(&self) -> &GroupInfo {
669         self.nfa.group_info()
670     }
671 
672     #[cfg_attr(feature = "perf-inline", inline(always))]
create_cache(&self) -> Cache673     fn create_cache(&self) -> Cache {
674         Cache {
675             capmatches: Captures::all(self.group_info().clone()),
676             pikevm: self.pikevm.create_cache(),
677             backtrack: self.backtrack.create_cache(),
678             onepass: self.onepass.create_cache(),
679             hybrid: self.hybrid.create_cache(),
680             revhybrid: wrappers::ReverseHybridCache::none(),
681         }
682     }
683 
684     #[cfg_attr(feature = "perf-inline", inline(always))]
reset_cache(&self, cache: &mut Cache)685     fn reset_cache(&self, cache: &mut Cache) {
686         cache.pikevm.reset(&self.pikevm);
687         cache.backtrack.reset(&self.backtrack);
688         cache.onepass.reset(&self.onepass);
689         cache.hybrid.reset(&self.hybrid);
690     }
691 
is_accelerated(&self) -> bool692     fn is_accelerated(&self) -> bool {
693         self.pre.as_ref().map_or(false, |pre| pre.is_fast())
694     }
695 
memory_usage(&self) -> usize696     fn memory_usage(&self) -> usize {
697         self.info.memory_usage()
698             + self.pre.as_ref().map_or(0, |pre| pre.memory_usage())
699             + self.nfa.memory_usage()
700             + self.nfarev.as_ref().map_or(0, |nfa| nfa.memory_usage())
701             + self.onepass.memory_usage()
702             + self.dfa.memory_usage()
703     }
704 
705     #[cfg_attr(feature = "perf-inline", inline(always))]
search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>706     fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
707         // We manually inline try_search_mayfail here because letting the
708         // compiler do it seems to produce pretty crappy codegen.
709         return if let Some(e) = self.dfa.get(input) {
710             trace!("using full DFA for full search at {:?}", input.get_span());
711             match e.try_search(input) {
712                 Ok(x) => x,
713                 Err(_err) => {
714                     trace!("full DFA search failed: {}", _err);
715                     self.search_nofail(cache, input)
716                 }
717             }
718         } else if let Some(e) = self.hybrid.get(input) {
719             trace!("using lazy DFA for full search at {:?}", input.get_span());
720             match e.try_search(&mut cache.hybrid, input) {
721                 Ok(x) => x,
722                 Err(_err) => {
723                     trace!("lazy DFA search failed: {}", _err);
724                     self.search_nofail(cache, input)
725                 }
726             }
727         } else {
728             self.search_nofail(cache, input)
729         };
730     }
731 
732     #[cfg_attr(feature = "perf-inline", inline(always))]
search_half( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>733     fn search_half(
734         &self,
735         cache: &mut Cache,
736         input: &Input<'_>,
737     ) -> Option<HalfMatch> {
738         // The main difference with 'search' is that if we're using a DFA, we
739         // can use a single forward scan without needing to run the reverse
740         // DFA.
741         if let Some(e) = self.dfa.get(input) {
742             trace!("using full DFA for half search at {:?}", input.get_span());
743             match e.try_search_half_fwd(input) {
744                 Ok(x) => x,
745                 Err(_err) => {
746                     trace!("full DFA half search failed: {}", _err);
747                     self.search_half_nofail(cache, input)
748                 }
749             }
750         } else if let Some(e) = self.hybrid.get(input) {
751             trace!("using lazy DFA for half search at {:?}", input.get_span());
752             match e.try_search_half_fwd(&mut cache.hybrid, input) {
753                 Ok(x) => x,
754                 Err(_err) => {
755                     trace!("lazy DFA half search failed: {}", _err);
756                     self.search_half_nofail(cache, input)
757                 }
758             }
759         } else {
760             self.search_half_nofail(cache, input)
761         }
762     }
763 
764     #[cfg_attr(feature = "perf-inline", inline(always))]
is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool765     fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
766         if let Some(e) = self.dfa.get(input) {
767             trace!(
768                 "using full DFA for is-match search at {:?}",
769                 input.get_span()
770             );
771             match e.try_search_half_fwd(input) {
772                 Ok(x) => x.is_some(),
773                 Err(_err) => {
774                     trace!("full DFA half search failed: {}", _err);
775                     self.is_match_nofail(cache, input)
776                 }
777             }
778         } else if let Some(e) = self.hybrid.get(input) {
779             trace!(
780                 "using lazy DFA for is-match search at {:?}",
781                 input.get_span()
782             );
783             match e.try_search_half_fwd(&mut cache.hybrid, input) {
784                 Ok(x) => x.is_some(),
785                 Err(_err) => {
786                     trace!("lazy DFA half search failed: {}", _err);
787                     self.is_match_nofail(cache, input)
788                 }
789             }
790         } else {
791             self.is_match_nofail(cache, input)
792         }
793     }
794 
795     #[cfg_attr(feature = "perf-inline", inline(always))]
search_slots( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>796     fn search_slots(
797         &self,
798         cache: &mut Cache,
799         input: &Input<'_>,
800         slots: &mut [Option<NonMaxUsize>],
801     ) -> Option<PatternID> {
802         // Even if the regex has explicit capture groups, if the caller didn't
803         // provide any explicit slots, then it doesn't make sense to try and do
804         // extra work to get offsets for those slots. Ideally the caller should
805         // realize this and not call this routine in the first place, but alas,
806         // we try to save the caller from themselves if they do.
807         if !self.is_capture_search_needed(slots.len()) {
808             trace!("asked for slots unnecessarily, trying fast path");
809             let m = self.search(cache, input)?;
810             copy_match_to_slots(m, slots);
811             return Some(m.pattern());
812         }
813         // If the onepass DFA is available for this search (which only happens
814         // when it's anchored), then skip running a fallible DFA. The onepass
815         // DFA isn't as fast as a full or lazy DFA, but it is typically quite
816         // a bit faster than the backtracker or the PikeVM. So it isn't as
817         // advantageous to try and do a full/lazy DFA scan first.
818         //
819         // We still theorize that it's better to do a full/lazy DFA scan, even
820         // when it's anchored, because it's usually much faster and permits us
821         // to say "no match" much more quickly. This does hurt the case of,
822         // say, parsing each line in a log file into capture groups, because
823         // in that case, the line always matches. So the lazy DFA scan is
824         // usually just wasted work. But, the lazy DFA is usually quite fast
825         // and doesn't cost too much here.
826         if self.onepass.get(&input).is_some() {
827             return self.search_slots_nofail(cache, &input, slots);
828         }
829         let m = match self.try_search_mayfail(cache, input) {
830             Some(Ok(Some(m))) => m,
831             Some(Ok(None)) => return None,
832             Some(Err(_err)) => {
833                 trace!("fast capture search failed: {}", _err);
834                 return self.search_slots_nofail(cache, input, slots);
835             }
836             None => {
837                 return self.search_slots_nofail(cache, input, slots);
838             }
839         };
840         // At this point, now that we've found the bounds of the
841         // match, we need to re-run something that can resolve
842         // capturing groups. But we only need to run on it on the
843         // match bounds and not the entire haystack.
844         trace!(
845             "match found at {}..{} in capture search, \
846 		  	 using another engine to find captures",
847             m.start(),
848             m.end(),
849         );
850         let input = input
851             .clone()
852             .span(m.start()..m.end())
853             .anchored(Anchored::Pattern(m.pattern()));
854         Some(
855             self.search_slots_nofail(cache, &input, slots)
856                 .expect("should find a match"),
857         )
858     }
859 
860     #[cfg_attr(feature = "perf-inline", inline(always))]
which_overlapping_matches( &self, cache: &mut Cache, input: &Input<'_>, patset: &mut PatternSet, )861     fn which_overlapping_matches(
862         &self,
863         cache: &mut Cache,
864         input: &Input<'_>,
865         patset: &mut PatternSet,
866     ) {
867         if let Some(e) = self.dfa.get(input) {
868             trace!(
869                 "using full DFA for overlapping search at {:?}",
870                 input.get_span()
871             );
872             let _err = match e.try_which_overlapping_matches(input, patset) {
873                 Ok(()) => return,
874                 Err(err) => err,
875             };
876             trace!("fast overlapping search failed: {}", _err);
877         } else if let Some(e) = self.hybrid.get(input) {
878             trace!(
879                 "using lazy DFA for overlapping search at {:?}",
880                 input.get_span()
881             );
882             let _err = match e.try_which_overlapping_matches(
883                 &mut cache.hybrid,
884                 input,
885                 patset,
886             ) {
887                 Ok(()) => {
888                     return;
889                 }
890                 Err(err) => err,
891             };
892             trace!("fast overlapping search failed: {}", _err);
893         }
894         trace!(
895             "using PikeVM for overlapping search at {:?}",
896             input.get_span()
897         );
898         let e = self.pikevm.get();
899         e.which_overlapping_matches(&mut cache.pikevm, input, patset)
900     }
901 }
902 
903 #[derive(Debug)]
904 struct ReverseAnchored {
905     core: Core,
906 }
907 
908 impl ReverseAnchored {
new(core: Core) -> Result<ReverseAnchored, Core>909     fn new(core: Core) -> Result<ReverseAnchored, Core> {
910         if !core.info.is_always_anchored_end() {
911             debug!(
912                 "skipping reverse anchored optimization because \
913 				 the regex is not always anchored at the end"
914             );
915             return Err(core);
916         }
917         // Note that the caller can still request an anchored search even when
918         // the regex isn't anchored at the start. We detect that case in the
919         // search routines below and just fallback to the core engine. This
920         // is fine because both searches are anchored. It's just a matter of
921         // picking one. Falling back to the core engine is a little simpler,
922         // since if we used the reverse anchored approach, we'd have to add an
923         // extra check to ensure the match reported starts at the place where
924         // the caller requested the search to start.
925         if core.info.is_always_anchored_start() {
926             debug!(
927                 "skipping reverse anchored optimization because \
928 				 the regex is also anchored at the start"
929             );
930             return Err(core);
931         }
932         // Only DFAs can do reverse searches (currently), so we need one of
933         // them in order to do this optimization. It's possible (although
934         // pretty unlikely) that we have neither and need to give up.
935         if !core.hybrid.is_some() && !core.dfa.is_some() {
936             debug!(
937                 "skipping reverse anchored optimization because \
938 				 we don't have a lazy DFA or a full DFA"
939             );
940             return Err(core);
941         }
942         Ok(ReverseAnchored { core })
943     }
944 
945     #[cfg_attr(feature = "perf-inline", inline(always))]
try_search_half_anchored_rev( &self, cache: &mut Cache, input: &Input<'_>, ) -> Result<Option<HalfMatch>, RetryFailError>946     fn try_search_half_anchored_rev(
947         &self,
948         cache: &mut Cache,
949         input: &Input<'_>,
950     ) -> Result<Option<HalfMatch>, RetryFailError> {
951         // We of course always want an anchored search. In theory, the
952         // underlying regex engines should automatically enable anchored
953         // searches since the regex is itself anchored, but this more clearly
954         // expresses intent and is always correct.
955         let input = input.clone().anchored(Anchored::Yes);
956         if let Some(e) = self.core.dfa.get(&input) {
957             trace!(
958                 "using full DFA for reverse anchored search at {:?}",
959                 input.get_span()
960             );
961             e.try_search_half_rev(&input)
962         } else if let Some(e) = self.core.hybrid.get(&input) {
963             trace!(
964                 "using lazy DFA for reverse anchored search at {:?}",
965                 input.get_span()
966             );
967             e.try_search_half_rev(&mut cache.hybrid, &input)
968         } else {
969             unreachable!("ReverseAnchored always has a DFA")
970         }
971     }
972 }
973 
974 // Note that in this impl, we don't check that 'input.end() ==
975 // input.haystack().len()'. In particular, when that condition is false, a
976 // match is always impossible because we know that the regex is always anchored
977 // at the end (or else 'ReverseAnchored' won't be built). We don't check that
978 // here because the 'Regex' wrapper actually does that for us in all cases.
979 // Thus, in this impl, we can actually assume that the end position in 'input'
980 // is equivalent to the length of the haystack.
981 impl Strategy for ReverseAnchored {
982     #[cfg_attr(feature = "perf-inline", inline(always))]
group_info(&self) -> &GroupInfo983     fn group_info(&self) -> &GroupInfo {
984         self.core.group_info()
985     }
986 
987     #[cfg_attr(feature = "perf-inline", inline(always))]
create_cache(&self) -> Cache988     fn create_cache(&self) -> Cache {
989         self.core.create_cache()
990     }
991 
992     #[cfg_attr(feature = "perf-inline", inline(always))]
reset_cache(&self, cache: &mut Cache)993     fn reset_cache(&self, cache: &mut Cache) {
994         self.core.reset_cache(cache);
995     }
996 
is_accelerated(&self) -> bool997     fn is_accelerated(&self) -> bool {
998         // Since this is anchored at the end, a reverse anchored search is
999         // almost certainly guaranteed to result in a much faster search than
1000         // a standard forward search.
1001         true
1002     }
1003 
memory_usage(&self) -> usize1004     fn memory_usage(&self) -> usize {
1005         self.core.memory_usage()
1006     }
1007 
1008     #[cfg_attr(feature = "perf-inline", inline(always))]
search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>1009     fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1010         if input.get_anchored().is_anchored() {
1011             return self.core.search(cache, input);
1012         }
1013         match self.try_search_half_anchored_rev(cache, input) {
1014             Err(_err) => {
1015                 trace!("fast reverse anchored search failed: {}", _err);
1016                 self.core.search_nofail(cache, input)
1017             }
1018             Ok(None) => None,
1019             Ok(Some(hm)) => {
1020                 Some(Match::new(hm.pattern(), hm.offset()..input.end()))
1021             }
1022         }
1023     }
1024 
1025     #[cfg_attr(feature = "perf-inline", inline(always))]
search_half( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>1026     fn search_half(
1027         &self,
1028         cache: &mut Cache,
1029         input: &Input<'_>,
1030     ) -> Option<HalfMatch> {
1031         if input.get_anchored().is_anchored() {
1032             return self.core.search_half(cache, input);
1033         }
1034         match self.try_search_half_anchored_rev(cache, input) {
1035             Err(_err) => {
1036                 trace!("fast reverse anchored search failed: {}", _err);
1037                 self.core.search_half_nofail(cache, input)
1038             }
1039             Ok(None) => None,
1040             Ok(Some(hm)) => {
1041                 // Careful here! 'try_search_half' is a *forward* search that
1042                 // only cares about the *end* position of a match. But
1043                 // 'hm.offset()' is actually the start of the match. So we
1044                 // actually just throw that away here and, since we know we
1045                 // have a match, return the only possible position at which a
1046                 // match can occur: input.end().
1047                 Some(HalfMatch::new(hm.pattern(), input.end()))
1048             }
1049         }
1050     }
1051 
1052     #[cfg_attr(feature = "perf-inline", inline(always))]
is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool1053     fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1054         if input.get_anchored().is_anchored() {
1055             return self.core.is_match(cache, input);
1056         }
1057         match self.try_search_half_anchored_rev(cache, input) {
1058             Err(_err) => {
1059                 trace!("fast reverse anchored search failed: {}", _err);
1060                 self.core.is_match_nofail(cache, input)
1061             }
1062             Ok(None) => false,
1063             Ok(Some(_)) => true,
1064         }
1065     }
1066 
1067     #[cfg_attr(feature = "perf-inline", inline(always))]
search_slots( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>1068     fn search_slots(
1069         &self,
1070         cache: &mut Cache,
1071         input: &Input<'_>,
1072         slots: &mut [Option<NonMaxUsize>],
1073     ) -> Option<PatternID> {
1074         if input.get_anchored().is_anchored() {
1075             return self.core.search_slots(cache, input, slots);
1076         }
1077         match self.try_search_half_anchored_rev(cache, input) {
1078             Err(_err) => {
1079                 trace!("fast reverse anchored search failed: {}", _err);
1080                 self.core.search_slots_nofail(cache, input, slots)
1081             }
1082             Ok(None) => None,
1083             Ok(Some(hm)) => {
1084                 if !self.core.is_capture_search_needed(slots.len()) {
1085                     trace!("asked for slots unnecessarily, skipping captures");
1086                     let m = Match::new(hm.pattern(), hm.offset()..input.end());
1087                     copy_match_to_slots(m, slots);
1088                     return Some(m.pattern());
1089                 }
1090                 let start = hm.offset();
1091                 let input = input
1092                     .clone()
1093                     .span(start..input.end())
1094                     .anchored(Anchored::Pattern(hm.pattern()));
1095                 self.core.search_slots_nofail(cache, &input, slots)
1096             }
1097         }
1098     }
1099 
1100     #[cfg_attr(feature = "perf-inline", inline(always))]
which_overlapping_matches( &self, cache: &mut Cache, input: &Input<'_>, patset: &mut PatternSet, )1101     fn which_overlapping_matches(
1102         &self,
1103         cache: &mut Cache,
1104         input: &Input<'_>,
1105         patset: &mut PatternSet,
1106     ) {
1107         // It seems like this could probably benefit from a reverse anchored
1108         // optimization, perhaps by doing an overlapping reverse search (which
1109         // the DFAs do support). I haven't given it much thought though, and
1110         // I'm currently focus more on the single pattern case.
1111         self.core.which_overlapping_matches(cache, input, patset)
1112     }
1113 }
1114 
1115 #[derive(Debug)]
1116 struct ReverseSuffix {
1117     core: Core,
1118     pre: Prefilter,
1119 }
1120 
1121 impl ReverseSuffix {
new(core: Core, hirs: &[&Hir]) -> Result<ReverseSuffix, Core>1122     fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseSuffix, Core> {
1123         if !core.info.config().get_auto_prefilter() {
1124             debug!(
1125                 "skipping reverse suffix optimization because \
1126                  automatic prefilters are disabled"
1127             );
1128             return Err(core);
1129         }
1130         // Like the reverse inner optimization, we don't do this for regexes
1131         // that are always anchored. It could lead to scanning too much, but
1132         // could say "no match" much more quickly than running the regex
1133         // engine if the initial literal scan doesn't match. With that said,
1134         // the reverse suffix optimization has lower overhead, since it only
1135         // requires a reverse scan after a literal match to confirm or reject
1136         // the match. (Although, in the case of confirmation, it then needs to
1137         // do another forward scan to find the end position.)
1138         //
1139         // Note that the caller can still request an anchored search even
1140         // when the regex isn't anchored. We detect that case in the search
1141         // routines below and just fallback to the core engine. Currently this
1142         // optimization assumes all searches are unanchored, so if we do want
1143         // to enable this optimization for anchored searches, it will need a
1144         // little work to support it.
1145         if core.info.is_always_anchored_start() {
1146             debug!(
1147                 "skipping reverse suffix optimization because \
1148 				 the regex is always anchored at the start",
1149             );
1150             return Err(core);
1151         }
1152         // Only DFAs can do reverse searches (currently), so we need one of
1153         // them in order to do this optimization. It's possible (although
1154         // pretty unlikely) that we have neither and need to give up.
1155         if !core.hybrid.is_some() && !core.dfa.is_some() {
1156             debug!(
1157                 "skipping reverse suffix optimization because \
1158 				 we don't have a lazy DFA or a full DFA"
1159             );
1160             return Err(core);
1161         }
1162         if core.pre.as_ref().map_or(false, |p| p.is_fast()) {
1163             debug!(
1164                 "skipping reverse suffix optimization because \
1165 				 we already have a prefilter that we think is fast"
1166             );
1167             return Err(core);
1168         }
1169         let kind = core.info.config().get_match_kind();
1170         let suffixes = crate::util::prefilter::suffixes(kind, hirs);
1171         let lcs = match suffixes.longest_common_suffix() {
1172             None => {
1173                 debug!(
1174                     "skipping reverse suffix optimization because \
1175                      a longest common suffix could not be found",
1176                 );
1177                 return Err(core);
1178             }
1179             Some(lcs) if lcs.is_empty() => {
1180                 debug!(
1181                     "skipping reverse suffix optimization because \
1182                      the longest common suffix is the empty string",
1183                 );
1184                 return Err(core);
1185             }
1186             Some(lcs) => lcs,
1187         };
1188         let pre = match Prefilter::new(kind, &[lcs]) {
1189             Some(pre) => pre,
1190             None => {
1191                 debug!(
1192                     "skipping reverse suffix optimization because \
1193                      a prefilter could not be constructed from the \
1194                      longest common suffix",
1195                 );
1196                 return Err(core);
1197             }
1198         };
1199         if !pre.is_fast() {
1200             debug!(
1201                 "skipping reverse suffix optimization because \
1202 				 while we have a suffix prefilter, it is not \
1203 				 believed to be 'fast'"
1204             );
1205             return Err(core);
1206         }
1207         Ok(ReverseSuffix { core, pre })
1208     }
1209 
1210     #[cfg_attr(feature = "perf-inline", inline(always))]
try_search_half_start( &self, cache: &mut Cache, input: &Input<'_>, ) -> Result<Option<HalfMatch>, RetryError>1211     fn try_search_half_start(
1212         &self,
1213         cache: &mut Cache,
1214         input: &Input<'_>,
1215     ) -> Result<Option<HalfMatch>, RetryError> {
1216         let mut span = input.get_span();
1217         let mut min_start = 0;
1218         loop {
1219             let litmatch = match self.pre.find(input.haystack(), span) {
1220                 None => return Ok(None),
1221                 Some(span) => span,
1222             };
1223             trace!("reverse suffix scan found suffix match at {:?}", litmatch);
1224             let revinput = input
1225                 .clone()
1226                 .anchored(Anchored::Yes)
1227                 .span(input.start()..litmatch.end);
1228             match self
1229                 .try_search_half_rev_limited(cache, &revinput, min_start)?
1230             {
1231                 None => {
1232                     if span.start >= span.end {
1233                         break;
1234                     }
1235                     span.start = litmatch.start.checked_add(1).unwrap();
1236                 }
1237                 Some(hm) => return Ok(Some(hm)),
1238             }
1239             min_start = litmatch.end;
1240         }
1241         Ok(None)
1242     }
1243 
1244     #[cfg_attr(feature = "perf-inline", inline(always))]
try_search_half_fwd( &self, cache: &mut Cache, input: &Input<'_>, ) -> Result<Option<HalfMatch>, RetryFailError>1245     fn try_search_half_fwd(
1246         &self,
1247         cache: &mut Cache,
1248         input: &Input<'_>,
1249     ) -> Result<Option<HalfMatch>, RetryFailError> {
1250         if let Some(e) = self.core.dfa.get(&input) {
1251             trace!(
1252                 "using full DFA for forward reverse suffix search at {:?}",
1253                 input.get_span()
1254             );
1255             e.try_search_half_fwd(&input)
1256         } else if let Some(e) = self.core.hybrid.get(&input) {
1257             trace!(
1258                 "using lazy DFA for forward reverse suffix search at {:?}",
1259                 input.get_span()
1260             );
1261             e.try_search_half_fwd(&mut cache.hybrid, &input)
1262         } else {
1263             unreachable!("ReverseSuffix always has a DFA")
1264         }
1265     }
1266 
1267     #[cfg_attr(feature = "perf-inline", inline(always))]
try_search_half_rev_limited( &self, cache: &mut Cache, input: &Input<'_>, min_start: usize, ) -> Result<Option<HalfMatch>, RetryError>1268     fn try_search_half_rev_limited(
1269         &self,
1270         cache: &mut Cache,
1271         input: &Input<'_>,
1272         min_start: usize,
1273     ) -> Result<Option<HalfMatch>, RetryError> {
1274         if let Some(e) = self.core.dfa.get(&input) {
1275             trace!(
1276                 "using full DFA for reverse suffix search at {:?}, \
1277                  but will be stopped at {} to avoid quadratic behavior",
1278                 input.get_span(),
1279                 min_start,
1280             );
1281             e.try_search_half_rev_limited(&input, min_start)
1282         } else if let Some(e) = self.core.hybrid.get(&input) {
1283             trace!(
1284                 "using lazy DFA for reverse suffix search at {:?}, \
1285                  but will be stopped at {} to avoid quadratic behavior",
1286                 input.get_span(),
1287                 min_start,
1288             );
1289             e.try_search_half_rev_limited(&mut cache.hybrid, &input, min_start)
1290         } else {
1291             unreachable!("ReverseSuffix always has a DFA")
1292         }
1293     }
1294 }
1295 
1296 impl Strategy for ReverseSuffix {
1297     #[cfg_attr(feature = "perf-inline", inline(always))]
group_info(&self) -> &GroupInfo1298     fn group_info(&self) -> &GroupInfo {
1299         self.core.group_info()
1300     }
1301 
1302     #[cfg_attr(feature = "perf-inline", inline(always))]
create_cache(&self) -> Cache1303     fn create_cache(&self) -> Cache {
1304         self.core.create_cache()
1305     }
1306 
1307     #[cfg_attr(feature = "perf-inline", inline(always))]
reset_cache(&self, cache: &mut Cache)1308     fn reset_cache(&self, cache: &mut Cache) {
1309         self.core.reset_cache(cache);
1310     }
1311 
is_accelerated(&self) -> bool1312     fn is_accelerated(&self) -> bool {
1313         self.pre.is_fast()
1314     }
1315 
memory_usage(&self) -> usize1316     fn memory_usage(&self) -> usize {
1317         self.core.memory_usage() + self.pre.memory_usage()
1318     }
1319 
1320     #[cfg_attr(feature = "perf-inline", inline(always))]
search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>1321     fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1322         if input.get_anchored().is_anchored() {
1323             return self.core.search(cache, input);
1324         }
1325         match self.try_search_half_start(cache, input) {
1326             Err(RetryError::Quadratic(_err)) => {
1327                 trace!("reverse suffix optimization failed: {}", _err);
1328                 self.core.search(cache, input)
1329             }
1330             Err(RetryError::Fail(_err)) => {
1331                 trace!("reverse suffix reverse fast search failed: {}", _err);
1332                 self.core.search_nofail(cache, input)
1333             }
1334             Ok(None) => None,
1335             Ok(Some(hm_start)) => {
1336                 let fwdinput = input
1337                     .clone()
1338                     .anchored(Anchored::Pattern(hm_start.pattern()))
1339                     .span(hm_start.offset()..input.end());
1340                 match self.try_search_half_fwd(cache, &fwdinput) {
1341                     Err(_err) => {
1342                         trace!(
1343                             "reverse suffix forward fast search failed: {}",
1344                             _err
1345                         );
1346                         self.core.search_nofail(cache, input)
1347                     }
1348                     Ok(None) => {
1349                         unreachable!(
1350                             "suffix match plus reverse match implies \
1351 						     there must be a match",
1352                         )
1353                     }
1354                     Ok(Some(hm_end)) => Some(Match::new(
1355                         hm_start.pattern(),
1356                         hm_start.offset()..hm_end.offset(),
1357                     )),
1358                 }
1359             }
1360         }
1361     }
1362 
1363     #[cfg_attr(feature = "perf-inline", inline(always))]
search_half( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>1364     fn search_half(
1365         &self,
1366         cache: &mut Cache,
1367         input: &Input<'_>,
1368     ) -> Option<HalfMatch> {
1369         if input.get_anchored().is_anchored() {
1370             return self.core.search_half(cache, input);
1371         }
1372         match self.try_search_half_start(cache, input) {
1373             Err(RetryError::Quadratic(_err)) => {
1374                 trace!("reverse suffix half optimization failed: {}", _err);
1375                 self.core.search_half(cache, input)
1376             }
1377             Err(RetryError::Fail(_err)) => {
1378                 trace!(
1379                     "reverse suffix reverse fast half search failed: {}",
1380                     _err
1381                 );
1382                 self.core.search_half_nofail(cache, input)
1383             }
1384             Ok(None) => None,
1385             Ok(Some(hm_start)) => {
1386                 // This is a bit subtle. It is tempting to just stop searching
1387                 // at this point and return a half-match with an offset
1388                 // corresponding to where the suffix was found. But the suffix
1389                 // match does not necessarily correspond to the end of the
1390                 // proper leftmost-first match. Consider /[a-z]+ing/ against
1391                 // 'tingling'. The first suffix match is the first 'ing', and
1392                 // the /[a-z]+/ matches the 't'. So if we stopped here, then
1393                 // we'd report 'ting' as the match. But 'tingling' is the
1394                 // correct match because of greediness.
1395                 let fwdinput = input
1396                     .clone()
1397                     .anchored(Anchored::Pattern(hm_start.pattern()))
1398                     .span(hm_start.offset()..input.end());
1399                 match self.try_search_half_fwd(cache, &fwdinput) {
1400                     Err(_err) => {
1401                         trace!(
1402                             "reverse suffix forward fast search failed: {}",
1403                             _err
1404                         );
1405                         self.core.search_half_nofail(cache, input)
1406                     }
1407                     Ok(None) => {
1408                         unreachable!(
1409                             "suffix match plus reverse match implies \
1410 						     there must be a match",
1411                         )
1412                     }
1413                     Ok(Some(hm_end)) => Some(hm_end),
1414                 }
1415             }
1416         }
1417     }
1418 
1419     #[cfg_attr(feature = "perf-inline", inline(always))]
is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool1420     fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1421         if input.get_anchored().is_anchored() {
1422             return self.core.is_match(cache, input);
1423         }
1424         match self.try_search_half_start(cache, input) {
1425             Err(RetryError::Quadratic(_err)) => {
1426                 trace!("reverse suffix half optimization failed: {}", _err);
1427                 self.core.is_match_nofail(cache, input)
1428             }
1429             Err(RetryError::Fail(_err)) => {
1430                 trace!(
1431                     "reverse suffix reverse fast half search failed: {}",
1432                     _err
1433                 );
1434                 self.core.is_match_nofail(cache, input)
1435             }
1436             Ok(None) => false,
1437             Ok(Some(_)) => true,
1438         }
1439     }
1440 
1441     #[cfg_attr(feature = "perf-inline", inline(always))]
search_slots( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>1442     fn search_slots(
1443         &self,
1444         cache: &mut Cache,
1445         input: &Input<'_>,
1446         slots: &mut [Option<NonMaxUsize>],
1447     ) -> Option<PatternID> {
1448         if input.get_anchored().is_anchored() {
1449             return self.core.search_slots(cache, input, slots);
1450         }
1451         if !self.core.is_capture_search_needed(slots.len()) {
1452             trace!("asked for slots unnecessarily, trying fast path");
1453             let m = self.search(cache, input)?;
1454             copy_match_to_slots(m, slots);
1455             return Some(m.pattern());
1456         }
1457         let hm_start = match self.try_search_half_start(cache, input) {
1458             Err(RetryError::Quadratic(_err)) => {
1459                 trace!(
1460                     "reverse suffix captures optimization failed: {}",
1461                     _err
1462                 );
1463                 return self.core.search_slots(cache, input, slots);
1464             }
1465             Err(RetryError::Fail(_err)) => {
1466                 trace!(
1467                     "reverse suffix reverse fast captures search failed: {}",
1468                     _err
1469                 );
1470                 return self.core.search_slots_nofail(cache, input, slots);
1471             }
1472             Ok(None) => return None,
1473             Ok(Some(hm_start)) => hm_start,
1474         };
1475         trace!(
1476             "match found at {}..{} in capture search, \
1477 		  	 using another engine to find captures",
1478             hm_start.offset(),
1479             input.end(),
1480         );
1481         let start = hm_start.offset();
1482         let input = input
1483             .clone()
1484             .span(start..input.end())
1485             .anchored(Anchored::Pattern(hm_start.pattern()));
1486         self.core.search_slots_nofail(cache, &input, slots)
1487     }
1488 
1489     #[cfg_attr(feature = "perf-inline", inline(always))]
which_overlapping_matches( &self, cache: &mut Cache, input: &Input<'_>, patset: &mut PatternSet, )1490     fn which_overlapping_matches(
1491         &self,
1492         cache: &mut Cache,
1493         input: &Input<'_>,
1494         patset: &mut PatternSet,
1495     ) {
1496         self.core.which_overlapping_matches(cache, input, patset)
1497     }
1498 }
1499 
1500 #[derive(Debug)]
1501 struct ReverseInner {
1502     core: Core,
1503     preinner: Prefilter,
1504     nfarev: NFA,
1505     hybrid: wrappers::ReverseHybrid,
1506     dfa: wrappers::ReverseDFA,
1507 }
1508 
1509 impl ReverseInner {
new(core: Core, hirs: &[&Hir]) -> Result<ReverseInner, Core>1510     fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseInner, Core> {
1511         if !core.info.config().get_auto_prefilter() {
1512             debug!(
1513                 "skipping reverse inner optimization because \
1514                  automatic prefilters are disabled"
1515             );
1516             return Err(core);
1517         }
1518         // Currently we hard-code the assumption of leftmost-first match
1519         // semantics. This isn't a huge deal because 'all' semantics tend to
1520         // only be used for forward overlapping searches with multiple regexes,
1521         // and this optimization only supports a single pattern at the moment.
1522         if core.info.config().get_match_kind() != MatchKind::LeftmostFirst {
1523             debug!(
1524                 "skipping reverse inner optimization because \
1525 				 match kind is {:?} but this only supports leftmost-first",
1526                 core.info.config().get_match_kind(),
1527             );
1528             return Err(core);
1529         }
1530         // It's likely that a reverse inner scan has too much overhead for it
1531         // to be worth it when the regex is anchored at the start. It is
1532         // possible for it to be quite a bit faster if the initial literal
1533         // scan fails to detect a match, in which case, we can say "no match"
1534         // very quickly. But this could be undesirable, e.g., scanning too far
1535         // or when the literal scan matches. If it matches, then confirming the
1536         // match requires a reverse scan followed by a forward scan to confirm
1537         // or reject, which is a fair bit of work.
1538         //
1539         // Note that the caller can still request an anchored search even
1540         // when the regex isn't anchored. We detect that case in the search
1541         // routines below and just fallback to the core engine. Currently this
1542         // optimization assumes all searches are unanchored, so if we do want
1543         // to enable this optimization for anchored searches, it will need a
1544         // little work to support it.
1545         if core.info.is_always_anchored_start() {
1546             debug!(
1547                 "skipping reverse inner optimization because \
1548 				 the regex is always anchored at the start",
1549             );
1550             return Err(core);
1551         }
1552         // Only DFAs can do reverse searches (currently), so we need one of
1553         // them in order to do this optimization. It's possible (although
1554         // pretty unlikely) that we have neither and need to give up.
1555         if !core.hybrid.is_some() && !core.dfa.is_some() {
1556             debug!(
1557                 "skipping reverse inner optimization because \
1558 				 we don't have a lazy DFA or a full DFA"
1559             );
1560             return Err(core);
1561         }
1562         if core.pre.as_ref().map_or(false, |p| p.is_fast()) {
1563             debug!(
1564                 "skipping reverse inner optimization because \
1565 				 we already have a prefilter that we think is fast"
1566             );
1567             return Err(core);
1568         } else if core.pre.is_some() {
1569             debug!(
1570                 "core engine has a prefix prefilter, but it is \
1571                  probably not fast, so continuing with attempt to \
1572                  use reverse inner prefilter"
1573             );
1574         }
1575         let (concat_prefix, preinner) = match reverse_inner::extract(hirs) {
1576             Some(x) => x,
1577             // N.B. the 'extract' function emits debug messages explaining
1578             // why we bailed out here.
1579             None => return Err(core),
1580         };
1581         debug!("building reverse NFA for prefix before inner literal");
1582         let mut lookm = LookMatcher::new();
1583         lookm.set_line_terminator(core.info.config().get_line_terminator());
1584         let thompson_config = thompson::Config::new()
1585             .reverse(true)
1586             .utf8(core.info.config().get_utf8_empty())
1587             .nfa_size_limit(core.info.config().get_nfa_size_limit())
1588             .shrink(false)
1589             .which_captures(WhichCaptures::None)
1590             .look_matcher(lookm);
1591         let result = thompson::Compiler::new()
1592             .configure(thompson_config)
1593             .build_from_hir(&concat_prefix);
1594         let nfarev = match result {
1595             Ok(nfarev) => nfarev,
1596             Err(_err) => {
1597                 debug!(
1598                     "skipping reverse inner optimization because the \
1599 					 reverse NFA failed to build: {}",
1600                     _err,
1601                 );
1602                 return Err(core);
1603             }
1604         };
1605         debug!("building reverse DFA for prefix before inner literal");
1606         let dfa = if !core.info.config().get_dfa() {
1607             wrappers::ReverseDFA::none()
1608         } else {
1609             wrappers::ReverseDFA::new(&core.info, &nfarev)
1610         };
1611         let hybrid = if !core.info.config().get_hybrid() {
1612             wrappers::ReverseHybrid::none()
1613         } else if dfa.is_some() {
1614             debug!(
1615                 "skipping lazy DFA for reverse inner optimization \
1616 				 because we have a full DFA"
1617             );
1618             wrappers::ReverseHybrid::none()
1619         } else {
1620             wrappers::ReverseHybrid::new(&core.info, &nfarev)
1621         };
1622         Ok(ReverseInner { core, preinner, nfarev, hybrid, dfa })
1623     }
1624 
1625     #[cfg_attr(feature = "perf-inline", inline(always))]
try_search_full( &self, cache: &mut Cache, input: &Input<'_>, ) -> Result<Option<Match>, RetryError>1626     fn try_search_full(
1627         &self,
1628         cache: &mut Cache,
1629         input: &Input<'_>,
1630     ) -> Result<Option<Match>, RetryError> {
1631         let mut span = input.get_span();
1632         let mut min_match_start = 0;
1633         let mut min_pre_start = 0;
1634         loop {
1635             let litmatch = match self.preinner.find(input.haystack(), span) {
1636                 None => return Ok(None),
1637                 Some(span) => span,
1638             };
1639             if litmatch.start < min_pre_start {
1640                 trace!(
1641                     "found inner prefilter match at {:?}, which starts \
1642 					 before the end of the last forward scan at {}, \
1643 					 quitting to avoid quadratic behavior",
1644                     litmatch,
1645                     min_pre_start,
1646                 );
1647                 return Err(RetryError::Quadratic(RetryQuadraticError::new()));
1648             }
1649             trace!("reverse inner scan found inner match at {:?}", litmatch);
1650             let revinput = input
1651                 .clone()
1652                 .anchored(Anchored::Yes)
1653                 .span(input.start()..litmatch.start);
1654             // Note that in addition to the literal search above scanning past
1655             // our minimum start point, this routine can also return an error
1656             // as a result of detecting possible quadratic behavior if the
1657             // reverse scan goes past the minimum start point. That is, the
1658             // literal search might not, but the reverse regex search for the
1659             // prefix might!
1660             match self.try_search_half_rev_limited(
1661                 cache,
1662                 &revinput,
1663                 min_match_start,
1664             )? {
1665                 None => {
1666                     if span.start >= span.end {
1667                         break;
1668                     }
1669                     span.start = litmatch.start.checked_add(1).unwrap();
1670                 }
1671                 Some(hm_start) => {
1672                     let fwdinput = input
1673                         .clone()
1674                         .anchored(Anchored::Pattern(hm_start.pattern()))
1675                         .span(hm_start.offset()..input.end());
1676                     match self.try_search_half_fwd_stopat(cache, &fwdinput)? {
1677                         Err(stopat) => {
1678                             min_pre_start = stopat;
1679                             span.start =
1680                                 litmatch.start.checked_add(1).unwrap();
1681                         }
1682                         Ok(hm_end) => {
1683                             return Ok(Some(Match::new(
1684                                 hm_start.pattern(),
1685                                 hm_start.offset()..hm_end.offset(),
1686                             )))
1687                         }
1688                     }
1689                 }
1690             }
1691             min_match_start = litmatch.end;
1692         }
1693         Ok(None)
1694     }
1695 
1696     #[cfg_attr(feature = "perf-inline", inline(always))]
try_search_half_fwd_stopat( &self, cache: &mut Cache, input: &Input<'_>, ) -> Result<Result<HalfMatch, usize>, RetryFailError>1697     fn try_search_half_fwd_stopat(
1698         &self,
1699         cache: &mut Cache,
1700         input: &Input<'_>,
1701     ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
1702         if let Some(e) = self.core.dfa.get(&input) {
1703             trace!(
1704                 "using full DFA for forward reverse inner search at {:?}",
1705                 input.get_span()
1706             );
1707             e.try_search_half_fwd_stopat(&input)
1708         } else if let Some(e) = self.core.hybrid.get(&input) {
1709             trace!(
1710                 "using lazy DFA for forward reverse inner search at {:?}",
1711                 input.get_span()
1712             );
1713             e.try_search_half_fwd_stopat(&mut cache.hybrid, &input)
1714         } else {
1715             unreachable!("ReverseInner always has a DFA")
1716         }
1717     }
1718 
1719     #[cfg_attr(feature = "perf-inline", inline(always))]
try_search_half_rev_limited( &self, cache: &mut Cache, input: &Input<'_>, min_start: usize, ) -> Result<Option<HalfMatch>, RetryError>1720     fn try_search_half_rev_limited(
1721         &self,
1722         cache: &mut Cache,
1723         input: &Input<'_>,
1724         min_start: usize,
1725     ) -> Result<Option<HalfMatch>, RetryError> {
1726         if let Some(e) = self.dfa.get(&input) {
1727             trace!(
1728                 "using full DFA for reverse inner search at {:?}, \
1729                  but will be stopped at {} to avoid quadratic behavior",
1730                 input.get_span(),
1731                 min_start,
1732             );
1733             e.try_search_half_rev_limited(&input, min_start)
1734         } else if let Some(e) = self.hybrid.get(&input) {
1735             trace!(
1736                 "using lazy DFA for reverse inner search at {:?}, \
1737                  but will be stopped at {} to avoid quadratic behavior",
1738                 input.get_span(),
1739                 min_start,
1740             );
1741             e.try_search_half_rev_limited(
1742                 &mut cache.revhybrid,
1743                 &input,
1744                 min_start,
1745             )
1746         } else {
1747             unreachable!("ReverseInner always has a DFA")
1748         }
1749     }
1750 }
1751 
1752 impl Strategy for ReverseInner {
1753     #[cfg_attr(feature = "perf-inline", inline(always))]
group_info(&self) -> &GroupInfo1754     fn group_info(&self) -> &GroupInfo {
1755         self.core.group_info()
1756     }
1757 
1758     #[cfg_attr(feature = "perf-inline", inline(always))]
create_cache(&self) -> Cache1759     fn create_cache(&self) -> Cache {
1760         let mut cache = self.core.create_cache();
1761         cache.revhybrid = self.hybrid.create_cache();
1762         cache
1763     }
1764 
1765     #[cfg_attr(feature = "perf-inline", inline(always))]
reset_cache(&self, cache: &mut Cache)1766     fn reset_cache(&self, cache: &mut Cache) {
1767         self.core.reset_cache(cache);
1768         cache.revhybrid.reset(&self.hybrid);
1769     }
1770 
is_accelerated(&self) -> bool1771     fn is_accelerated(&self) -> bool {
1772         self.preinner.is_fast()
1773     }
1774 
memory_usage(&self) -> usize1775     fn memory_usage(&self) -> usize {
1776         self.core.memory_usage()
1777             + self.preinner.memory_usage()
1778             + self.nfarev.memory_usage()
1779             + self.dfa.memory_usage()
1780     }
1781 
1782     #[cfg_attr(feature = "perf-inline", inline(always))]
search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>1783     fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1784         if input.get_anchored().is_anchored() {
1785             return self.core.search(cache, input);
1786         }
1787         match self.try_search_full(cache, input) {
1788             Err(RetryError::Quadratic(_err)) => {
1789                 trace!("reverse inner optimization failed: {}", _err);
1790                 self.core.search(cache, input)
1791             }
1792             Err(RetryError::Fail(_err)) => {
1793                 trace!("reverse inner fast search failed: {}", _err);
1794                 self.core.search_nofail(cache, input)
1795             }
1796             Ok(matornot) => matornot,
1797         }
1798     }
1799 
1800     #[cfg_attr(feature = "perf-inline", inline(always))]
search_half( &self, cache: &mut Cache, input: &Input<'_>, ) -> Option<HalfMatch>1801     fn search_half(
1802         &self,
1803         cache: &mut Cache,
1804         input: &Input<'_>,
1805     ) -> Option<HalfMatch> {
1806         if input.get_anchored().is_anchored() {
1807             return self.core.search_half(cache, input);
1808         }
1809         match self.try_search_full(cache, input) {
1810             Err(RetryError::Quadratic(_err)) => {
1811                 trace!("reverse inner half optimization failed: {}", _err);
1812                 self.core.search_half(cache, input)
1813             }
1814             Err(RetryError::Fail(_err)) => {
1815                 trace!("reverse inner fast half search failed: {}", _err);
1816                 self.core.search_half_nofail(cache, input)
1817             }
1818             Ok(None) => None,
1819             Ok(Some(m)) => Some(HalfMatch::new(m.pattern(), m.end())),
1820         }
1821     }
1822 
1823     #[cfg_attr(feature = "perf-inline", inline(always))]
is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool1824     fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1825         if input.get_anchored().is_anchored() {
1826             return self.core.is_match(cache, input);
1827         }
1828         match self.try_search_full(cache, input) {
1829             Err(RetryError::Quadratic(_err)) => {
1830                 trace!("reverse inner half optimization failed: {}", _err);
1831                 self.core.is_match_nofail(cache, input)
1832             }
1833             Err(RetryError::Fail(_err)) => {
1834                 trace!("reverse inner fast half search failed: {}", _err);
1835                 self.core.is_match_nofail(cache, input)
1836             }
1837             Ok(None) => false,
1838             Ok(Some(_)) => true,
1839         }
1840     }
1841 
1842     #[cfg_attr(feature = "perf-inline", inline(always))]
search_slots( &self, cache: &mut Cache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>1843     fn search_slots(
1844         &self,
1845         cache: &mut Cache,
1846         input: &Input<'_>,
1847         slots: &mut [Option<NonMaxUsize>],
1848     ) -> Option<PatternID> {
1849         if input.get_anchored().is_anchored() {
1850             return self.core.search_slots(cache, input, slots);
1851         }
1852         if !self.core.is_capture_search_needed(slots.len()) {
1853             trace!("asked for slots unnecessarily, trying fast path");
1854             let m = self.search(cache, input)?;
1855             copy_match_to_slots(m, slots);
1856             return Some(m.pattern());
1857         }
1858         let m = match self.try_search_full(cache, input) {
1859             Err(RetryError::Quadratic(_err)) => {
1860                 trace!("reverse inner captures optimization failed: {}", _err);
1861                 return self.core.search_slots(cache, input, slots);
1862             }
1863             Err(RetryError::Fail(_err)) => {
1864                 trace!("reverse inner fast captures search failed: {}", _err);
1865                 return self.core.search_slots_nofail(cache, input, slots);
1866             }
1867             Ok(None) => return None,
1868             Ok(Some(m)) => m,
1869         };
1870         trace!(
1871             "match found at {}..{} in capture search, \
1872 		  	 using another engine to find captures",
1873             m.start(),
1874             m.end(),
1875         );
1876         let input = input
1877             .clone()
1878             .span(m.start()..m.end())
1879             .anchored(Anchored::Pattern(m.pattern()));
1880         self.core.search_slots_nofail(cache, &input, slots)
1881     }
1882 
1883     #[cfg_attr(feature = "perf-inline", inline(always))]
which_overlapping_matches( &self, cache: &mut Cache, input: &Input<'_>, patset: &mut PatternSet, )1884     fn which_overlapping_matches(
1885         &self,
1886         cache: &mut Cache,
1887         input: &Input<'_>,
1888         patset: &mut PatternSet,
1889     ) {
1890         self.core.which_overlapping_matches(cache, input, patset)
1891     }
1892 }
1893 
1894 /// Copies the offsets in the given match to the corresponding positions in
1895 /// `slots`.
1896 ///
1897 /// In effect, this sets the slots corresponding to the implicit group for the
1898 /// pattern in the given match. If the indices for the corresponding slots do
1899 /// not exist, then no slots are set.
1900 ///
1901 /// This is useful when the caller provides slots (or captures), but you use a
1902 /// regex engine that doesn't operate on slots (like a lazy DFA). This function
1903 /// lets you map the match you get back to the slots provided by the caller.
1904 #[cfg_attr(feature = "perf-inline", inline(always))]
copy_match_to_slots(m: Match, slots: &mut [Option<NonMaxUsize>])1905 fn copy_match_to_slots(m: Match, slots: &mut [Option<NonMaxUsize>]) {
1906     let slot_start = m.pattern().as_usize() * 2;
1907     let slot_end = slot_start + 1;
1908     if let Some(slot) = slots.get_mut(slot_start) {
1909         *slot = NonMaxUsize::new(m.start());
1910     }
1911     if let Some(slot) = slots.get_mut(slot_end) {
1912         *slot = NonMaxUsize::new(m.end());
1913     }
1914 }
1915