1 /*! 2 This module contains a boat load of wrappers around each of our internal regex 3 engines. They encapsulate a few things: 4 5 1. The wrappers manage the conditional existence of the regex engine. Namely, 6 the PikeVM is the only required regex engine. The rest are optional. These 7 wrappers present a uniform API regardless of which engines are available. And 8 availability might be determined by compile time features or by dynamic 9 configuration via `meta::Config`. Encapsulating the conditional compilation 10 features is in particular a huge simplification for the higher level code that 11 composes these engines. 12 2. The wrappers manage construction of each engine, including skipping it if 13 the engine is unavailable or configured to not be used. 14 3. The wrappers manage whether an engine *can* be used for a particular 15 search configuration. For example, `BoundedBacktracker::get` only returns a 16 backtracking engine when the haystack is bigger than the maximum supported 17 length. The wrappers also sometimes take a position on when an engine *ought* 18 to be used, but only in cases where the logic is extremely local to the engine 19 itself. Otherwise, things like "choose between the backtracker and the one-pass 20 DFA" are managed by the higher level meta strategy code. 21 22 There are also corresponding wrappers for the various `Cache` types for each 23 regex engine that needs them. If an engine is unavailable or not used, then a 24 cache for it will *not* actually be allocated. 25 */ 26 27 use alloc::vec::Vec; 28 29 use crate::{ 30 meta::{ 31 error::{BuildError, RetryError, RetryFailError}, 32 regex::RegexInfo, 33 }, 34 nfa::thompson::{pikevm, NFA}, 35 util::{prefilter::Prefilter, primitives::NonMaxUsize}, 36 HalfMatch, Input, Match, MatchKind, PatternID, PatternSet, 37 }; 38 39 #[cfg(feature = "dfa-build")] 40 use crate::dfa; 41 #[cfg(feature = "dfa-onepass")] 42 use crate::dfa::onepass; 43 #[cfg(feature = "hybrid")] 44 use crate::hybrid; 45 #[cfg(feature = "nfa-backtrack")] 46 use crate::nfa::thompson::backtrack; 47 48 #[derive(Debug)] 49 pub(crate) struct PikeVM(PikeVMEngine); 50 51 impl PikeVM { new( info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, ) -> Result<PikeVM, BuildError>52 pub(crate) fn new( 53 info: &RegexInfo, 54 pre: Option<Prefilter>, 55 nfa: &NFA, 56 ) -> Result<PikeVM, BuildError> { 57 PikeVMEngine::new(info, pre, nfa).map(PikeVM) 58 } 59 create_cache(&self) -> PikeVMCache60 pub(crate) fn create_cache(&self) -> PikeVMCache { 61 PikeVMCache::new(self) 62 } 63 64 #[cfg_attr(feature = "perf-inline", inline(always))] get(&self) -> &PikeVMEngine65 pub(crate) fn get(&self) -> &PikeVMEngine { 66 &self.0 67 } 68 } 69 70 #[derive(Debug)] 71 pub(crate) struct PikeVMEngine(pikevm::PikeVM); 72 73 impl PikeVMEngine { new( info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, ) -> Result<PikeVMEngine, BuildError>74 pub(crate) fn new( 75 info: &RegexInfo, 76 pre: Option<Prefilter>, 77 nfa: &NFA, 78 ) -> Result<PikeVMEngine, BuildError> { 79 let pikevm_config = pikevm::Config::new() 80 .match_kind(info.config().get_match_kind()) 81 .prefilter(pre); 82 let engine = pikevm::Builder::new() 83 .configure(pikevm_config) 84 .build_from_nfa(nfa.clone()) 85 .map_err(BuildError::nfa)?; 86 debug!("PikeVM built"); 87 Ok(PikeVMEngine(engine)) 88 } 89 90 #[cfg_attr(feature = "perf-inline", inline(always))] is_match( &self, cache: &mut PikeVMCache, input: &Input<'_>, ) -> bool91 pub(crate) fn is_match( 92 &self, 93 cache: &mut PikeVMCache, 94 input: &Input<'_>, 95 ) -> bool { 96 self.0.is_match(cache.0.as_mut().unwrap(), input.clone()) 97 } 98 99 #[cfg_attr(feature = "perf-inline", inline(always))] search_slots( &self, cache: &mut PikeVMCache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>100 pub(crate) fn search_slots( 101 &self, 102 cache: &mut PikeVMCache, 103 input: &Input<'_>, 104 slots: &mut [Option<NonMaxUsize>], 105 ) -> Option<PatternID> { 106 self.0.search_slots(cache.0.as_mut().unwrap(), input, slots) 107 } 108 109 #[cfg_attr(feature = "perf-inline", inline(always))] which_overlapping_matches( &self, cache: &mut PikeVMCache, input: &Input<'_>, patset: &mut PatternSet, )110 pub(crate) fn which_overlapping_matches( 111 &self, 112 cache: &mut PikeVMCache, 113 input: &Input<'_>, 114 patset: &mut PatternSet, 115 ) { 116 self.0.which_overlapping_matches( 117 cache.0.as_mut().unwrap(), 118 input, 119 patset, 120 ) 121 } 122 } 123 124 #[derive(Clone, Debug)] 125 pub(crate) struct PikeVMCache(Option<pikevm::Cache>); 126 127 impl PikeVMCache { none() -> PikeVMCache128 pub(crate) fn none() -> PikeVMCache { 129 PikeVMCache(None) 130 } 131 new(builder: &PikeVM) -> PikeVMCache132 pub(crate) fn new(builder: &PikeVM) -> PikeVMCache { 133 PikeVMCache(Some(builder.get().0.create_cache())) 134 } 135 reset(&mut self, builder: &PikeVM)136 pub(crate) fn reset(&mut self, builder: &PikeVM) { 137 self.0.as_mut().unwrap().reset(&builder.get().0); 138 } 139 memory_usage(&self) -> usize140 pub(crate) fn memory_usage(&self) -> usize { 141 self.0.as_ref().map_or(0, |c| c.memory_usage()) 142 } 143 } 144 145 #[derive(Debug)] 146 pub(crate) struct BoundedBacktracker(Option<BoundedBacktrackerEngine>); 147 148 impl BoundedBacktracker { new( info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, ) -> Result<BoundedBacktracker, BuildError>149 pub(crate) fn new( 150 info: &RegexInfo, 151 pre: Option<Prefilter>, 152 nfa: &NFA, 153 ) -> Result<BoundedBacktracker, BuildError> { 154 BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker) 155 } 156 create_cache(&self) -> BoundedBacktrackerCache157 pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache { 158 BoundedBacktrackerCache::new(self) 159 } 160 161 #[cfg_attr(feature = "perf-inline", inline(always))] get( &self, input: &Input<'_>, ) -> Option<&BoundedBacktrackerEngine>162 pub(crate) fn get( 163 &self, 164 input: &Input<'_>, 165 ) -> Option<&BoundedBacktrackerEngine> { 166 let engine = self.0.as_ref()?; 167 // It is difficult to make the backtracker give up early if it is 168 // guaranteed to eventually wind up in a match state. This is because 169 // of the greedy nature of a backtracker: it just blindly mushes 170 // forward. Every other regex engine is able to give up more quickly, 171 // so even if the backtracker might be able to zip through faster than 172 // (say) the PikeVM, we prefer the theoretical benefit that some other 173 // engine might be able to scan much less of the haystack than the 174 // backtracker. 175 // 176 // Now, if the haystack is really short already, then we allow the 177 // backtracker to run. (This hasn't been litigated quantitatively with 178 // benchmarks. Just a hunch.) 179 if input.get_earliest() && input.haystack().len() > 128 { 180 return None; 181 } 182 // If the backtracker is just going to return an error because the 183 // haystack is too long, then obviously do not use it. 184 if input.get_span().len() > engine.max_haystack_len() { 185 return None; 186 } 187 Some(engine) 188 } 189 } 190 191 #[derive(Debug)] 192 pub(crate) struct BoundedBacktrackerEngine( 193 #[cfg(feature = "nfa-backtrack")] backtrack::BoundedBacktracker, 194 #[cfg(not(feature = "nfa-backtrack"))] (), 195 ); 196 197 impl BoundedBacktrackerEngine { new( info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, ) -> Result<Option<BoundedBacktrackerEngine>, BuildError>198 pub(crate) fn new( 199 info: &RegexInfo, 200 pre: Option<Prefilter>, 201 nfa: &NFA, 202 ) -> Result<Option<BoundedBacktrackerEngine>, BuildError> { 203 #[cfg(feature = "nfa-backtrack")] 204 { 205 if !info.config().get_backtrack() 206 || info.config().get_match_kind() != MatchKind::LeftmostFirst 207 { 208 return Ok(None); 209 } 210 let backtrack_config = backtrack::Config::new().prefilter(pre); 211 let engine = backtrack::Builder::new() 212 .configure(backtrack_config) 213 .build_from_nfa(nfa.clone()) 214 .map_err(BuildError::nfa)?; 215 debug!( 216 "BoundedBacktracker built (max haystack length: {:?})", 217 engine.max_haystack_len() 218 ); 219 Ok(Some(BoundedBacktrackerEngine(engine))) 220 } 221 #[cfg(not(feature = "nfa-backtrack"))] 222 { 223 Ok(None) 224 } 225 } 226 227 #[cfg_attr(feature = "perf-inline", inline(always))] is_match( &self, cache: &mut BoundedBacktrackerCache, input: &Input<'_>, ) -> bool228 pub(crate) fn is_match( 229 &self, 230 cache: &mut BoundedBacktrackerCache, 231 input: &Input<'_>, 232 ) -> bool { 233 #[cfg(feature = "nfa-backtrack")] 234 { 235 // OK because we only permit access to this engine when we know 236 // the haystack is short enough for the backtracker to run without 237 // reporting an error. 238 self.0 239 .try_is_match(cache.0.as_mut().unwrap(), input.clone()) 240 .unwrap() 241 } 242 #[cfg(not(feature = "nfa-backtrack"))] 243 { 244 // Impossible to reach because this engine is never constructed 245 // if the requisite features aren't enabled. 246 unreachable!() 247 } 248 } 249 250 #[cfg_attr(feature = "perf-inline", inline(always))] search_slots( &self, cache: &mut BoundedBacktrackerCache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>251 pub(crate) fn search_slots( 252 &self, 253 cache: &mut BoundedBacktrackerCache, 254 input: &Input<'_>, 255 slots: &mut [Option<NonMaxUsize>], 256 ) -> Option<PatternID> { 257 #[cfg(feature = "nfa-backtrack")] 258 { 259 // OK because we only permit access to this engine when we know 260 // the haystack is short enough for the backtracker to run without 261 // reporting an error. 262 self.0 263 .try_search_slots(cache.0.as_mut().unwrap(), input, slots) 264 .unwrap() 265 } 266 #[cfg(not(feature = "nfa-backtrack"))] 267 { 268 // Impossible to reach because this engine is never constructed 269 // if the requisite features aren't enabled. 270 unreachable!() 271 } 272 } 273 274 #[cfg_attr(feature = "perf-inline", inline(always))] max_haystack_len(&self) -> usize275 fn max_haystack_len(&self) -> usize { 276 #[cfg(feature = "nfa-backtrack")] 277 { 278 self.0.max_haystack_len() 279 } 280 #[cfg(not(feature = "nfa-backtrack"))] 281 { 282 // Impossible to reach because this engine is never constructed 283 // if the requisite features aren't enabled. 284 unreachable!() 285 } 286 } 287 } 288 289 #[derive(Clone, Debug)] 290 pub(crate) struct BoundedBacktrackerCache( 291 #[cfg(feature = "nfa-backtrack")] Option<backtrack::Cache>, 292 #[cfg(not(feature = "nfa-backtrack"))] (), 293 ); 294 295 impl BoundedBacktrackerCache { none() -> BoundedBacktrackerCache296 pub(crate) fn none() -> BoundedBacktrackerCache { 297 #[cfg(feature = "nfa-backtrack")] 298 { 299 BoundedBacktrackerCache(None) 300 } 301 #[cfg(not(feature = "nfa-backtrack"))] 302 { 303 BoundedBacktrackerCache(()) 304 } 305 } 306 new( builder: &BoundedBacktracker, ) -> BoundedBacktrackerCache307 pub(crate) fn new( 308 builder: &BoundedBacktracker, 309 ) -> BoundedBacktrackerCache { 310 #[cfg(feature = "nfa-backtrack")] 311 { 312 BoundedBacktrackerCache( 313 builder.0.as_ref().map(|e| e.0.create_cache()), 314 ) 315 } 316 #[cfg(not(feature = "nfa-backtrack"))] 317 { 318 BoundedBacktrackerCache(()) 319 } 320 } 321 reset(&mut self, builder: &BoundedBacktracker)322 pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) { 323 #[cfg(feature = "nfa-backtrack")] 324 if let Some(ref e) = builder.0 { 325 self.0.as_mut().unwrap().reset(&e.0); 326 } 327 } 328 memory_usage(&self) -> usize329 pub(crate) fn memory_usage(&self) -> usize { 330 #[cfg(feature = "nfa-backtrack")] 331 { 332 self.0.as_ref().map_or(0, |c| c.memory_usage()) 333 } 334 #[cfg(not(feature = "nfa-backtrack"))] 335 { 336 0 337 } 338 } 339 } 340 341 #[derive(Debug)] 342 pub(crate) struct OnePass(Option<OnePassEngine>); 343 344 impl OnePass { new(info: &RegexInfo, nfa: &NFA) -> OnePass345 pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass { 346 OnePass(OnePassEngine::new(info, nfa)) 347 } 348 create_cache(&self) -> OnePassCache349 pub(crate) fn create_cache(&self) -> OnePassCache { 350 OnePassCache::new(self) 351 } 352 353 #[cfg_attr(feature = "perf-inline", inline(always))] get(&self, input: &Input<'_>) -> Option<&OnePassEngine>354 pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> { 355 let engine = self.0.as_ref()?; 356 if !input.get_anchored().is_anchored() 357 && !engine.get_nfa().is_always_start_anchored() 358 { 359 return None; 360 } 361 Some(engine) 362 } 363 memory_usage(&self) -> usize364 pub(crate) fn memory_usage(&self) -> usize { 365 self.0.as_ref().map_or(0, |e| e.memory_usage()) 366 } 367 } 368 369 #[derive(Debug)] 370 pub(crate) struct OnePassEngine( 371 #[cfg(feature = "dfa-onepass")] onepass::DFA, 372 #[cfg(not(feature = "dfa-onepass"))] (), 373 ); 374 375 impl OnePassEngine { new(info: &RegexInfo, nfa: &NFA) -> Option<OnePassEngine>376 pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> Option<OnePassEngine> { 377 #[cfg(feature = "dfa-onepass")] 378 { 379 if !info.config().get_onepass() { 380 return None; 381 } 382 // In order to even attempt building a one-pass DFA, we require 383 // that we either have at least one explicit capturing group or 384 // there's a Unicode word boundary somewhere. If we don't have 385 // either of these things, then the lazy DFA will almost certainly 386 // be useable and be much faster. The only case where it might 387 // not is if the lazy DFA isn't utilizing its cache effectively, 388 // but in those cases, the underlying regex is almost certainly 389 // not one-pass or is too big to fit within the current one-pass 390 // implementation limits. 391 if info.props_union().explicit_captures_len() == 0 392 && !info.props_union().look_set().contains_word_unicode() 393 { 394 debug!("not building OnePass because it isn't worth it"); 395 return None; 396 } 397 let onepass_config = onepass::Config::new() 398 .match_kind(info.config().get_match_kind()) 399 // Like for the lazy DFA, we unconditionally enable this 400 // because it doesn't cost much and makes the API more 401 // flexible. 402 .starts_for_each_pattern(true) 403 .byte_classes(info.config().get_byte_classes()) 404 .size_limit(info.config().get_onepass_size_limit()); 405 let result = onepass::Builder::new() 406 .configure(onepass_config) 407 .build_from_nfa(nfa.clone()); 408 let engine = match result { 409 Ok(engine) => engine, 410 Err(_err) => { 411 debug!("OnePass failed to build: {}", _err); 412 return None; 413 } 414 }; 415 debug!("OnePass built, {} bytes", engine.memory_usage()); 416 Some(OnePassEngine(engine)) 417 } 418 #[cfg(not(feature = "dfa-onepass"))] 419 { 420 None 421 } 422 } 423 424 #[cfg_attr(feature = "perf-inline", inline(always))] search_slots( &self, cache: &mut OnePassCache, input: &Input<'_>, slots: &mut [Option<NonMaxUsize>], ) -> Option<PatternID>425 pub(crate) fn search_slots( 426 &self, 427 cache: &mut OnePassCache, 428 input: &Input<'_>, 429 slots: &mut [Option<NonMaxUsize>], 430 ) -> Option<PatternID> { 431 #[cfg(feature = "dfa-onepass")] 432 { 433 // OK because we only permit getting a OnePassEngine when we know 434 // the search is anchored and thus an error cannot occur. 435 self.0 436 .try_search_slots(cache.0.as_mut().unwrap(), input, slots) 437 .unwrap() 438 } 439 #[cfg(not(feature = "dfa-onepass"))] 440 { 441 // Impossible to reach because this engine is never constructed 442 // if the requisite features aren't enabled. 443 unreachable!() 444 } 445 } 446 memory_usage(&self) -> usize447 pub(crate) fn memory_usage(&self) -> usize { 448 #[cfg(feature = "dfa-onepass")] 449 { 450 self.0.memory_usage() 451 } 452 #[cfg(not(feature = "dfa-onepass"))] 453 { 454 // Impossible to reach because this engine is never constructed 455 // if the requisite features aren't enabled. 456 unreachable!() 457 } 458 } 459 460 #[cfg_attr(feature = "perf-inline", inline(always))] get_nfa(&self) -> &NFA461 fn get_nfa(&self) -> &NFA { 462 #[cfg(feature = "dfa-onepass")] 463 { 464 self.0.get_nfa() 465 } 466 #[cfg(not(feature = "dfa-onepass"))] 467 { 468 // Impossible to reach because this engine is never constructed 469 // if the requisite features aren't enabled. 470 unreachable!() 471 } 472 } 473 } 474 475 #[derive(Clone, Debug)] 476 pub(crate) struct OnePassCache( 477 #[cfg(feature = "dfa-onepass")] Option<onepass::Cache>, 478 #[cfg(not(feature = "dfa-onepass"))] (), 479 ); 480 481 impl OnePassCache { none() -> OnePassCache482 pub(crate) fn none() -> OnePassCache { 483 #[cfg(feature = "dfa-onepass")] 484 { 485 OnePassCache(None) 486 } 487 #[cfg(not(feature = "dfa-onepass"))] 488 { 489 OnePassCache(()) 490 } 491 } 492 new(builder: &OnePass) -> OnePassCache493 pub(crate) fn new(builder: &OnePass) -> OnePassCache { 494 #[cfg(feature = "dfa-onepass")] 495 { 496 OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache())) 497 } 498 #[cfg(not(feature = "dfa-onepass"))] 499 { 500 OnePassCache(()) 501 } 502 } 503 reset(&mut self, builder: &OnePass)504 pub(crate) fn reset(&mut self, builder: &OnePass) { 505 #[cfg(feature = "dfa-onepass")] 506 if let Some(ref e) = builder.0 { 507 self.0.as_mut().unwrap().reset(&e.0); 508 } 509 } 510 memory_usage(&self) -> usize511 pub(crate) fn memory_usage(&self) -> usize { 512 #[cfg(feature = "dfa-onepass")] 513 { 514 self.0.as_ref().map_or(0, |c| c.memory_usage()) 515 } 516 #[cfg(not(feature = "dfa-onepass"))] 517 { 518 0 519 } 520 } 521 } 522 523 #[derive(Debug)] 524 pub(crate) struct Hybrid(Option<HybridEngine>); 525 526 impl Hybrid { none() -> Hybrid527 pub(crate) fn none() -> Hybrid { 528 Hybrid(None) 529 } 530 new( info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, nfarev: &NFA, ) -> Hybrid531 pub(crate) fn new( 532 info: &RegexInfo, 533 pre: Option<Prefilter>, 534 nfa: &NFA, 535 nfarev: &NFA, 536 ) -> Hybrid { 537 Hybrid(HybridEngine::new(info, pre, nfa, nfarev)) 538 } 539 create_cache(&self) -> HybridCache540 pub(crate) fn create_cache(&self) -> HybridCache { 541 HybridCache::new(self) 542 } 543 544 #[cfg_attr(feature = "perf-inline", inline(always))] get(&self, _input: &Input<'_>) -> Option<&HybridEngine>545 pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> { 546 let engine = self.0.as_ref()?; 547 Some(engine) 548 } 549 is_some(&self) -> bool550 pub(crate) fn is_some(&self) -> bool { 551 self.0.is_some() 552 } 553 } 554 555 #[derive(Debug)] 556 pub(crate) struct HybridEngine( 557 #[cfg(feature = "hybrid")] hybrid::regex::Regex, 558 #[cfg(not(feature = "hybrid"))] (), 559 ); 560 561 impl HybridEngine { new( info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, nfarev: &NFA, ) -> Option<HybridEngine>562 pub(crate) fn new( 563 info: &RegexInfo, 564 pre: Option<Prefilter>, 565 nfa: &NFA, 566 nfarev: &NFA, 567 ) -> Option<HybridEngine> { 568 #[cfg(feature = "hybrid")] 569 { 570 if !info.config().get_hybrid() { 571 return None; 572 } 573 let dfa_config = hybrid::dfa::Config::new() 574 .match_kind(info.config().get_match_kind()) 575 .prefilter(pre.clone()) 576 // Enabling this is necessary for ensuring we can service any 577 // kind of 'Input' search without error. For the lazy DFA, 578 // this is not particularly costly, since the start states are 579 // generated lazily. 580 .starts_for_each_pattern(true) 581 .byte_classes(info.config().get_byte_classes()) 582 .unicode_word_boundary(true) 583 .specialize_start_states(pre.is_some()) 584 .cache_capacity(info.config().get_hybrid_cache_capacity()) 585 // This makes it possible for building a lazy DFA to 586 // fail even though the NFA has already been built. Namely, 587 // if the cache capacity is too small to fit some minimum 588 // number of states (which is small, like 4 or 5), then the 589 // DFA will refuse to build. 590 // 591 // We shouldn't enable this to make building always work, since 592 // this could cause the allocation of a cache bigger than the 593 // provided capacity amount. 594 // 595 // This is effectively the only reason why building a lazy DFA 596 // could fail. If it does, then we simply suppress the error 597 // and return None. 598 .skip_cache_capacity_check(false) 599 // This and enabling heuristic Unicode word boundary support 600 // above make it so the lazy DFA can quit at match time. 601 .minimum_cache_clear_count(Some(3)) 602 .minimum_bytes_per_state(Some(10)); 603 let result = hybrid::dfa::Builder::new() 604 .configure(dfa_config.clone()) 605 .build_from_nfa(nfa.clone()); 606 let fwd = match result { 607 Ok(fwd) => fwd, 608 Err(_err) => { 609 debug!("forward lazy DFA failed to build: {}", _err); 610 return None; 611 } 612 }; 613 let result = hybrid::dfa::Builder::new() 614 .configure( 615 dfa_config 616 .clone() 617 .match_kind(MatchKind::All) 618 .prefilter(None) 619 .specialize_start_states(false), 620 ) 621 .build_from_nfa(nfarev.clone()); 622 let rev = match result { 623 Ok(rev) => rev, 624 Err(_err) => { 625 debug!("reverse lazy DFA failed to build: {}", _err); 626 return None; 627 } 628 }; 629 let engine = 630 hybrid::regex::Builder::new().build_from_dfas(fwd, rev); 631 debug!("lazy DFA built"); 632 Some(HybridEngine(engine)) 633 } 634 #[cfg(not(feature = "hybrid"))] 635 { 636 None 637 } 638 } 639 640 #[cfg_attr(feature = "perf-inline", inline(always))] try_search( &self, cache: &mut HybridCache, input: &Input<'_>, ) -> Result<Option<Match>, RetryFailError>641 pub(crate) fn try_search( 642 &self, 643 cache: &mut HybridCache, 644 input: &Input<'_>, 645 ) -> Result<Option<Match>, RetryFailError> { 646 #[cfg(feature = "hybrid")] 647 { 648 let cache = cache.0.as_mut().unwrap(); 649 self.0.try_search(cache, input).map_err(|e| e.into()) 650 } 651 #[cfg(not(feature = "hybrid"))] 652 { 653 // Impossible to reach because this engine is never constructed 654 // if the requisite features aren't enabled. 655 unreachable!() 656 } 657 } 658 659 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_fwd( &self, cache: &mut HybridCache, input: &Input<'_>, ) -> Result<Option<HalfMatch>, RetryFailError>660 pub(crate) fn try_search_half_fwd( 661 &self, 662 cache: &mut HybridCache, 663 input: &Input<'_>, 664 ) -> Result<Option<HalfMatch>, RetryFailError> { 665 #[cfg(feature = "hybrid")] 666 { 667 let fwd = self.0.forward(); 668 let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; 669 fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into()) 670 } 671 #[cfg(not(feature = "hybrid"))] 672 { 673 // Impossible to reach because this engine is never constructed 674 // if the requisite features aren't enabled. 675 unreachable!() 676 } 677 } 678 679 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_fwd_stopat( &self, cache: &mut HybridCache, input: &Input<'_>, ) -> Result<Result<HalfMatch, usize>, RetryFailError>680 pub(crate) fn try_search_half_fwd_stopat( 681 &self, 682 cache: &mut HybridCache, 683 input: &Input<'_>, 684 ) -> Result<Result<HalfMatch, usize>, RetryFailError> { 685 #[cfg(feature = "hybrid")] 686 { 687 let dfa = self.0.forward(); 688 let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0; 689 crate::meta::stopat::hybrid_try_search_half_fwd( 690 dfa, &mut cache, input, 691 ) 692 } 693 #[cfg(not(feature = "hybrid"))] 694 { 695 // Impossible to reach because this engine is never constructed 696 // if the requisite features aren't enabled. 697 unreachable!() 698 } 699 } 700 701 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_rev( &self, cache: &mut HybridCache, input: &Input<'_>, ) -> Result<Option<HalfMatch>, RetryFailError>702 pub(crate) fn try_search_half_rev( 703 &self, 704 cache: &mut HybridCache, 705 input: &Input<'_>, 706 ) -> Result<Option<HalfMatch>, RetryFailError> { 707 #[cfg(feature = "hybrid")] 708 { 709 let rev = self.0.reverse(); 710 let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1; 711 rev.try_search_rev(&mut revcache, input).map_err(|e| e.into()) 712 } 713 #[cfg(not(feature = "hybrid"))] 714 { 715 // Impossible to reach because this engine is never constructed 716 // if the requisite features aren't enabled. 717 unreachable!() 718 } 719 } 720 721 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_rev_limited( &self, cache: &mut HybridCache, input: &Input<'_>, min_start: usize, ) -> Result<Option<HalfMatch>, RetryError>722 pub(crate) fn try_search_half_rev_limited( 723 &self, 724 cache: &mut HybridCache, 725 input: &Input<'_>, 726 min_start: usize, 727 ) -> Result<Option<HalfMatch>, RetryError> { 728 #[cfg(feature = "hybrid")] 729 { 730 let dfa = self.0.reverse(); 731 let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1; 732 crate::meta::limited::hybrid_try_search_half_rev( 733 dfa, &mut cache, input, min_start, 734 ) 735 } 736 #[cfg(not(feature = "hybrid"))] 737 { 738 // Impossible to reach because this engine is never constructed 739 // if the requisite features aren't enabled. 740 unreachable!() 741 } 742 } 743 744 #[inline] try_which_overlapping_matches( &self, cache: &mut HybridCache, input: &Input<'_>, patset: &mut PatternSet, ) -> Result<(), RetryFailError>745 pub(crate) fn try_which_overlapping_matches( 746 &self, 747 cache: &mut HybridCache, 748 input: &Input<'_>, 749 patset: &mut PatternSet, 750 ) -> Result<(), RetryFailError> { 751 #[cfg(feature = "hybrid")] 752 { 753 let fwd = self.0.forward(); 754 let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; 755 fwd.try_which_overlapping_matches(&mut fwdcache, input, patset) 756 .map_err(|e| e.into()) 757 } 758 #[cfg(not(feature = "hybrid"))] 759 { 760 // Impossible to reach because this engine is never constructed 761 // if the requisite features aren't enabled. 762 unreachable!() 763 } 764 } 765 } 766 767 #[derive(Clone, Debug)] 768 pub(crate) struct HybridCache( 769 #[cfg(feature = "hybrid")] Option<hybrid::regex::Cache>, 770 #[cfg(not(feature = "hybrid"))] (), 771 ); 772 773 impl HybridCache { none() -> HybridCache774 pub(crate) fn none() -> HybridCache { 775 #[cfg(feature = "hybrid")] 776 { 777 HybridCache(None) 778 } 779 #[cfg(not(feature = "hybrid"))] 780 { 781 HybridCache(()) 782 } 783 } 784 new(builder: &Hybrid) -> HybridCache785 pub(crate) fn new(builder: &Hybrid) -> HybridCache { 786 #[cfg(feature = "hybrid")] 787 { 788 HybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) 789 } 790 #[cfg(not(feature = "hybrid"))] 791 { 792 HybridCache(()) 793 } 794 } 795 reset(&mut self, builder: &Hybrid)796 pub(crate) fn reset(&mut self, builder: &Hybrid) { 797 #[cfg(feature = "hybrid")] 798 if let Some(ref e) = builder.0 { 799 self.0.as_mut().unwrap().reset(&e.0); 800 } 801 } 802 memory_usage(&self) -> usize803 pub(crate) fn memory_usage(&self) -> usize { 804 #[cfg(feature = "hybrid")] 805 { 806 self.0.as_ref().map_or(0, |c| c.memory_usage()) 807 } 808 #[cfg(not(feature = "hybrid"))] 809 { 810 0 811 } 812 } 813 } 814 815 #[derive(Debug)] 816 pub(crate) struct DFA(Option<DFAEngine>); 817 818 impl DFA { none() -> DFA819 pub(crate) fn none() -> DFA { 820 DFA(None) 821 } 822 new( info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, nfarev: &NFA, ) -> DFA823 pub(crate) fn new( 824 info: &RegexInfo, 825 pre: Option<Prefilter>, 826 nfa: &NFA, 827 nfarev: &NFA, 828 ) -> DFA { 829 DFA(DFAEngine::new(info, pre, nfa, nfarev)) 830 } 831 832 #[cfg_attr(feature = "perf-inline", inline(always))] get(&self, _input: &Input<'_>) -> Option<&DFAEngine>833 pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> { 834 let engine = self.0.as_ref()?; 835 Some(engine) 836 } 837 is_some(&self) -> bool838 pub(crate) fn is_some(&self) -> bool { 839 self.0.is_some() 840 } 841 memory_usage(&self) -> usize842 pub(crate) fn memory_usage(&self) -> usize { 843 self.0.as_ref().map_or(0, |e| e.memory_usage()) 844 } 845 } 846 847 #[derive(Debug)] 848 pub(crate) struct DFAEngine( 849 #[cfg(feature = "dfa-build")] dfa::regex::Regex, 850 #[cfg(not(feature = "dfa-build"))] (), 851 ); 852 853 impl DFAEngine { new( info: &RegexInfo, pre: Option<Prefilter>, nfa: &NFA, nfarev: &NFA, ) -> Option<DFAEngine>854 pub(crate) fn new( 855 info: &RegexInfo, 856 pre: Option<Prefilter>, 857 nfa: &NFA, 858 nfarev: &NFA, 859 ) -> Option<DFAEngine> { 860 #[cfg(feature = "dfa-build")] 861 { 862 if !info.config().get_dfa() { 863 return None; 864 } 865 // If our NFA is anything but small, don't even bother with a DFA. 866 if let Some(state_limit) = info.config().get_dfa_state_limit() { 867 if nfa.states().len() > state_limit { 868 debug!( 869 "skipping full DFA because NFA has {} states, \ 870 which exceeds the heuristic limit of {}", 871 nfa.states().len(), 872 state_limit, 873 ); 874 return None; 875 } 876 } 877 // We cut the size limit in four because the total heap used by 878 // DFA construction is determinization aux memory and the DFA 879 // itself, and those things are configured independently in the 880 // lower level DFA builder API. And then split that in two because 881 // of forward and reverse DFAs. 882 let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4); 883 let dfa_config = dfa::dense::Config::new() 884 .match_kind(info.config().get_match_kind()) 885 .prefilter(pre.clone()) 886 // Enabling this is necessary for ensuring we can service any 887 // kind of 'Input' search without error. For the full DFA, this 888 // can be quite costly. But since we have such a small bound 889 // on the size of the DFA, in practice, any multl-regexes are 890 // probably going to blow the limit anyway. 891 .starts_for_each_pattern(true) 892 .byte_classes(info.config().get_byte_classes()) 893 .unicode_word_boundary(true) 894 .specialize_start_states(pre.is_some()) 895 .determinize_size_limit(size_limit) 896 .dfa_size_limit(size_limit); 897 let result = dfa::dense::Builder::new() 898 .configure(dfa_config.clone()) 899 .build_from_nfa(&nfa); 900 let fwd = match result { 901 Ok(fwd) => fwd, 902 Err(_err) => { 903 debug!("forward full DFA failed to build: {}", _err); 904 return None; 905 } 906 }; 907 let result = dfa::dense::Builder::new() 908 .configure( 909 dfa_config 910 .clone() 911 // We never need unanchored reverse searches, so 912 // there's no point in building it into the DFA, which 913 // WILL take more space. (This isn't done for the lazy 914 // DFA because the DFA is, well, lazy. It doesn't pay 915 // the cost for supporting unanchored searches unless 916 // you actually do an unanchored search, which we 917 // don't.) 918 .start_kind(dfa::StartKind::Anchored) 919 .match_kind(MatchKind::All) 920 .prefilter(None) 921 .specialize_start_states(false), 922 ) 923 .build_from_nfa(&nfarev); 924 let rev = match result { 925 Ok(rev) => rev, 926 Err(_err) => { 927 debug!("reverse full DFA failed to build: {}", _err); 928 return None; 929 } 930 }; 931 let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev); 932 debug!( 933 "fully compiled forward and reverse DFAs built, {} bytes", 934 engine.forward().memory_usage() 935 + engine.reverse().memory_usage(), 936 ); 937 Some(DFAEngine(engine)) 938 } 939 #[cfg(not(feature = "dfa-build"))] 940 { 941 None 942 } 943 } 944 945 #[cfg_attr(feature = "perf-inline", inline(always))] try_search( &self, input: &Input<'_>, ) -> Result<Option<Match>, RetryFailError>946 pub(crate) fn try_search( 947 &self, 948 input: &Input<'_>, 949 ) -> Result<Option<Match>, RetryFailError> { 950 #[cfg(feature = "dfa-build")] 951 { 952 self.0.try_search(input).map_err(|e| e.into()) 953 } 954 #[cfg(not(feature = "dfa-build"))] 955 { 956 // Impossible to reach because this engine is never constructed 957 // if the requisite features aren't enabled. 958 unreachable!() 959 } 960 } 961 962 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_fwd( &self, input: &Input<'_>, ) -> Result<Option<HalfMatch>, RetryFailError>963 pub(crate) fn try_search_half_fwd( 964 &self, 965 input: &Input<'_>, 966 ) -> Result<Option<HalfMatch>, RetryFailError> { 967 #[cfg(feature = "dfa-build")] 968 { 969 use crate::dfa::Automaton; 970 self.0.forward().try_search_fwd(input).map_err(|e| e.into()) 971 } 972 #[cfg(not(feature = "dfa-build"))] 973 { 974 // Impossible to reach because this engine is never constructed 975 // if the requisite features aren't enabled. 976 unreachable!() 977 } 978 } 979 980 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_fwd_stopat( &self, input: &Input<'_>, ) -> Result<Result<HalfMatch, usize>, RetryFailError>981 pub(crate) fn try_search_half_fwd_stopat( 982 &self, 983 input: &Input<'_>, 984 ) -> Result<Result<HalfMatch, usize>, RetryFailError> { 985 #[cfg(feature = "dfa-build")] 986 { 987 let dfa = self.0.forward(); 988 crate::meta::stopat::dfa_try_search_half_fwd(dfa, input) 989 } 990 #[cfg(not(feature = "dfa-build"))] 991 { 992 // Impossible to reach because this engine is never constructed 993 // if the requisite features aren't enabled. 994 unreachable!() 995 } 996 } 997 998 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_rev( &self, input: &Input<'_>, ) -> Result<Option<HalfMatch>, RetryFailError>999 pub(crate) fn try_search_half_rev( 1000 &self, 1001 input: &Input<'_>, 1002 ) -> Result<Option<HalfMatch>, RetryFailError> { 1003 #[cfg(feature = "dfa-build")] 1004 { 1005 use crate::dfa::Automaton; 1006 self.0.reverse().try_search_rev(&input).map_err(|e| e.into()) 1007 } 1008 #[cfg(not(feature = "dfa-build"))] 1009 { 1010 // Impossible to reach because this engine is never constructed 1011 // if the requisite features aren't enabled. 1012 unreachable!() 1013 } 1014 } 1015 1016 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_rev_limited( &self, input: &Input<'_>, min_start: usize, ) -> Result<Option<HalfMatch>, RetryError>1017 pub(crate) fn try_search_half_rev_limited( 1018 &self, 1019 input: &Input<'_>, 1020 min_start: usize, 1021 ) -> Result<Option<HalfMatch>, RetryError> { 1022 #[cfg(feature = "dfa-build")] 1023 { 1024 let dfa = self.0.reverse(); 1025 crate::meta::limited::dfa_try_search_half_rev( 1026 dfa, input, min_start, 1027 ) 1028 } 1029 #[cfg(not(feature = "dfa-build"))] 1030 { 1031 // Impossible to reach because this engine is never constructed 1032 // if the requisite features aren't enabled. 1033 unreachable!() 1034 } 1035 } 1036 1037 #[inline] try_which_overlapping_matches( &self, input: &Input<'_>, patset: &mut PatternSet, ) -> Result<(), RetryFailError>1038 pub(crate) fn try_which_overlapping_matches( 1039 &self, 1040 input: &Input<'_>, 1041 patset: &mut PatternSet, 1042 ) -> Result<(), RetryFailError> { 1043 #[cfg(feature = "dfa-build")] 1044 { 1045 use crate::dfa::Automaton; 1046 self.0 1047 .forward() 1048 .try_which_overlapping_matches(input, patset) 1049 .map_err(|e| e.into()) 1050 } 1051 #[cfg(not(feature = "dfa-build"))] 1052 { 1053 // Impossible to reach because this engine is never constructed 1054 // if the requisite features aren't enabled. 1055 unreachable!() 1056 } 1057 } 1058 memory_usage(&self) -> usize1059 pub(crate) fn memory_usage(&self) -> usize { 1060 #[cfg(feature = "dfa-build")] 1061 { 1062 self.0.forward().memory_usage() + self.0.reverse().memory_usage() 1063 } 1064 #[cfg(not(feature = "dfa-build"))] 1065 { 1066 // Impossible to reach because this engine is never constructed 1067 // if the requisite features aren't enabled. 1068 unreachable!() 1069 } 1070 } 1071 } 1072 1073 #[derive(Debug)] 1074 pub(crate) struct ReverseHybrid(Option<ReverseHybridEngine>); 1075 1076 impl ReverseHybrid { none() -> ReverseHybrid1077 pub(crate) fn none() -> ReverseHybrid { 1078 ReverseHybrid(None) 1079 } 1080 new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid1081 pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid { 1082 ReverseHybrid(ReverseHybridEngine::new(info, nfarev)) 1083 } 1084 create_cache(&self) -> ReverseHybridCache1085 pub(crate) fn create_cache(&self) -> ReverseHybridCache { 1086 ReverseHybridCache::new(self) 1087 } 1088 1089 #[cfg_attr(feature = "perf-inline", inline(always))] get( &self, _input: &Input<'_>, ) -> Option<&ReverseHybridEngine>1090 pub(crate) fn get( 1091 &self, 1092 _input: &Input<'_>, 1093 ) -> Option<&ReverseHybridEngine> { 1094 let engine = self.0.as_ref()?; 1095 Some(engine) 1096 } 1097 } 1098 1099 #[derive(Debug)] 1100 pub(crate) struct ReverseHybridEngine( 1101 #[cfg(feature = "hybrid")] hybrid::dfa::DFA, 1102 #[cfg(not(feature = "hybrid"))] (), 1103 ); 1104 1105 impl ReverseHybridEngine { new( info: &RegexInfo, nfarev: &NFA, ) -> Option<ReverseHybridEngine>1106 pub(crate) fn new( 1107 info: &RegexInfo, 1108 nfarev: &NFA, 1109 ) -> Option<ReverseHybridEngine> { 1110 #[cfg(feature = "hybrid")] 1111 { 1112 if !info.config().get_hybrid() { 1113 return None; 1114 } 1115 // Since we only use this for reverse searches, we can hard-code 1116 // a number of things like match semantics, prefilters, starts 1117 // for each pattern and so on. 1118 let dfa_config = hybrid::dfa::Config::new() 1119 .match_kind(MatchKind::All) 1120 .prefilter(None) 1121 .starts_for_each_pattern(false) 1122 .byte_classes(info.config().get_byte_classes()) 1123 .unicode_word_boundary(true) 1124 .specialize_start_states(false) 1125 .cache_capacity(info.config().get_hybrid_cache_capacity()) 1126 .skip_cache_capacity_check(false) 1127 .minimum_cache_clear_count(Some(3)) 1128 .minimum_bytes_per_state(Some(10)); 1129 let result = hybrid::dfa::Builder::new() 1130 .configure(dfa_config) 1131 .build_from_nfa(nfarev.clone()); 1132 let rev = match result { 1133 Ok(rev) => rev, 1134 Err(_err) => { 1135 debug!("lazy reverse DFA failed to build: {}", _err); 1136 return None; 1137 } 1138 }; 1139 debug!("lazy reverse DFA built"); 1140 Some(ReverseHybridEngine(rev)) 1141 } 1142 #[cfg(not(feature = "hybrid"))] 1143 { 1144 None 1145 } 1146 } 1147 1148 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_rev_limited( &self, cache: &mut ReverseHybridCache, input: &Input<'_>, min_start: usize, ) -> Result<Option<HalfMatch>, RetryError>1149 pub(crate) fn try_search_half_rev_limited( 1150 &self, 1151 cache: &mut ReverseHybridCache, 1152 input: &Input<'_>, 1153 min_start: usize, 1154 ) -> Result<Option<HalfMatch>, RetryError> { 1155 #[cfg(feature = "hybrid")] 1156 { 1157 let dfa = &self.0; 1158 let mut cache = cache.0.as_mut().unwrap(); 1159 crate::meta::limited::hybrid_try_search_half_rev( 1160 dfa, &mut cache, input, min_start, 1161 ) 1162 } 1163 #[cfg(not(feature = "hybrid"))] 1164 { 1165 // Impossible to reach because this engine is never constructed 1166 // if the requisite features aren't enabled. 1167 unreachable!() 1168 } 1169 } 1170 } 1171 1172 #[derive(Clone, Debug)] 1173 pub(crate) struct ReverseHybridCache( 1174 #[cfg(feature = "hybrid")] Option<hybrid::dfa::Cache>, 1175 #[cfg(not(feature = "hybrid"))] (), 1176 ); 1177 1178 impl ReverseHybridCache { none() -> ReverseHybridCache1179 pub(crate) fn none() -> ReverseHybridCache { 1180 #[cfg(feature = "hybrid")] 1181 { 1182 ReverseHybridCache(None) 1183 } 1184 #[cfg(not(feature = "hybrid"))] 1185 { 1186 ReverseHybridCache(()) 1187 } 1188 } 1189 new(builder: &ReverseHybrid) -> ReverseHybridCache1190 pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache { 1191 #[cfg(feature = "hybrid")] 1192 { 1193 ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) 1194 } 1195 #[cfg(not(feature = "hybrid"))] 1196 { 1197 ReverseHybridCache(()) 1198 } 1199 } 1200 reset(&mut self, builder: &ReverseHybrid)1201 pub(crate) fn reset(&mut self, builder: &ReverseHybrid) { 1202 #[cfg(feature = "hybrid")] 1203 if let Some(ref e) = builder.0 { 1204 self.0.as_mut().unwrap().reset(&e.0); 1205 } 1206 } 1207 memory_usage(&self) -> usize1208 pub(crate) fn memory_usage(&self) -> usize { 1209 #[cfg(feature = "hybrid")] 1210 { 1211 self.0.as_ref().map_or(0, |c| c.memory_usage()) 1212 } 1213 #[cfg(not(feature = "hybrid"))] 1214 { 1215 0 1216 } 1217 } 1218 } 1219 1220 #[derive(Debug)] 1221 pub(crate) struct ReverseDFA(Option<ReverseDFAEngine>); 1222 1223 impl ReverseDFA { none() -> ReverseDFA1224 pub(crate) fn none() -> ReverseDFA { 1225 ReverseDFA(None) 1226 } 1227 new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA1228 pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA { 1229 ReverseDFA(ReverseDFAEngine::new(info, nfarev)) 1230 } 1231 1232 #[cfg_attr(feature = "perf-inline", inline(always))] get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine>1233 pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> { 1234 let engine = self.0.as_ref()?; 1235 Some(engine) 1236 } 1237 is_some(&self) -> bool1238 pub(crate) fn is_some(&self) -> bool { 1239 self.0.is_some() 1240 } 1241 memory_usage(&self) -> usize1242 pub(crate) fn memory_usage(&self) -> usize { 1243 self.0.as_ref().map_or(0, |e| e.memory_usage()) 1244 } 1245 } 1246 1247 #[derive(Debug)] 1248 pub(crate) struct ReverseDFAEngine( 1249 #[cfg(feature = "dfa-build")] dfa::dense::DFA<Vec<u32>>, 1250 #[cfg(not(feature = "dfa-build"))] (), 1251 ); 1252 1253 impl ReverseDFAEngine { new( info: &RegexInfo, nfarev: &NFA, ) -> Option<ReverseDFAEngine>1254 pub(crate) fn new( 1255 info: &RegexInfo, 1256 nfarev: &NFA, 1257 ) -> Option<ReverseDFAEngine> { 1258 #[cfg(feature = "dfa-build")] 1259 { 1260 if !info.config().get_dfa() { 1261 return None; 1262 } 1263 // If our NFA is anything but small, don't even bother with a DFA. 1264 if let Some(state_limit) = info.config().get_dfa_state_limit() { 1265 if nfarev.states().len() > state_limit { 1266 debug!( 1267 "skipping full reverse DFA because NFA has {} states, \ 1268 which exceeds the heuristic limit of {}", 1269 nfarev.states().len(), 1270 state_limit, 1271 ); 1272 return None; 1273 } 1274 } 1275 // We cut the size limit in two because the total heap used by DFA 1276 // construction is determinization aux memory and the DFA itself, 1277 // and those things are configured independently in the lower level 1278 // DFA builder API. 1279 let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2); 1280 // Since we only use this for reverse searches, we can hard-code 1281 // a number of things like match semantics, prefilters, starts 1282 // for each pattern and so on. We also disable acceleration since 1283 // it's incompatible with limited searches (which is the only 1284 // operation we support for this kind of engine at the moment). 1285 let dfa_config = dfa::dense::Config::new() 1286 .match_kind(MatchKind::All) 1287 .prefilter(None) 1288 .accelerate(false) 1289 .start_kind(dfa::StartKind::Anchored) 1290 .starts_for_each_pattern(false) 1291 .byte_classes(info.config().get_byte_classes()) 1292 .unicode_word_boundary(true) 1293 .specialize_start_states(false) 1294 .determinize_size_limit(size_limit) 1295 .dfa_size_limit(size_limit); 1296 let result = dfa::dense::Builder::new() 1297 .configure(dfa_config) 1298 .build_from_nfa(&nfarev); 1299 let rev = match result { 1300 Ok(rev) => rev, 1301 Err(_err) => { 1302 debug!("full reverse DFA failed to build: {}", _err); 1303 return None; 1304 } 1305 }; 1306 debug!( 1307 "fully compiled reverse DFA built, {} bytes", 1308 rev.memory_usage() 1309 ); 1310 Some(ReverseDFAEngine(rev)) 1311 } 1312 #[cfg(not(feature = "dfa-build"))] 1313 { 1314 None 1315 } 1316 } 1317 1318 #[cfg_attr(feature = "perf-inline", inline(always))] try_search_half_rev_limited( &self, input: &Input<'_>, min_start: usize, ) -> Result<Option<HalfMatch>, RetryError>1319 pub(crate) fn try_search_half_rev_limited( 1320 &self, 1321 input: &Input<'_>, 1322 min_start: usize, 1323 ) -> Result<Option<HalfMatch>, RetryError> { 1324 #[cfg(feature = "dfa-build")] 1325 { 1326 let dfa = &self.0; 1327 crate::meta::limited::dfa_try_search_half_rev( 1328 dfa, input, min_start, 1329 ) 1330 } 1331 #[cfg(not(feature = "dfa-build"))] 1332 { 1333 // Impossible to reach because this engine is never constructed 1334 // if the requisite features aren't enabled. 1335 unreachable!() 1336 } 1337 } 1338 memory_usage(&self) -> usize1339 pub(crate) fn memory_usage(&self) -> usize { 1340 #[cfg(feature = "dfa-build")] 1341 { 1342 self.0.memory_usage() 1343 } 1344 #[cfg(not(feature = "dfa-build"))] 1345 { 1346 // Impossible to reach because this engine is never constructed 1347 // if the requisite features aren't enabled. 1348 unreachable!() 1349 } 1350 } 1351 } 1352