1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 //! Different optimizations for pest's ASTs.
11 
12 use crate::ast::*;
13 use std::collections::HashMap;
14 
15 #[cfg(test)]
16 macro_rules! box_tree {
17     ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18       $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19     );
20     ($expr:expr) => ($expr);
21 }
22 
23 mod concatenator;
24 mod factorizer;
25 mod lister;
26 mod restorer;
27 mod rotater;
28 mod skipper;
29 mod unroller;
30 
31 /// Takes pest's ASTs and optimizes them
optimize(rules: Vec<Rule>) -> Vec<OptimizedRule>32 pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33     let optimized: Vec<OptimizedRule> = rules
34         .into_iter()
35         .map(rotater::rotate)
36         .map(skipper::skip)
37         .map(unroller::unroll)
38         .map(concatenator::concatenate)
39         .map(factorizer::factor)
40         .map(lister::list)
41         .map(rule_to_optimized_rule)
42         .collect();
43 
44     let rules = to_hash_map(&optimized);
45     optimized
46         .into_iter()
47         .map(|rule| restorer::restore_on_err(rule, &rules))
48         .collect()
49 }
50 
rule_to_optimized_rule(rule: Rule) -> OptimizedRule51 fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
52     fn to_optimized(expr: Expr) -> OptimizedExpr {
53         match expr {
54             Expr::Str(string) => OptimizedExpr::Str(string),
55             Expr::Insens(string) => OptimizedExpr::Insens(string),
56             Expr::Range(start, end) => OptimizedExpr::Range(start, end),
57             Expr::Ident(ident) => OptimizedExpr::Ident(ident),
58             Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
59             Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
60             Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
61             Expr::Seq(lhs, rhs) => {
62                 OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
63             }
64             Expr::Choice(lhs, rhs) => {
65                 OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
66             }
67             Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
68             Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
69             Expr::Skip(strings) => OptimizedExpr::Skip(strings),
70             Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
71             #[cfg(feature = "grammar-extras")]
72             Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag),
73             #[cfg(feature = "grammar-extras")]
74             Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))),
75             #[cfg(not(feature = "grammar-extras"))]
76             Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule"),
77             Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => {
78                 unreachable!("No valid transformation to OptimizedRule")
79             }
80         }
81     }
82 
83     OptimizedRule {
84         name: rule.name,
85         ty: rule.ty,
86         expr: to_optimized(rule.expr),
87     }
88 }
89 
to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr>90 fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> {
91     rules
92         .iter()
93         .map(|r| (r.name.clone(), r.expr.clone()))
94         .collect()
95 }
96 
97 /// The optimized version of the pest AST's `Rule`.
98 #[derive(Clone, Debug, Eq, PartialEq)]
99 pub struct OptimizedRule {
100     /// The name of the rule.
101     pub name: String,
102     /// The type of the rule.
103     pub ty: RuleType,
104     /// The optimized expression of the rule.
105     pub expr: OptimizedExpr,
106 }
107 
108 /// The optimized version of the pest AST's `Expr`.
109 ///
110 /// # Warning: Semantic Versioning
111 /// There may be non-breaking changes to the meta-grammar
112 /// between minor versions. Those non-breaking changes, however,
113 /// may translate into semver-breaking changes due to the additional variants
114 /// propaged from the `Rule` enum. This is a known issue and will be fixed in the
115 /// future (e.g. by increasing MSRV and non_exhaustive annotations).
116 #[derive(Clone, Debug, Eq, PartialEq)]
117 pub enum OptimizedExpr {
118     /// Matches an exact string, e.g. `"a"`
119     Str(String),
120     /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
121     Insens(String),
122     /// Matches one character in the range, e.g. `'a'..'z'`
123     Range(String, String),
124     /// Matches the rule with the given name, e.g. `a`
125     Ident(String),
126     /// Matches a custom part of the stack, e.g. `PEEK[..]`
127     PeekSlice(i32, Option<i32>),
128     /// Positive lookahead; matches expression without making progress, e.g. `&e`
129     PosPred(Box<OptimizedExpr>),
130     /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
131     NegPred(Box<OptimizedExpr>),
132     /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
133     Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
134     /// Matches either of two expressions, e.g. `e1 | e2`
135     Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
136     /// Optionally matches an expression, e.g. `e?`
137     Opt(Box<OptimizedExpr>),
138     /// Matches an expression zero or more times, e.g. `e*`
139     Rep(Box<OptimizedExpr>),
140     /// Matches an expression one or more times, e.g. `e+`
141     #[cfg(feature = "grammar-extras")]
142     RepOnce(Box<OptimizedExpr>),
143     /// Continues to match expressions until one of the strings in the `Vec` is found
144     Skip(Vec<String>),
145     /// Matches an expression and pushes it to the stack, e.g. `push(e)`
146     Push(Box<OptimizedExpr>),
147     /// Matches an expression and assigns a label to it, e.g. #label = exp
148     #[cfg(feature = "grammar-extras")]
149     NodeTag(Box<OptimizedExpr>, String),
150     /// Restores an expression's checkpoint
151     RestoreOnErr(Box<OptimizedExpr>),
152 }
153 
154 impl OptimizedExpr {
155     /// Returns a top-down iterator over the `OptimizedExpr`.
iter_top_down(&self) -> OptimizedExprTopDownIterator156     pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
157         OptimizedExprTopDownIterator::new(self)
158     }
159 
160     /// Applies `f` to the `OptimizedExpr` top-down.
map_top_down<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,161     pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
162     where
163         F: FnMut(OptimizedExpr) -> OptimizedExpr,
164     {
165         fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
166         where
167             F: FnMut(OptimizedExpr) -> OptimizedExpr,
168         {
169             let expr = f(expr);
170 
171             match expr {
172                 OptimizedExpr::PosPred(expr) => {
173                     let mapped = Box::new(map_internal(*expr, f));
174                     OptimizedExpr::PosPred(mapped)
175                 }
176                 OptimizedExpr::NegPred(expr) => {
177                     let mapped = Box::new(map_internal(*expr, f));
178                     OptimizedExpr::NegPred(mapped)
179                 }
180                 OptimizedExpr::Seq(lhs, rhs) => {
181                     let mapped_lhs = Box::new(map_internal(*lhs, f));
182                     let mapped_rhs = Box::new(map_internal(*rhs, f));
183                     OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
184                 }
185                 OptimizedExpr::Choice(lhs, rhs) => {
186                     let mapped_lhs = Box::new(map_internal(*lhs, f));
187                     let mapped_rhs = Box::new(map_internal(*rhs, f));
188                     OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
189                 }
190                 OptimizedExpr::Rep(expr) => {
191                     let mapped = Box::new(map_internal(*expr, f));
192                     OptimizedExpr::Rep(mapped)
193                 }
194                 OptimizedExpr::Opt(expr) => {
195                     let mapped = Box::new(map_internal(*expr, f));
196                     OptimizedExpr::Opt(mapped)
197                 }
198                 OptimizedExpr::Push(expr) => {
199                     let mapped = Box::new(map_internal(*expr, f));
200                     OptimizedExpr::Push(mapped)
201                 }
202                 expr => expr,
203             }
204         }
205 
206         map_internal(self, &mut f)
207     }
208 
209     /// Applies `f` to the `OptimizedExpr` bottom-up.
map_bottom_up<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,210     pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
211     where
212         F: FnMut(OptimizedExpr) -> OptimizedExpr,
213     {
214         fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
215         where
216             F: FnMut(OptimizedExpr) -> OptimizedExpr,
217         {
218             let mapped = match expr {
219                 OptimizedExpr::PosPred(expr) => {
220                     let mapped = Box::new(map_internal(*expr, f));
221                     OptimizedExpr::PosPred(mapped)
222                 }
223                 OptimizedExpr::NegPred(expr) => {
224                     let mapped = Box::new(map_internal(*expr, f));
225                     OptimizedExpr::NegPred(mapped)
226                 }
227                 OptimizedExpr::Seq(lhs, rhs) => {
228                     let mapped_lhs = Box::new(map_internal(*lhs, f));
229                     let mapped_rhs = Box::new(map_internal(*rhs, f));
230                     OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
231                 }
232                 OptimizedExpr::Choice(lhs, rhs) => {
233                     let mapped_lhs = Box::new(map_internal(*lhs, f));
234                     let mapped_rhs = Box::new(map_internal(*rhs, f));
235                     OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
236                 }
237                 OptimizedExpr::Rep(expr) => {
238                     let mapped = Box::new(map_internal(*expr, f));
239                     OptimizedExpr::Rep(mapped)
240                 }
241                 OptimizedExpr::Opt(expr) => {
242                     let mapped = Box::new(map_internal(*expr, f));
243                     OptimizedExpr::Opt(mapped)
244                 }
245                 OptimizedExpr::Push(expr) => {
246                     let mapped = Box::new(map_internal(*expr, f));
247                     OptimizedExpr::Push(mapped)
248                 }
249                 expr => expr,
250             };
251 
252             f(mapped)
253         }
254 
255         map_internal(self, &mut f)
256     }
257 }
258 
259 impl core::fmt::Display for OptimizedExpr {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result260     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
261         match self {
262             OptimizedExpr::Str(s) => write!(f, "{:?}", s),
263             OptimizedExpr::Insens(s) => write!(f, "^{:?}", s),
264             OptimizedExpr::Range(start, end) => {
265                 let start = start.chars().next().expect("Empty range start.");
266                 let end = end.chars().next().expect("Empty range end.");
267                 write!(f, "({:?}..{:?})", start, end)
268             }
269             OptimizedExpr::Ident(id) => write!(f, "{}", id),
270             OptimizedExpr::PeekSlice(start, end) => match end {
271                 Some(end) => write!(f, "PEEK[{}..{}]", start, end),
272                 None => write!(f, "PEEK[{}..]", start),
273             },
274             OptimizedExpr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
275             OptimizedExpr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
276             OptimizedExpr::Seq(lhs, rhs) => {
277                 let mut nodes = Vec::new();
278                 nodes.push(lhs);
279                 let mut current = rhs;
280                 while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() {
281                     nodes.push(lhs);
282                     current = rhs;
283                 }
284                 nodes.push(current);
285                 let sequence = nodes
286                     .iter()
287                     .map(|node| format!("{}", node))
288                     .collect::<Vec<_>>()
289                     .join(" ~ ");
290                 write!(f, "({})", sequence)
291             }
292             OptimizedExpr::Choice(lhs, rhs) => {
293                 let mut nodes = Vec::new();
294                 nodes.push(lhs);
295                 let mut current = rhs;
296                 while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() {
297                     nodes.push(lhs);
298                     current = rhs;
299                 }
300                 nodes.push(current);
301                 let sequence = nodes
302                     .iter()
303                     .map(|node| format!("{}", node))
304                     .collect::<Vec<_>>()
305                     .join(" | ");
306                 write!(f, "({})", sequence)
307             }
308             OptimizedExpr::Opt(expr) => write!(f, "{}?", expr),
309             OptimizedExpr::Rep(expr) => write!(f, "{}*", expr),
310             #[cfg(feature = "grammar-extras")]
311             OptimizedExpr::RepOnce(expr) => write!(f, "{}+", expr),
312             OptimizedExpr::Skip(strings) => {
313                 let strings = strings
314                     .iter()
315                     .map(|s| format!("{:?}", s))
316                     .collect::<Vec<_>>()
317                     .join(" | ");
318                 write!(f, "(!({}) ~ ANY)*", strings)
319             }
320             OptimizedExpr::Push(expr) => write!(f, "PUSH({})", expr),
321             #[cfg(feature = "grammar-extras")]
322             OptimizedExpr::NodeTag(expr, tag) => {
323                 write!(f, "(#{} = {})", tag, expr)
324             }
325             OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f),
326         }
327     }
328 }
329 
330 /// A top-down iterator over an `OptimizedExpr`.
331 pub struct OptimizedExprTopDownIterator {
332     current: Option<OptimizedExpr>,
333     next: Option<OptimizedExpr>,
334     right_branches: Vec<OptimizedExpr>,
335 }
336 
337 impl OptimizedExprTopDownIterator {
338     /// Creates a new top down iterator from an `OptimizedExpr`.
new(expr: &OptimizedExpr) -> Self339     pub fn new(expr: &OptimizedExpr) -> Self {
340         let mut iter = OptimizedExprTopDownIterator {
341             current: None,
342             next: None,
343             right_branches: vec![],
344         };
345         iter.iterate_expr(expr.clone());
346         iter
347     }
348 
iterate_expr(&mut self, expr: OptimizedExpr)349     fn iterate_expr(&mut self, expr: OptimizedExpr) {
350         self.current = Some(expr.clone());
351         match expr {
352             OptimizedExpr::Seq(lhs, rhs) => {
353                 self.right_branches.push(*rhs);
354                 self.next = Some(*lhs);
355             }
356             OptimizedExpr::Choice(lhs, rhs) => {
357                 self.right_branches.push(*rhs);
358                 self.next = Some(*lhs);
359             }
360             OptimizedExpr::PosPred(expr)
361             | OptimizedExpr::NegPred(expr)
362             | OptimizedExpr::Rep(expr)
363             | OptimizedExpr::Opt(expr)
364             | OptimizedExpr::Push(expr) => {
365                 self.next = Some(*expr);
366             }
367             _ => {
368                 self.next = None;
369             }
370         }
371     }
372 }
373 
374 impl Iterator for OptimizedExprTopDownIterator {
375     type Item = OptimizedExpr;
376 
next(&mut self) -> Option<Self::Item>377     fn next(&mut self) -> Option<Self::Item> {
378         let result = self.current.take();
379 
380         if let Some(expr) = self.next.take() {
381             self.iterate_expr(expr);
382         } else if let Some(expr) = self.right_branches.pop() {
383             self.iterate_expr(expr);
384         }
385 
386         result
387     }
388 }
389 
390 #[cfg(test)]
391 mod tests {
392     use super::*;
393 
394     #[test]
rotate()395     fn rotate() {
396         let rules = {
397             use crate::ast::Expr::*;
398             vec![Rule {
399                 name: "rule".to_owned(),
400                 ty: RuleType::Normal,
401                 expr: box_tree!(Choice(
402                     Choice(
403                         Choice(Str(String::from("a")), Str(String::from("b"))),
404                         Str(String::from("c"))
405                     ),
406                     Str(String::from("d"))
407                 )),
408             }]
409         };
410         let rotated = {
411             use crate::optimizer::OptimizedExpr::*;
412             vec![OptimizedRule {
413                 name: "rule".to_owned(),
414                 ty: RuleType::Normal,
415                 expr: box_tree!(Choice(
416                     Str(String::from("a")),
417                     Choice(
418                         Str(String::from("b")),
419                         Choice(Str(String::from("c")), Str(String::from("d")))
420                     )
421                 )),
422             }]
423         };
424 
425         assert_eq!(optimize(rules), rotated);
426     }
427 
428     #[test]
skip()429     fn skip() {
430         let rules = {
431             use crate::ast::Expr::*;
432             vec![Rule {
433                 name: "rule".to_owned(),
434                 ty: RuleType::Atomic,
435                 expr: box_tree!(Rep(Seq(
436                     NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
437                     Ident(String::from("ANY"))
438                 ))),
439             }]
440         };
441         let skipped = vec![OptimizedRule {
442             name: "rule".to_owned(),
443             ty: RuleType::Atomic,
444             expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
445         }];
446 
447         assert_eq!(optimize(rules), skipped);
448     }
449 
450     #[test]
concat_strings()451     fn concat_strings() {
452         let rules = {
453             use crate::ast::Expr::*;
454             vec![Rule {
455                 name: "rule".to_owned(),
456                 ty: RuleType::Atomic,
457                 expr: box_tree!(Seq(
458                     Seq(Str(String::from("a")), Str(String::from("b"))),
459                     Seq(Str(String::from("c")), Str(String::from("d")))
460                 )),
461             }]
462         };
463         let concatenated = vec![OptimizedRule {
464             name: "rule".to_owned(),
465             ty: RuleType::Atomic,
466             expr: OptimizedExpr::Str(String::from("abcd")),
467         }];
468 
469         assert_eq!(optimize(rules), concatenated);
470     }
471 
472     #[test]
unroll_loop_exact()473     fn unroll_loop_exact() {
474         let rules = vec![Rule {
475             name: "rule".to_owned(),
476             ty: RuleType::Atomic,
477             expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
478         }];
479         let unrolled = {
480             use crate::optimizer::OptimizedExpr::*;
481             vec![OptimizedRule {
482                 name: "rule".to_owned(),
483                 ty: RuleType::Atomic,
484                 expr: box_tree!(Seq(
485                     Ident(String::from("a")),
486                     Seq(Ident(String::from("a")), Ident(String::from("a")))
487                 )),
488             }]
489         };
490 
491         assert_eq!(optimize(rules), unrolled);
492     }
493 
494     #[test]
unroll_loop_max()495     fn unroll_loop_max() {
496         let rules = vec![Rule {
497             name: "rule".to_owned(),
498             ty: RuleType::Atomic,
499             expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
500         }];
501         let unrolled = {
502             use crate::optimizer::OptimizedExpr::*;
503             vec![OptimizedRule {
504                 name: "rule".to_owned(),
505                 ty: RuleType::Atomic,
506                 expr: box_tree!(Seq(
507                     Opt(Str(String::from("a"))),
508                     Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
509                 )),
510             }]
511         };
512 
513         assert_eq!(optimize(rules), unrolled);
514     }
515 
516     #[test]
unroll_loop_min()517     fn unroll_loop_min() {
518         let rules = vec![Rule {
519             name: "rule".to_owned(),
520             ty: RuleType::Atomic,
521             expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
522         }];
523         let unrolled = {
524             use crate::optimizer::OptimizedExpr::*;
525             vec![OptimizedRule {
526                 name: "rule".to_owned(),
527                 ty: RuleType::Atomic,
528                 expr: box_tree!(Seq(
529                     Str(String::from("a")),
530                     Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
531                 )),
532             }]
533         };
534 
535         assert_eq!(optimize(rules), unrolled);
536     }
537 
538     #[test]
unroll_loop_min_max()539     fn unroll_loop_min_max() {
540         let rules = vec![Rule {
541             name: "rule".to_owned(),
542             ty: RuleType::Atomic,
543             expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
544         }];
545         let unrolled = {
546             use crate::optimizer::OptimizedExpr::*;
547             vec![OptimizedRule {
548                 name: "rule".to_owned(),
549                 ty: RuleType::Atomic,
550                 /* TODO possible room for improvement here:
551                  * if the sequences were rolled out in the opposite
552                  * order, we could further optimize the strings
553                  * in cases like this.
554                 Str(String::from(("aa")),
555                 Opt(Str(String::from("a")))
556                 */
557                 expr: box_tree!(Seq(
558                     Str(String::from("a")),
559                     Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
560                 )),
561             }]
562         };
563 
564         assert_eq!(optimize(rules), unrolled);
565     }
566 
567     #[test]
concat_insensitive_strings()568     fn concat_insensitive_strings() {
569         let rules = {
570             use crate::ast::Expr::*;
571             vec![Rule {
572                 name: "rule".to_owned(),
573                 ty: RuleType::Atomic,
574                 expr: box_tree!(Seq(
575                     Seq(Insens(String::from("a")), Insens(String::from("b"))),
576                     Seq(Insens(String::from("c")), Insens(String::from("d")))
577                 )),
578             }]
579         };
580         let concatenated = vec![OptimizedRule {
581             name: "rule".to_owned(),
582             ty: RuleType::Atomic,
583             expr: OptimizedExpr::Insens(String::from("abcd")),
584         }];
585 
586         assert_eq!(optimize(rules), concatenated);
587     }
588 
589     #[test]
long_common_sequence()590     fn long_common_sequence() {
591         let rules = {
592             use crate::ast::Expr::*;
593             vec![Rule {
594                 name: "rule".to_owned(),
595                 ty: RuleType::Silent,
596                 expr: box_tree!(Choice(
597                     Seq(
598                         Ident(String::from("a")),
599                         Seq(Ident(String::from("b")), Ident(String::from("c")))
600                     ),
601                     Seq(
602                         Seq(Ident(String::from("a")), Ident(String::from("b"))),
603                         Ident(String::from("d"))
604                     )
605                 )),
606             }]
607         };
608         let optimized = {
609             use crate::optimizer::OptimizedExpr::*;
610             vec![OptimizedRule {
611                 name: "rule".to_owned(),
612                 ty: RuleType::Silent,
613                 expr: box_tree!(Seq(
614                     Ident(String::from("a")),
615                     Seq(
616                         Ident(String::from("b")),
617                         Choice(Ident(String::from("c")), Ident(String::from("d")))
618                     )
619                 )),
620             }]
621         };
622 
623         assert_eq!(optimize(rules), optimized);
624     }
625 
626     #[test]
short_common_sequence()627     fn short_common_sequence() {
628         let rules = {
629             use crate::ast::Expr::*;
630             vec![Rule {
631                 name: "rule".to_owned(),
632                 ty: RuleType::Atomic,
633                 expr: box_tree!(Choice(
634                     Seq(Ident(String::from("a")), Ident(String::from("b"))),
635                     Ident(String::from("a"))
636                 )),
637             }]
638         };
639         let optimized = {
640             use crate::optimizer::OptimizedExpr::*;
641             vec![OptimizedRule {
642                 name: "rule".to_owned(),
643                 ty: RuleType::Atomic,
644                 expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
645             }]
646         };
647 
648         assert_eq!(optimize(rules), optimized);
649     }
650 
651     #[test]
impossible_common_sequence()652     fn impossible_common_sequence() {
653         let rules = {
654             use crate::ast::Expr::*;
655             vec![Rule {
656                 name: "rule".to_owned(),
657                 ty: RuleType::Silent,
658                 expr: box_tree!(Choice(
659                     Ident(String::from("a")),
660                     Seq(Ident(String::from("a")), Ident(String::from("b")))
661                 )),
662             }]
663         };
664         let optimized = {
665             use crate::optimizer::OptimizedExpr::*;
666             vec![OptimizedRule {
667                 name: "rule".to_owned(),
668                 ty: RuleType::Silent,
669                 expr: box_tree!(Ident(String::from("a"))),
670             }]
671         };
672 
673         assert_eq!(optimize(rules), optimized);
674     }
675 
676     #[test]
lister()677     fn lister() {
678         let rules = {
679             use crate::ast::Expr::*;
680             vec![Rule {
681                 name: "rule".to_owned(),
682                 ty: RuleType::Silent,
683                 expr: box_tree!(Seq(
684                     Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
685                     Ident(String::from("a"))
686                 )),
687             }]
688         };
689         let optimized = {
690             use crate::optimizer::OptimizedExpr::*;
691             vec![OptimizedRule {
692                 name: "rule".to_owned(),
693                 ty: RuleType::Silent,
694                 expr: box_tree!(Seq(
695                     Ident(String::from("a")),
696                     Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
697                 )),
698             }]
699         };
700 
701         assert_eq!(optimize(rules), optimized);
702     }
703 
704     mod display {
705         use super::super::*;
706         /// In previous implementation of Display for OptimizedExpr
707         /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me),
708         /// Str("\n") will be displayed as
709         /// "
710         /// "
711         ///
712         /// It will not break the compilation in normal use.
713         ///
714         /// But when I use it in automatically generating documents,
715         /// it will quite confusing and we'll be unable to distinguish \n and \r.
716         ///
717         /// And `cargo expand` will emit codes that can't be compiled,
718         /// for it expand `#[doc("...")]` to `/// ...`,
719         /// and when the document comment breaks the line,
720         /// it will be expanded into wrong codes.
721         #[test]
control_character()722         fn control_character() {
723             assert_eq!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\\n\"");
724             assert_eq!(
725                 OptimizedExpr::Insens("\n".to_owned()).to_string(),
726                 "^\"\\n\"",
727             );
728             assert_eq!(
729                 OptimizedExpr::Range("\n".to_owned(), "\r".to_owned()).to_string(),
730                 "('\\n'..'\\r')",
731             );
732             assert_eq!(
733                 OptimizedExpr::Skip(vec![
734                     "\n".to_owned(),
735                     "\r".to_owned(),
736                     "\n\r".to_owned(),
737                     "\0".to_owned(),
738                 ])
739                 .to_string(),
740                 r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"#,
741             );
742 
743             assert_ne!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\n\"");
744         }
745 
746         #[test]
str()747         fn str() {
748             assert_eq!(OptimizedExpr::Str("a".to_owned()).to_string(), r#""a""#);
749         }
750 
751         #[test]
insens()752         fn insens() {
753             assert_eq!(OptimizedExpr::Insens("a".to_owned()).to_string(), r#"^"a""#);
754         }
755 
756         #[test]
range()757         fn range() {
758             assert_eq!(
759                 OptimizedExpr::Range("a".to_owned(), "z".to_owned()).to_string(),
760                 r#"('a'..'z')"#,
761             );
762         }
763 
764         #[test]
ident()765         fn ident() {
766             assert_eq!(OptimizedExpr::Ident("a".to_owned()).to_string(), r#"a"#);
767         }
768 
769         #[test]
peek_slice()770         fn peek_slice() {
771             assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
772             assert_eq!(
773                 OptimizedExpr::PeekSlice(0, Some(-1)).to_string(),
774                 "PEEK[0..-1]",
775             );
776             assert_eq!(
777                 OptimizedExpr::PeekSlice(2, Some(3)).to_string(),
778                 "PEEK[2..3]",
779             );
780             assert_eq!(
781                 OptimizedExpr::PeekSlice(2, Some(-1)).to_string(),
782                 "PEEK[2..-1]",
783             );
784             assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
785         }
786 
787         #[test]
pos_pred()788         fn pos_pred() {
789             assert_eq!(
790                 OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new(
791                     OptimizedExpr::Ident("a".to_owned()),
792                 ))))
793                 .to_string(),
794                 "&!a",
795             );
796             assert_eq!(
797                 OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice(
798                     Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident(
799                         "a".to_owned(),
800                     )))),
801                     Box::new(OptimizedExpr::Str("a".to_owned())),
802                 )))
803                 .to_string(),
804                 r#"&(a* | "a")"#,
805             );
806             assert_eq!(
807                 OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new(
808                     OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a".to_owned()))),
809                 ))))
810                 .to_string(),
811                 "&!a",
812             );
813         }
814 
815         #[test]
neg_pred()816         fn neg_pred() {
817             assert_eq!(
818                 OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
819                 r#"!e"#,
820             );
821             assert_eq!(
822                 OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice(
823                     Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
824                         "a".to_owned(),
825                     )))),
826                     Box::new(OptimizedExpr::Str("a".to_owned())),
827                 )))
828                 .to_string(),
829                 r#"!(PUSH(a) | "a")"#,
830             );
831         }
832 
833         #[test]
seq()834         fn seq() {
835             assert_eq!(
836                 OptimizedExpr::Seq(
837                     Box::new(OptimizedExpr::Ident("e1".to_owned())),
838                     Box::new(OptimizedExpr::Ident("e2".to_owned())),
839                 )
840                 .to_string(),
841                 r#"(e1 ~ e2)"#,
842             );
843             assert_eq!(
844                 Expr::Seq(
845                     Box::new(Expr::Ident("e1".to_owned())),
846                     Box::new(Expr::Seq(
847                         Box::new(Expr::Ident("e2".to_owned())),
848                         Box::new(Expr::Ident("e3".to_owned())),
849                     )),
850                 )
851                 .to_string(),
852                 "(e1 ~ e2 ~ e3)",
853             );
854             assert_eq!(
855                 Expr::Seq(
856                     Box::new(Expr::Ident("e1".to_owned())),
857                     Box::new(Expr::Seq(
858                         Box::new(Expr::Ident("e2".to_owned())),
859                         Box::new(Expr::Seq(
860                             Box::new(Expr::Ident("e3".to_owned())),
861                             Box::new(Expr::Ident("e4".to_owned())),
862                         )),
863                     )),
864                 )
865                 .to_string(),
866                 "(e1 ~ e2 ~ e3 ~ e4)",
867             );
868             assert_eq!(
869                 Expr::Seq(
870                     Box::new(Expr::Ident("e1".to_owned())),
871                     Box::new(Expr::Choice(
872                         Box::new(Expr::Ident("e2".to_owned())),
873                         Box::new(Expr::Seq(
874                             Box::new(Expr::Ident("e3".to_owned())),
875                             Box::new(Expr::Ident("e4".to_owned())),
876                         )),
877                     )),
878                 )
879                 .to_string(),
880                 "(e1 ~ (e2 | (e3 ~ e4)))",
881             );
882             assert_eq!(
883                 Expr::Seq(
884                     Box::new(Expr::Ident("e1".to_owned())),
885                     Box::new(Expr::Seq(
886                         Box::new(Expr::Ident("e2".to_owned())),
887                         Box::new(Expr::Choice(
888                             Box::new(Expr::Ident("e3".to_owned())),
889                             Box::new(Expr::Ident("e4".to_owned())),
890                         )),
891                     )),
892                 )
893                 .to_string(),
894                 "(e1 ~ e2 ~ (e3 | e4))",
895             );
896             assert_eq!(
897                 OptimizedExpr::Seq(
898                     Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str(
899                         "a".to_owned(),
900                     )))),
901                     Box::new(OptimizedExpr::Seq(
902                         Box::new(OptimizedExpr::Ident("b".to_owned())),
903                         Box::new(OptimizedExpr::Insens("c".to_owned())),
904                     )),
905                 )
906                 .to_string(),
907                 r#"("a"* ~ b ~ ^"c")"#,
908             );
909             assert_eq!(
910                 OptimizedExpr::Seq(
911                     Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range(
912                         "a".to_owned(),
913                         "z".to_owned(),
914                     )))),
915                     Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt(
916                         Box::new(OptimizedExpr::Range("A".to_owned(), "Z".to_owned())),
917                     )))),
918                 )
919                 .to_string(),
920                 "(&('a'..'z') ~ !('A'..'Z')?)",
921             );
922         }
923 
924         #[test]
choice()925         fn choice() {
926             assert_eq!(
927                 OptimizedExpr::Choice(
928                     Box::new(OptimizedExpr::Ident("e1".to_owned())),
929                     Box::new(OptimizedExpr::Ident("e2".to_owned())),
930                 )
931                 .to_string(),
932                 r#"(e1 | e2)"#,
933             );
934             assert_eq!(
935                 Expr::Choice(
936                     Box::new(Expr::Ident("e1".to_owned())),
937                     Box::new(Expr::Choice(
938                         Box::new(Expr::Ident("e2".to_owned())),
939                         Box::new(Expr::Ident("e3".to_owned())),
940                     )),
941                 )
942                 .to_string(),
943                 "(e1 | e2 | e3)",
944             );
945             assert_eq!(
946                 Expr::Choice(
947                     Box::new(Expr::Ident("e1".to_owned())),
948                     Box::new(Expr::Choice(
949                         Box::new(Expr::Ident("e2".to_owned())),
950                         Box::new(Expr::Choice(
951                             Box::new(Expr::Ident("e3".to_owned())),
952                             Box::new(Expr::Ident("e4".to_owned())),
953                         )),
954                     )),
955                 )
956                 .to_string(),
957                 "(e1 | e2 | e3 | e4)",
958             );
959             assert_eq!(
960                 Expr::Choice(
961                     Box::new(Expr::Ident("e1".to_owned())),
962                     Box::new(Expr::Seq(
963                         Box::new(Expr::Ident("e2".to_owned())),
964                         Box::new(Expr::Choice(
965                             Box::new(Expr::Ident("e3".to_owned())),
966                             Box::new(Expr::Ident("e4".to_owned())),
967                         )),
968                     )),
969                 )
970                 .to_string(),
971                 "(e1 | (e2 ~ (e3 | e4)))",
972             );
973             assert_eq!(
974                 OptimizedExpr::Choice(
975                     Box::new(OptimizedExpr::Str("a".to_owned())),
976                     Box::new(OptimizedExpr::Choice(
977                         Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
978                             "b".to_owned(),
979                         )))),
980                         Box::new(OptimizedExpr::Insens("c".to_owned())),
981                     )),
982                 )
983                 .to_string(),
984                 r#"("a" | PUSH(b) | ^"c")"#,
985             );
986         }
987 
988         #[test]
opt()989         fn opt() {
990             assert_eq!(
991                 OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
992                 "e?",
993             );
994         }
995 
996         #[test]
rep()997         fn rep() {
998             assert_eq!(
999                 OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x".to_owned()))).to_string(),
1000                 "x*",
1001             );
1002             assert_eq!(
1003                 OptimizedExpr::Rep(Box::new(OptimizedExpr::Range(
1004                     "0".to_owned(),
1005                     "9".to_owned(),
1006                 )))
1007                 .to_string(),
1008                 "('0'..'9')*",
1009             );
1010         }
1011 
1012         #[test]
1013         #[cfg(feature = "grammar-extras")]
rep_once()1014         fn rep_once() {
1015             assert_eq!(
1016                 OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1017                 "e+",
1018             );
1019             assert_eq!(
1020                 OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range(
1021                     "0".to_owned(),
1022                     "9".to_owned(),
1023                 )))
1024                 .to_string(),
1025                 "('0'..'9')+",
1026             );
1027         }
1028 
1029         #[test]
skip()1030         fn skip() {
1031             assert_eq!(
1032                 OptimizedExpr::Skip(
1033                     ["a", "bc"]
1034                         .into_iter()
1035                         .map(|s| s.to_owned())
1036                         .collect::<Vec<_>>(),
1037                 )
1038                 .to_string(),
1039                 r#"(!("a" | "bc") ~ ANY)*"#,
1040             );
1041         }
1042 
1043         #[test]
push()1044         fn push() {
1045             assert_eq!(
1046                 OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1047                 "PUSH(e)",
1048             );
1049         }
1050 
1051         #[test]
1052         #[cfg(feature = "grammar-extras")]
node_tag()1053         fn node_tag() {
1054             assert_eq!(
1055                 OptimizedExpr::NodeTag(
1056                     Box::new(OptimizedExpr::Ident("expr".to_owned())),
1057                     "label".to_owned(),
1058                 )
1059                 .to_string(),
1060                 r#"(#label = expr)"#,
1061             );
1062             assert_eq!(
1063                 OptimizedExpr::NodeTag(
1064                     Box::new(OptimizedExpr::Ident("x".to_owned())),
1065                     "X".to_owned(),
1066                 )
1067                 .to_string(),
1068                 r#"(#X = x)"#,
1069             );
1070             assert_eq!(
1071                 OptimizedExpr::NodeTag(
1072                     Box::new(OptimizedExpr::Seq(
1073                         Box::new(OptimizedExpr::Ident("x".to_owned())),
1074                         Box::new(OptimizedExpr::Str("y".to_owned())),
1075                     )),
1076                     "X".to_owned(),
1077                 )
1078                 .to_string(),
1079                 r#"(#X = (x ~ "y"))"#,
1080             );
1081         }
1082 
1083         #[test]
restore_on_err()1084         fn restore_on_err() {
1085             assert_eq!(
1086                 OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e".to_owned())))
1087                     .to_string(),
1088                 "e",
1089             );
1090         }
1091     }
1092 }
1093