1 use std::fmt;
2 
3 use crate::ast::{self, Ast};
4 
5 /// A trait for visiting an abstract syntax tree (AST) in depth first order.
6 ///
7 /// The principle aim of this trait is to enable callers to perform case
8 /// analysis on an abstract syntax tree without necessarily using recursion.
9 /// In particular, this permits callers to do case analysis with constant stack
10 /// usage, which can be important since the size of an abstract syntax tree
11 /// may be proportional to end user input.
12 ///
13 /// Typical usage of this trait involves providing an implementation and then
14 /// running it using the [`visit`](fn.visit.html) function.
15 ///
16 /// Note that the abstract syntax tree for a regular expression is quite
17 /// complex. Unless you specifically need it, you might be able to use the
18 /// much simpler
19 /// [high-level intermediate representation](../hir/struct.Hir.html)
20 /// and its
21 /// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
22 /// instead.
23 pub trait Visitor {
24     /// The result of visiting an AST.
25     type Output;
26     /// An error that visiting an AST might return.
27     type Err;
28 
29     /// All implementors of `Visitor` must provide a `finish` method, which
30     /// yields the result of visiting the AST or an error.
finish(self) -> Result<Self::Output, Self::Err>31     fn finish(self) -> Result<Self::Output, Self::Err>;
32 
33     /// This method is called before beginning traversal of the AST.
start(&mut self)34     fn start(&mut self) {}
35 
36     /// This method is called on an `Ast` before descending into child `Ast`
37     /// nodes.
visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err>38     fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
39         Ok(())
40     }
41 
42     /// This method is called on an `Ast` after descending all of its child
43     /// `Ast` nodes.
visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err>44     fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
45         Ok(())
46     }
47 
48     /// This method is called between child nodes of an
49     /// [`Alternation`](struct.Alternation.html).
visit_alternation_in(&mut self) -> Result<(), Self::Err>50     fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
51         Ok(())
52     }
53 
54     /// This method is called on every
55     /// [`ClassSetItem`](enum.ClassSetItem.html)
56     /// before descending into child nodes.
visit_class_set_item_pre( &mut self, _ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>57     fn visit_class_set_item_pre(
58         &mut self,
59         _ast: &ast::ClassSetItem,
60     ) -> Result<(), Self::Err> {
61         Ok(())
62     }
63 
64     /// This method is called on every
65     /// [`ClassSetItem`](enum.ClassSetItem.html)
66     /// after descending into child nodes.
visit_class_set_item_post( &mut self, _ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>67     fn visit_class_set_item_post(
68         &mut self,
69         _ast: &ast::ClassSetItem,
70     ) -> Result<(), Self::Err> {
71         Ok(())
72     }
73 
74     /// This method is called on every
75     /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
76     /// before descending into child nodes.
visit_class_set_binary_op_pre( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>77     fn visit_class_set_binary_op_pre(
78         &mut self,
79         _ast: &ast::ClassSetBinaryOp,
80     ) -> Result<(), Self::Err> {
81         Ok(())
82     }
83 
84     /// This method is called on every
85     /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
86     /// after descending into child nodes.
visit_class_set_binary_op_post( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>87     fn visit_class_set_binary_op_post(
88         &mut self,
89         _ast: &ast::ClassSetBinaryOp,
90     ) -> Result<(), Self::Err> {
91         Ok(())
92     }
93 
94     /// This method is called between the left hand and right hand child nodes
95     /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
visit_class_set_binary_op_in( &mut self, _ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>96     fn visit_class_set_binary_op_in(
97         &mut self,
98         _ast: &ast::ClassSetBinaryOp,
99     ) -> Result<(), Self::Err> {
100         Ok(())
101     }
102 }
103 
104 /// Executes an implementation of `Visitor` in constant stack space.
105 ///
106 /// This function will visit every node in the given `Ast` while calling the
107 /// appropriate methods provided by the
108 /// [`Visitor`](trait.Visitor.html) trait.
109 ///
110 /// The primary use case for this method is when one wants to perform case
111 /// analysis over an `Ast` without using a stack size proportional to the depth
112 /// of the `Ast`. Namely, this method will instead use constant stack size, but
113 /// will use heap space proportional to the size of the `Ast`. This may be
114 /// desirable in cases where the size of `Ast` is proportional to end user
115 /// input.
116 ///
117 /// If the visitor returns an error at any point, then visiting is stopped and
118 /// the error is returned.
visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err>119 pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
120     HeapVisitor::new().visit(ast, visitor)
121 }
122 
123 /// HeapVisitor visits every item in an `Ast` recursively using constant stack
124 /// size and a heap size proportional to the size of the `Ast`.
125 struct HeapVisitor<'a> {
126     /// A stack of `Ast` nodes. This is roughly analogous to the call stack
127     /// used in a typical recursive visitor.
128     stack: Vec<(&'a Ast, Frame<'a>)>,
129     /// Similar to the `Ast` stack above, but is used only for character
130     /// classes. In particular, character classes embed their own mini
131     /// recursive syntax.
132     stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
133 }
134 
135 /// Represents a single stack frame while performing structural induction over
136 /// an `Ast`.
137 enum Frame<'a> {
138     /// A stack frame allocated just before descending into a repetition
139     /// operator's child node.
140     Repetition(&'a ast::Repetition),
141     /// A stack frame allocated just before descending into a group's child
142     /// node.
143     Group(&'a ast::Group),
144     /// The stack frame used while visiting every child node of a concatenation
145     /// of expressions.
146     Concat {
147         /// The child node we are currently visiting.
148         head: &'a Ast,
149         /// The remaining child nodes to visit (which may be empty).
150         tail: &'a [Ast],
151     },
152     /// The stack frame used while visiting every child node of an alternation
153     /// of expressions.
154     Alternation {
155         /// The child node we are currently visiting.
156         head: &'a Ast,
157         /// The remaining child nodes to visit (which may be empty).
158         tail: &'a [Ast],
159     },
160 }
161 
162 /// Represents a single stack frame while performing structural induction over
163 /// a character class.
164 enum ClassFrame<'a> {
165     /// The stack frame used while visiting every child node of a union of
166     /// character class items.
167     Union {
168         /// The child node we are currently visiting.
169         head: &'a ast::ClassSetItem,
170         /// The remaining child nodes to visit (which may be empty).
171         tail: &'a [ast::ClassSetItem],
172     },
173     /// The stack frame used while a binary class operation.
174     Binary { op: &'a ast::ClassSetBinaryOp },
175     /// A stack frame allocated just before descending into a binary operator's
176     /// left hand child node.
177     BinaryLHS {
178         op: &'a ast::ClassSetBinaryOp,
179         lhs: &'a ast::ClassSet,
180         rhs: &'a ast::ClassSet,
181     },
182     /// A stack frame allocated just before descending into a binary operator's
183     /// right hand child node.
184     BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
185 }
186 
187 /// A representation of the inductive step when performing structural induction
188 /// over a character class.
189 ///
190 /// Note that there is no analogous explicit type for the inductive step for
191 /// `Ast` nodes because the inductive step is just an `Ast`. For character
192 /// classes, the inductive step can produce one of two possible child nodes:
193 /// an item or a binary operation. (An item cannot be a binary operation
194 /// because that would imply binary operations can be unioned in the concrete
195 /// syntax, which is not possible.)
196 enum ClassInduct<'a> {
197     Item(&'a ast::ClassSetItem),
198     BinaryOp(&'a ast::ClassSetBinaryOp),
199 }
200 
201 impl<'a> HeapVisitor<'a> {
new() -> HeapVisitor<'a>202     fn new() -> HeapVisitor<'a> {
203         HeapVisitor { stack: vec![], stack_class: vec![] }
204     }
205 
visit<V: Visitor>( &mut self, mut ast: &'a Ast, mut visitor: V, ) -> Result<V::Output, V::Err>206     fn visit<V: Visitor>(
207         &mut self,
208         mut ast: &'a Ast,
209         mut visitor: V,
210     ) -> Result<V::Output, V::Err> {
211         self.stack.clear();
212         self.stack_class.clear();
213 
214         visitor.start();
215         loop {
216             visitor.visit_pre(ast)?;
217             if let Some(x) = self.induct(ast, &mut visitor)? {
218                 let child = x.child();
219                 self.stack.push((ast, x));
220                 ast = child;
221                 continue;
222             }
223             // No induction means we have a base case, so we can post visit
224             // it now.
225             visitor.visit_post(ast)?;
226 
227             // At this point, we now try to pop our call stack until it is
228             // either empty or we hit another inductive case.
229             loop {
230                 let (post_ast, frame) = match self.stack.pop() {
231                     None => return visitor.finish(),
232                     Some((post_ast, frame)) => (post_ast, frame),
233                 };
234                 // If this is a concat/alternate, then we might have additional
235                 // inductive steps to process.
236                 if let Some(x) = self.pop(frame) {
237                     if let Frame::Alternation { .. } = x {
238                         visitor.visit_alternation_in()?;
239                     }
240                     ast = x.child();
241                     self.stack.push((post_ast, x));
242                     break;
243                 }
244                 // Otherwise, we've finished visiting all the child nodes for
245                 // this AST, so we can post visit it now.
246                 visitor.visit_post(post_ast)?;
247             }
248         }
249     }
250 
251     /// Build a stack frame for the given AST if one is needed (which occurs if
252     /// and only if there are child nodes in the AST). Otherwise, return None.
253     ///
254     /// If this visits a class, then the underlying visitor implementation may
255     /// return an error which will be passed on here.
induct<V: Visitor>( &mut self, ast: &'a Ast, visitor: &mut V, ) -> Result<Option<Frame<'a>>, V::Err>256     fn induct<V: Visitor>(
257         &mut self,
258         ast: &'a Ast,
259         visitor: &mut V,
260     ) -> Result<Option<Frame<'a>>, V::Err> {
261         Ok(match *ast {
262             Ast::Class(ast::Class::Bracketed(ref x)) => {
263                 self.visit_class(x, visitor)?;
264                 None
265             }
266             Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
267             Ast::Group(ref x) => Some(Frame::Group(x)),
268             Ast::Concat(ref x) if x.asts.is_empty() => None,
269             Ast::Concat(ref x) => {
270                 Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
271             }
272             Ast::Alternation(ref x) if x.asts.is_empty() => None,
273             Ast::Alternation(ref x) => Some(Frame::Alternation {
274                 head: &x.asts[0],
275                 tail: &x.asts[1..],
276             }),
277             _ => None,
278         })
279     }
280 
281     /// Pops the given frame. If the frame has an additional inductive step,
282     /// then return it, otherwise return `None`.
pop(&self, induct: Frame<'a>) -> Option<Frame<'a>>283     fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
284         match induct {
285             Frame::Repetition(_) => None,
286             Frame::Group(_) => None,
287             Frame::Concat { tail, .. } => {
288                 if tail.is_empty() {
289                     None
290                 } else {
291                     Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
292                 }
293             }
294             Frame::Alternation { tail, .. } => {
295                 if tail.is_empty() {
296                     None
297                 } else {
298                     Some(Frame::Alternation {
299                         head: &tail[0],
300                         tail: &tail[1..],
301                     })
302                 }
303             }
304         }
305     }
306 
visit_class<V: Visitor>( &mut self, ast: &'a ast::ClassBracketed, visitor: &mut V, ) -> Result<(), V::Err>307     fn visit_class<V: Visitor>(
308         &mut self,
309         ast: &'a ast::ClassBracketed,
310         visitor: &mut V,
311     ) -> Result<(), V::Err> {
312         let mut ast = ClassInduct::from_bracketed(ast);
313         loop {
314             self.visit_class_pre(&ast, visitor)?;
315             if let Some(x) = self.induct_class(&ast) {
316                 let child = x.child();
317                 self.stack_class.push((ast, x));
318                 ast = child;
319                 continue;
320             }
321             self.visit_class_post(&ast, visitor)?;
322 
323             // At this point, we now try to pop our call stack until it is
324             // either empty or we hit another inductive case.
325             loop {
326                 let (post_ast, frame) = match self.stack_class.pop() {
327                     None => return Ok(()),
328                     Some((post_ast, frame)) => (post_ast, frame),
329                 };
330                 // If this is a union or a binary op, then we might have
331                 // additional inductive steps to process.
332                 if let Some(x) = self.pop_class(frame) {
333                     if let ClassFrame::BinaryRHS { ref op, .. } = x {
334                         visitor.visit_class_set_binary_op_in(op)?;
335                     }
336                     ast = x.child();
337                     self.stack_class.push((post_ast, x));
338                     break;
339                 }
340                 // Otherwise, we've finished visiting all the child nodes for
341                 // this class node, so we can post visit it now.
342                 self.visit_class_post(&post_ast, visitor)?;
343             }
344         }
345     }
346 
347     /// Call the appropriate `Visitor` methods given an inductive step.
visit_class_pre<V: Visitor>( &self, ast: &ClassInduct<'a>, visitor: &mut V, ) -> Result<(), V::Err>348     fn visit_class_pre<V: Visitor>(
349         &self,
350         ast: &ClassInduct<'a>,
351         visitor: &mut V,
352     ) -> Result<(), V::Err> {
353         match *ast {
354             ClassInduct::Item(item) => {
355                 visitor.visit_class_set_item_pre(item)?;
356             }
357             ClassInduct::BinaryOp(op) => {
358                 visitor.visit_class_set_binary_op_pre(op)?;
359             }
360         }
361         Ok(())
362     }
363 
364     /// Call the appropriate `Visitor` methods given an inductive step.
visit_class_post<V: Visitor>( &self, ast: &ClassInduct<'a>, visitor: &mut V, ) -> Result<(), V::Err>365     fn visit_class_post<V: Visitor>(
366         &self,
367         ast: &ClassInduct<'a>,
368         visitor: &mut V,
369     ) -> Result<(), V::Err> {
370         match *ast {
371             ClassInduct::Item(item) => {
372                 visitor.visit_class_set_item_post(item)?;
373             }
374             ClassInduct::BinaryOp(op) => {
375                 visitor.visit_class_set_binary_op_post(op)?;
376             }
377         }
378         Ok(())
379     }
380 
381     /// Build a stack frame for the given class node if one is needed (which
382     /// occurs if and only if there are child nodes). Otherwise, return None.
induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>>383     fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
384         match *ast {
385             ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
386                 match x.kind {
387                     ast::ClassSet::Item(ref item) => {
388                         Some(ClassFrame::Union { head: item, tail: &[] })
389                     }
390                     ast::ClassSet::BinaryOp(ref op) => {
391                         Some(ClassFrame::Binary { op })
392                     }
393                 }
394             }
395             ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
396                 if x.items.is_empty() {
397                     None
398                 } else {
399                     Some(ClassFrame::Union {
400                         head: &x.items[0],
401                         tail: &x.items[1..],
402                     })
403                 }
404             }
405             ClassInduct::BinaryOp(op) => {
406                 Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs })
407             }
408             _ => None,
409         }
410     }
411 
412     /// Pops the given frame. If the frame has an additional inductive step,
413     /// then return it, otherwise return `None`.
pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>>414     fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
415         match induct {
416             ClassFrame::Union { tail, .. } => {
417                 if tail.is_empty() {
418                     None
419                 } else {
420                     Some(ClassFrame::Union {
421                         head: &tail[0],
422                         tail: &tail[1..],
423                     })
424                 }
425             }
426             ClassFrame::Binary { .. } => None,
427             ClassFrame::BinaryLHS { op, rhs, .. } => {
428                 Some(ClassFrame::BinaryRHS { op, rhs })
429             }
430             ClassFrame::BinaryRHS { .. } => None,
431         }
432     }
433 }
434 
435 impl<'a> Frame<'a> {
436     /// Perform the next inductive step on this frame and return the next
437     /// child AST node to visit.
child(&self) -> &'a Ast438     fn child(&self) -> &'a Ast {
439         match *self {
440             Frame::Repetition(rep) => &rep.ast,
441             Frame::Group(group) => &group.ast,
442             Frame::Concat { head, .. } => head,
443             Frame::Alternation { head, .. } => head,
444         }
445     }
446 }
447 
448 impl<'a> ClassFrame<'a> {
449     /// Perform the next inductive step on this frame and return the next
450     /// child class node to visit.
child(&self) -> ClassInduct<'a>451     fn child(&self) -> ClassInduct<'a> {
452         match *self {
453             ClassFrame::Union { head, .. } => ClassInduct::Item(head),
454             ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
455             ClassFrame::BinaryLHS { ref lhs, .. } => {
456                 ClassInduct::from_set(lhs)
457             }
458             ClassFrame::BinaryRHS { ref rhs, .. } => {
459                 ClassInduct::from_set(rhs)
460             }
461         }
462     }
463 }
464 
465 impl<'a> ClassInduct<'a> {
from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a>466     fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
467         ClassInduct::from_set(&ast.kind)
468     }
469 
from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a>470     fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
471         match *ast {
472             ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
473             ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
474         }
475     }
476 }
477 
478 impl<'a> fmt::Debug for ClassFrame<'a> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result479     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
480         let x = match *self {
481             ClassFrame::Union { .. } => "Union",
482             ClassFrame::Binary { .. } => "Binary",
483             ClassFrame::BinaryLHS { .. } => "BinaryLHS",
484             ClassFrame::BinaryRHS { .. } => "BinaryRHS",
485         };
486         write!(f, "{}", x)
487     }
488 }
489 
490 impl<'a> fmt::Debug for ClassInduct<'a> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result491     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
492         let x = match *self {
493             ClassInduct::Item(it) => match *it {
494                 ast::ClassSetItem::Empty(_) => "Item(Empty)",
495                 ast::ClassSetItem::Literal(_) => "Item(Literal)",
496                 ast::ClassSetItem::Range(_) => "Item(Range)",
497                 ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
498                 ast::ClassSetItem::Perl(_) => "Item(Perl)",
499                 ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
500                 ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
501                 ast::ClassSetItem::Union(_) => "Item(Union)",
502             },
503             ClassInduct::BinaryOp(it) => match it.kind {
504                 ast::ClassSetBinaryOpKind::Intersection => {
505                     "BinaryOp(Intersection)"
506                 }
507                 ast::ClassSetBinaryOpKind::Difference => {
508                     "BinaryOp(Difference)"
509                 }
510                 ast::ClassSetBinaryOpKind::SymmetricDifference => {
511                     "BinaryOp(SymmetricDifference)"
512                 }
513             },
514         };
515         write!(f, "{}", x)
516     }
517 }
518