1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 //! Constructs useful in prefix, postfix, and infix operator parsing with the
11 //! Pratt parsing method.
12 
13 use core::iter::Peekable;
14 use core::marker::PhantomData;
15 use core::ops::BitOr;
16 
17 use alloc::boxed::Box;
18 use alloc::collections::BTreeMap;
19 
20 use crate::iterators::Pair;
21 use crate::RuleType;
22 
23 /// Associativity of an infix binary operator, used by [`Op::infix(Assoc)`].
24 ///
25 /// [`Op::infix(Assoc)`]: struct.Op.html
26 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
27 pub enum Assoc {
28     /// Left operator associativity. Evaluate expressions from left-to-right.
29     Left,
30     /// Right operator associativity. Evaluate expressions from right-to-left.
31     Right,
32 }
33 
34 type Prec = u32;
35 const PREC_STEP: Prec = 10;
36 
37 /// An operator that corresponds to a rule.
38 pub struct Op<R: RuleType> {
39     rule: R,
40     affix: Affix,
41     next: Option<Box<Op<R>>>,
42 }
43 
44 enum Affix {
45     Prefix,
46     Postfix,
47     Infix(Assoc),
48 }
49 
50 impl<R: RuleType> Op<R> {
51     /// Defines `rule` as a prefix unary operator.
prefix(rule: R) -> Self52     pub fn prefix(rule: R) -> Self {
53         Self {
54             rule,
55             affix: Affix::Prefix,
56             next: None,
57         }
58     }
59 
60     /// Defines `rule` as a postfix unary operator.
postfix(rule: R) -> Self61     pub fn postfix(rule: R) -> Self {
62         Self {
63             rule,
64             affix: Affix::Postfix,
65             next: None,
66         }
67     }
68 
69     /// Defines `rule` as an infix binary operator with associativity `assoc`.
infix(rule: R, assoc: Assoc) -> Self70     pub fn infix(rule: R, assoc: Assoc) -> Self {
71         Self {
72             rule,
73             affix: Affix::Infix(assoc),
74             next: None,
75         }
76     }
77 }
78 
79 impl<R: RuleType> BitOr for Op<R> {
80     type Output = Self;
81 
bitor(mut self, rhs: Self) -> Self82     fn bitor(mut self, rhs: Self) -> Self {
83         fn assign_next<R: RuleType>(op: &mut Op<R>, next: Op<R>) {
84             if let Some(ref mut child) = op.next {
85                 assign_next(child, next);
86             } else {
87                 op.next = Some(Box::new(next));
88             }
89         }
90 
91         assign_next(&mut self, rhs);
92         self
93     }
94 }
95 
96 /// Struct containing operators and precedences, which can perform [Pratt parsing][1] on
97 /// primary, prefix, postfix and infix expressions over [`Pairs`]. The tokens in [`Pairs`]
98 /// should alternate in the order:
99 /// `prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix*)*`
100 ///
101 /// # Panics
102 ///
103 /// Panics will occur when:
104 /// * `pairs` is empty
105 /// * The tokens in `pairs` does not alternate in the expected order.
106 /// * No `map_*` function is specified for a certain kind of operator encountered in `pairs`.
107 ///
108 /// # Example
109 ///
110 /// The following pest grammar defines a calculator which can be used for Pratt parsing.
111 ///
112 /// ```pest
113 /// WHITESPACE   =  _{ " " | "\t" | NEWLINE }
114 ///
115 /// program      =   { SOI ~ expr ~ EOI }
116 ///   expr       =   { prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix* )* }
117 ///     infix    =  _{ add | sub | mul | div | pow }
118 ///       add    =   { "+" } // Addition
119 ///       sub    =   { "-" } // Subtraction
120 ///       mul    =   { "*" } // Multiplication
121 ///       div    =   { "/" } // Division
122 ///       pow    =   { "^" } // Exponentiation
123 ///     prefix   =  _{ neg }
124 ///       neg    =   { "-" } // Negation
125 ///     postfix  =  _{ fac }
126 ///       fac    =   { "!" } // Factorial
127 ///     primary  =  _{ int | "(" ~ expr ~ ")" }
128 ///       int    =  @{ (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT+ | ASCII_DIGIT) }
129 /// ```
130 ///
131 /// Below is a [`PrattParser`] that is able to parse an `expr` in the above grammar. The order
132 /// of precedence corresponds to the order in which [`op`] is called. Thus, `mul` will
133 /// have higher precedence than `add`. Operators can also be chained with `|` to give them equal
134 /// precedence.
135 ///
136 /// ```
137 /// # use pest::pratt_parser::{Assoc, Op, PrattParser};
138 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
139 /// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg }
140 /// let pratt =
141 ///     PrattParser::new()
142 ///         .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left))
143 ///         .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left))
144 ///         .op(Op::infix(Rule::pow, Assoc::Right))
145 ///         .op(Op::prefix(Rule::neg))
146 ///         .op(Op::postfix(Rule::fac));
147 /// ```
148 ///
149 /// To parse an expression, call the [`map_primary`], [`map_prefix`], [`map_postfix`],
150 /// [`map_infix`] and [`parse`] methods as follows:
151 ///
152 /// ```
153 /// # use pest::{iterators::Pairs, pratt_parser::PrattParser};
154 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
155 /// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg }
156 /// fn parse_expr(pairs: Pairs<Rule>, pratt: &PrattParser<Rule>) -> i32 {
157 ///     pratt
158 ///         .map_primary(|primary| match primary.as_rule() {
159 ///             Rule::int  => primary.as_str().parse().unwrap(),
160 ///             Rule::expr => parse_expr(primary.into_inner(), pratt), // from "(" ~ expr ~ ")"
161 ///             _          => unreachable!(),
162 ///         })
163 ///         .map_prefix(|op, rhs| match op.as_rule() {
164 ///             Rule::neg  => -rhs,
165 ///             _          => unreachable!(),
166 ///         })
167 ///         .map_postfix(|lhs, op| match op.as_rule() {
168 ///             Rule::fac  => (1..lhs+1).product(),
169 ///             _          => unreachable!(),
170 ///         })
171 ///         .map_infix(|lhs, op, rhs| match op.as_rule() {
172 ///             Rule::add  => lhs + rhs,
173 ///             Rule::sub  => lhs - rhs,
174 ///             Rule::mul  => lhs * rhs,
175 ///             Rule::div  => lhs / rhs,
176 ///             Rule::pow  => (1..rhs+1).map(|_| lhs).product(),
177 ///             _          => unreachable!(),
178 ///         })
179 ///         .parse(pairs)
180 /// }
181 /// ```
182 ///
183 /// Note that [`map_prefix`], [`map_postfix`] and [`map_infix`] only need to be specified if the
184 /// grammar contains the corresponding operators.
185 ///
186 /// [1]: https://en.wikipedia.org/wiki/Pratt_parser
187 /// [`Pairs`]: ../iterators/struct.Pairs.html
188 /// [`PrattParser`]: struct.PrattParser.html
189 /// [`map_primary`]: struct.PrattParser.html#method.map_primary
190 /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix
191 /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix
192 /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix
193 /// [`parse`]: struct.PrattParserMap.html#method.parse
194 /// [`op`]: struct.PrattParserMap.html#method.op
195 pub struct PrattParser<R: RuleType> {
196     prec: Prec,
197     ops: BTreeMap<R, (Affix, Prec)>,
198     has_prefix: bool,
199     has_postfix: bool,
200     has_infix: bool,
201 }
202 
203 impl<R: RuleType> Default for PrattParser<R> {
default() -> Self204     fn default() -> Self {
205         Self::new()
206     }
207 }
208 
209 impl<R: RuleType> PrattParser<R> {
210     /// Instantiate a new `PrattParser`.
new() -> Self211     pub fn new() -> Self {
212         Self {
213             prec: PREC_STEP,
214             ops: BTreeMap::new(),
215             has_prefix: false,
216             has_postfix: false,
217             has_infix: false,
218         }
219     }
220 
221     /// Add `op` to `PrattParser`.
op(mut self, op: Op<R>) -> Self222     pub fn op(mut self, op: Op<R>) -> Self {
223         self.prec += PREC_STEP;
224         let mut iter = Some(op);
225         while let Some(Op { rule, affix, next }) = iter.take() {
226             match affix {
227                 Affix::Prefix => self.has_prefix = true,
228                 Affix::Postfix => self.has_postfix = true,
229                 Affix::Infix(_) => self.has_infix = true,
230             }
231             self.ops.insert(rule, (affix, self.prec));
232             iter = next.map(|op| *op);
233         }
234         self
235     }
236 
237     /// Maps primary expressions with a closure `primary`.
map_primary<'pratt, 'a, 'i, X, T>( &'pratt self, primary: X, ) -> PrattParserMap<'pratt, 'a, 'i, R, X, T> where X: FnMut(Pair<'i, R>) -> T, R: 'pratt,238     pub fn map_primary<'pratt, 'a, 'i, X, T>(
239         &'pratt self,
240         primary: X,
241     ) -> PrattParserMap<'pratt, 'a, 'i, R, X, T>
242     where
243         X: FnMut(Pair<'i, R>) -> T,
244         R: 'pratt,
245     {
246         PrattParserMap {
247             pratt: self,
248             primary,
249             prefix: None,
250             postfix: None,
251             infix: None,
252             phantom: PhantomData,
253         }
254     }
255 }
256 
257 type PrefixFn<'a, 'i, R, T> = Box<dyn FnMut(Pair<'i, R>, T) -> T + 'a>;
258 type PostfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>) -> T + 'a>;
259 type InfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>, T) -> T + 'a>;
260 
261 /// Product of calling [`map_primary`] on [`PrattParser`], defines how expressions should
262 /// be mapped.
263 ///
264 /// [`map_primary`]: struct.PrattParser.html#method.map_primary
265 /// [`PrattParser`]: struct.PrattParser.html
266 pub struct PrattParserMap<'pratt, 'a, 'i, R, F, T>
267 where
268     R: RuleType,
269     F: FnMut(Pair<'i, R>) -> T,
270 {
271     pratt: &'pratt PrattParser<R>,
272     primary: F,
273     prefix: Option<PrefixFn<'a, 'i, R, T>>,
274     postfix: Option<PostfixFn<'a, 'i, R, T>>,
275     infix: Option<InfixFn<'a, 'i, R, T>>,
276     phantom: PhantomData<T>,
277 }
278 
279 impl<'pratt, 'a, 'i, R, F, T> PrattParserMap<'pratt, 'a, 'i, R, F, T>
280 where
281     R: RuleType + 'pratt,
282     F: FnMut(Pair<'i, R>) -> T,
283 {
284     /// Maps prefix operators with closure `prefix`.
map_prefix<X>(mut self, prefix: X) -> Self where X: FnMut(Pair<'i, R>, T) -> T + 'a,285     pub fn map_prefix<X>(mut self, prefix: X) -> Self
286     where
287         X: FnMut(Pair<'i, R>, T) -> T + 'a,
288     {
289         self.prefix = Some(Box::new(prefix));
290         self
291     }
292 
293     /// Maps postfix operators with closure `postfix`.
map_postfix<X>(mut self, postfix: X) -> Self where X: FnMut(T, Pair<'i, R>) -> T + 'a,294     pub fn map_postfix<X>(mut self, postfix: X) -> Self
295     where
296         X: FnMut(T, Pair<'i, R>) -> T + 'a,
297     {
298         self.postfix = Some(Box::new(postfix));
299         self
300     }
301 
302     /// Maps infix operators with a closure `infix`.
map_infix<X>(mut self, infix: X) -> Self where X: FnMut(T, Pair<'i, R>, T) -> T + 'a,303     pub fn map_infix<X>(mut self, infix: X) -> Self
304     where
305         X: FnMut(T, Pair<'i, R>, T) -> T + 'a,
306     {
307         self.infix = Some(Box::new(infix));
308         self
309     }
310 
311     /// The last method to call on the provided pairs to execute the Pratt
312     /// parser (previously defined using [`map_primary`], [`map_prefix`], [`map_postfix`],
313     /// and [`map_infix`] methods).
314     ///
315     /// [`map_primary`]: struct.PrattParser.html#method.map_primary
316     /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix
317     /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix
318     /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix
parse<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: P) -> T319     pub fn parse<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: P) -> T {
320         self.expr(&mut pairs.peekable(), 0)
321     }
322 
expr<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, rbp: Prec) -> T323     fn expr<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, rbp: Prec) -> T {
324         let mut lhs = self.nud(pairs);
325         while rbp < self.lbp(pairs) {
326             lhs = self.led(pairs, lhs);
327         }
328         lhs
329     }
330 
331     /// Null-Denotation
332     ///
333     /// "the action that should happen when the symbol is encountered
334     ///  as start of an expression (most notably, prefix operators)
nud<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> T335     fn nud<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> T {
336         let pair = pairs.next().expect("Pratt parsing expects non-empty Pairs");
337         match self.pratt.ops.get(&pair.as_rule()) {
338             Some((Affix::Prefix, prec)) => {
339                 let rhs = self.expr(pairs, *prec - 1);
340                 match self.prefix.as_mut() {
341                     Some(prefix) => prefix(pair, rhs),
342                     None => panic!("Could not map {}, no `.map_prefix(...)` specified", pair),
343                 }
344             }
345             None => (self.primary)(pair),
346             _ => panic!("Expected prefix or primary expression, found {}", pair),
347         }
348     }
349 
350     /// Left-Denotation
351     ///
352     /// "the action that should happen when the symbol is encountered
353     /// after the start of an expression (most notably, infix and postfix operators)"
led<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, lhs: T) -> T354     fn led<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, lhs: T) -> T {
355         let pair = pairs.next().unwrap();
356         match self.pratt.ops.get(&pair.as_rule()) {
357             Some((Affix::Infix(assoc), prec)) => {
358                 let rhs = match *assoc {
359                     Assoc::Left => self.expr(pairs, *prec),
360                     Assoc::Right => self.expr(pairs, *prec - 1),
361                 };
362                 match self.infix.as_mut() {
363                     Some(infix) => infix(lhs, pair, rhs),
364                     None => panic!("Could not map {}, no `.map_infix(...)` specified", pair),
365                 }
366             }
367             Some((Affix::Postfix, _)) => match self.postfix.as_mut() {
368                 Some(postfix) => postfix(lhs, pair),
369                 None => panic!("Could not map {}, no `.map_postfix(...)` specified", pair),
370             },
371             _ => panic!("Expected postfix or infix expression, found {}", pair),
372         }
373     }
374 
375     /// Left-Binding-Power
376     ///
377     /// "describes the symbol's precedence in infix form (most notably, operator precedence)"
lbp<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> Prec378     fn lbp<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> Prec {
379         match pairs.peek() {
380             Some(pair) => match self.pratt.ops.get(&pair.as_rule()) {
381                 Some((_, prec)) => *prec,
382                 None => panic!("Expected operator, found {}", pair),
383             },
384             None => 0,
385         }
386     }
387 }
388