1 // pest. The Elegant Parser 2 // Copyright (c) 2018 Dragoș Tiselice 3 // 4 // Licensed under the Apache License, Version 2.0 5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT 6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 7 // option. All files in the project carrying such notice may not be copied, 8 // modified, or distributed except according to those terms. 9 10 //! Constructs useful in prefix, postfix, and infix operator parsing with the 11 //! Pratt parsing method. 12 13 use core::iter::Peekable; 14 use core::marker::PhantomData; 15 use core::ops::BitOr; 16 17 use alloc::boxed::Box; 18 use alloc::collections::BTreeMap; 19 20 use crate::iterators::Pair; 21 use crate::RuleType; 22 23 /// Associativity of an infix binary operator, used by [`Op::infix(Assoc)`]. 24 /// 25 /// [`Op::infix(Assoc)`]: struct.Op.html 26 #[derive(Clone, Copy, Debug, Eq, PartialEq)] 27 pub enum Assoc { 28 /// Left operator associativity. Evaluate expressions from left-to-right. 29 Left, 30 /// Right operator associativity. Evaluate expressions from right-to-left. 31 Right, 32 } 33 34 type Prec = u32; 35 const PREC_STEP: Prec = 10; 36 37 /// An operator that corresponds to a rule. 38 pub struct Op<R: RuleType> { 39 rule: R, 40 affix: Affix, 41 next: Option<Box<Op<R>>>, 42 } 43 44 enum Affix { 45 Prefix, 46 Postfix, 47 Infix(Assoc), 48 } 49 50 impl<R: RuleType> Op<R> { 51 /// Defines `rule` as a prefix unary operator. prefix(rule: R) -> Self52 pub fn prefix(rule: R) -> Self { 53 Self { 54 rule, 55 affix: Affix::Prefix, 56 next: None, 57 } 58 } 59 60 /// Defines `rule` as a postfix unary operator. postfix(rule: R) -> Self61 pub fn postfix(rule: R) -> Self { 62 Self { 63 rule, 64 affix: Affix::Postfix, 65 next: None, 66 } 67 } 68 69 /// Defines `rule` as an infix binary operator with associativity `assoc`. infix(rule: R, assoc: Assoc) -> Self70 pub fn infix(rule: R, assoc: Assoc) -> Self { 71 Self { 72 rule, 73 affix: Affix::Infix(assoc), 74 next: None, 75 } 76 } 77 } 78 79 impl<R: RuleType> BitOr for Op<R> { 80 type Output = Self; 81 bitor(mut self, rhs: Self) -> Self82 fn bitor(mut self, rhs: Self) -> Self { 83 fn assign_next<R: RuleType>(op: &mut Op<R>, next: Op<R>) { 84 if let Some(ref mut child) = op.next { 85 assign_next(child, next); 86 } else { 87 op.next = Some(Box::new(next)); 88 } 89 } 90 91 assign_next(&mut self, rhs); 92 self 93 } 94 } 95 96 /// Struct containing operators and precedences, which can perform [Pratt parsing][1] on 97 /// primary, prefix, postfix and infix expressions over [`Pairs`]. The tokens in [`Pairs`] 98 /// should alternate in the order: 99 /// `prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix*)*` 100 /// 101 /// # Panics 102 /// 103 /// Panics will occur when: 104 /// * `pairs` is empty 105 /// * The tokens in `pairs` does not alternate in the expected order. 106 /// * No `map_*` function is specified for a certain kind of operator encountered in `pairs`. 107 /// 108 /// # Example 109 /// 110 /// The following pest grammar defines a calculator which can be used for Pratt parsing. 111 /// 112 /// ```pest 113 /// WHITESPACE = _{ " " | "\t" | NEWLINE } 114 /// 115 /// program = { SOI ~ expr ~ EOI } 116 /// expr = { prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix* )* } 117 /// infix = _{ add | sub | mul | div | pow } 118 /// add = { "+" } // Addition 119 /// sub = { "-" } // Subtraction 120 /// mul = { "*" } // Multiplication 121 /// div = { "/" } // Division 122 /// pow = { "^" } // Exponentiation 123 /// prefix = _{ neg } 124 /// neg = { "-" } // Negation 125 /// postfix = _{ fac } 126 /// fac = { "!" } // Factorial 127 /// primary = _{ int | "(" ~ expr ~ ")" } 128 /// int = @{ (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT+ | ASCII_DIGIT) } 129 /// ``` 130 /// 131 /// Below is a [`PrattParser`] that is able to parse an `expr` in the above grammar. The order 132 /// of precedence corresponds to the order in which [`op`] is called. Thus, `mul` will 133 /// have higher precedence than `add`. Operators can also be chained with `|` to give them equal 134 /// precedence. 135 /// 136 /// ``` 137 /// # use pest::pratt_parser::{Assoc, Op, PrattParser}; 138 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 139 /// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg } 140 /// let pratt = 141 /// PrattParser::new() 142 /// .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left)) 143 /// .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left)) 144 /// .op(Op::infix(Rule::pow, Assoc::Right)) 145 /// .op(Op::prefix(Rule::neg)) 146 /// .op(Op::postfix(Rule::fac)); 147 /// ``` 148 /// 149 /// To parse an expression, call the [`map_primary`], [`map_prefix`], [`map_postfix`], 150 /// [`map_infix`] and [`parse`] methods as follows: 151 /// 152 /// ``` 153 /// # use pest::{iterators::Pairs, pratt_parser::PrattParser}; 154 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 155 /// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg } 156 /// fn parse_expr(pairs: Pairs<Rule>, pratt: &PrattParser<Rule>) -> i32 { 157 /// pratt 158 /// .map_primary(|primary| match primary.as_rule() { 159 /// Rule::int => primary.as_str().parse().unwrap(), 160 /// Rule::expr => parse_expr(primary.into_inner(), pratt), // from "(" ~ expr ~ ")" 161 /// _ => unreachable!(), 162 /// }) 163 /// .map_prefix(|op, rhs| match op.as_rule() { 164 /// Rule::neg => -rhs, 165 /// _ => unreachable!(), 166 /// }) 167 /// .map_postfix(|lhs, op| match op.as_rule() { 168 /// Rule::fac => (1..lhs+1).product(), 169 /// _ => unreachable!(), 170 /// }) 171 /// .map_infix(|lhs, op, rhs| match op.as_rule() { 172 /// Rule::add => lhs + rhs, 173 /// Rule::sub => lhs - rhs, 174 /// Rule::mul => lhs * rhs, 175 /// Rule::div => lhs / rhs, 176 /// Rule::pow => (1..rhs+1).map(|_| lhs).product(), 177 /// _ => unreachable!(), 178 /// }) 179 /// .parse(pairs) 180 /// } 181 /// ``` 182 /// 183 /// Note that [`map_prefix`], [`map_postfix`] and [`map_infix`] only need to be specified if the 184 /// grammar contains the corresponding operators. 185 /// 186 /// [1]: https://en.wikipedia.org/wiki/Pratt_parser 187 /// [`Pairs`]: ../iterators/struct.Pairs.html 188 /// [`PrattParser`]: struct.PrattParser.html 189 /// [`map_primary`]: struct.PrattParser.html#method.map_primary 190 /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix 191 /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix 192 /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix 193 /// [`parse`]: struct.PrattParserMap.html#method.parse 194 /// [`op`]: struct.PrattParserMap.html#method.op 195 pub struct PrattParser<R: RuleType> { 196 prec: Prec, 197 ops: BTreeMap<R, (Affix, Prec)>, 198 has_prefix: bool, 199 has_postfix: bool, 200 has_infix: bool, 201 } 202 203 impl<R: RuleType> Default for PrattParser<R> { default() -> Self204 fn default() -> Self { 205 Self::new() 206 } 207 } 208 209 impl<R: RuleType> PrattParser<R> { 210 /// Instantiate a new `PrattParser`. new() -> Self211 pub fn new() -> Self { 212 Self { 213 prec: PREC_STEP, 214 ops: BTreeMap::new(), 215 has_prefix: false, 216 has_postfix: false, 217 has_infix: false, 218 } 219 } 220 221 /// Add `op` to `PrattParser`. op(mut self, op: Op<R>) -> Self222 pub fn op(mut self, op: Op<R>) -> Self { 223 self.prec += PREC_STEP; 224 let mut iter = Some(op); 225 while let Some(Op { rule, affix, next }) = iter.take() { 226 match affix { 227 Affix::Prefix => self.has_prefix = true, 228 Affix::Postfix => self.has_postfix = true, 229 Affix::Infix(_) => self.has_infix = true, 230 } 231 self.ops.insert(rule, (affix, self.prec)); 232 iter = next.map(|op| *op); 233 } 234 self 235 } 236 237 /// Maps primary expressions with a closure `primary`. map_primary<'pratt, 'a, 'i, X, T>( &'pratt self, primary: X, ) -> PrattParserMap<'pratt, 'a, 'i, R, X, T> where X: FnMut(Pair<'i, R>) -> T, R: 'pratt,238 pub fn map_primary<'pratt, 'a, 'i, X, T>( 239 &'pratt self, 240 primary: X, 241 ) -> PrattParserMap<'pratt, 'a, 'i, R, X, T> 242 where 243 X: FnMut(Pair<'i, R>) -> T, 244 R: 'pratt, 245 { 246 PrattParserMap { 247 pratt: self, 248 primary, 249 prefix: None, 250 postfix: None, 251 infix: None, 252 phantom: PhantomData, 253 } 254 } 255 } 256 257 type PrefixFn<'a, 'i, R, T> = Box<dyn FnMut(Pair<'i, R>, T) -> T + 'a>; 258 type PostfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>) -> T + 'a>; 259 type InfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>, T) -> T + 'a>; 260 261 /// Product of calling [`map_primary`] on [`PrattParser`], defines how expressions should 262 /// be mapped. 263 /// 264 /// [`map_primary`]: struct.PrattParser.html#method.map_primary 265 /// [`PrattParser`]: struct.PrattParser.html 266 pub struct PrattParserMap<'pratt, 'a, 'i, R, F, T> 267 where 268 R: RuleType, 269 F: FnMut(Pair<'i, R>) -> T, 270 { 271 pratt: &'pratt PrattParser<R>, 272 primary: F, 273 prefix: Option<PrefixFn<'a, 'i, R, T>>, 274 postfix: Option<PostfixFn<'a, 'i, R, T>>, 275 infix: Option<InfixFn<'a, 'i, R, T>>, 276 phantom: PhantomData<T>, 277 } 278 279 impl<'pratt, 'a, 'i, R, F, T> PrattParserMap<'pratt, 'a, 'i, R, F, T> 280 where 281 R: RuleType + 'pratt, 282 F: FnMut(Pair<'i, R>) -> T, 283 { 284 /// Maps prefix operators with closure `prefix`. map_prefix<X>(mut self, prefix: X) -> Self where X: FnMut(Pair<'i, R>, T) -> T + 'a,285 pub fn map_prefix<X>(mut self, prefix: X) -> Self 286 where 287 X: FnMut(Pair<'i, R>, T) -> T + 'a, 288 { 289 self.prefix = Some(Box::new(prefix)); 290 self 291 } 292 293 /// Maps postfix operators with closure `postfix`. map_postfix<X>(mut self, postfix: X) -> Self where X: FnMut(T, Pair<'i, R>) -> T + 'a,294 pub fn map_postfix<X>(mut self, postfix: X) -> Self 295 where 296 X: FnMut(T, Pair<'i, R>) -> T + 'a, 297 { 298 self.postfix = Some(Box::new(postfix)); 299 self 300 } 301 302 /// Maps infix operators with a closure `infix`. map_infix<X>(mut self, infix: X) -> Self where X: FnMut(T, Pair<'i, R>, T) -> T + 'a,303 pub fn map_infix<X>(mut self, infix: X) -> Self 304 where 305 X: FnMut(T, Pair<'i, R>, T) -> T + 'a, 306 { 307 self.infix = Some(Box::new(infix)); 308 self 309 } 310 311 /// The last method to call on the provided pairs to execute the Pratt 312 /// parser (previously defined using [`map_primary`], [`map_prefix`], [`map_postfix`], 313 /// and [`map_infix`] methods). 314 /// 315 /// [`map_primary`]: struct.PrattParser.html#method.map_primary 316 /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix 317 /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix 318 /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix parse<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: P) -> T319 pub fn parse<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: P) -> T { 320 self.expr(&mut pairs.peekable(), 0) 321 } 322 expr<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, rbp: Prec) -> T323 fn expr<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, rbp: Prec) -> T { 324 let mut lhs = self.nud(pairs); 325 while rbp < self.lbp(pairs) { 326 lhs = self.led(pairs, lhs); 327 } 328 lhs 329 } 330 331 /// Null-Denotation 332 /// 333 /// "the action that should happen when the symbol is encountered 334 /// as start of an expression (most notably, prefix operators) nud<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> T335 fn nud<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> T { 336 let pair = pairs.next().expect("Pratt parsing expects non-empty Pairs"); 337 match self.pratt.ops.get(&pair.as_rule()) { 338 Some((Affix::Prefix, prec)) => { 339 let rhs = self.expr(pairs, *prec - 1); 340 match self.prefix.as_mut() { 341 Some(prefix) => prefix(pair, rhs), 342 None => panic!("Could not map {}, no `.map_prefix(...)` specified", pair), 343 } 344 } 345 None => (self.primary)(pair), 346 _ => panic!("Expected prefix or primary expression, found {}", pair), 347 } 348 } 349 350 /// Left-Denotation 351 /// 352 /// "the action that should happen when the symbol is encountered 353 /// after the start of an expression (most notably, infix and postfix operators)" led<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, lhs: T) -> T354 fn led<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, lhs: T) -> T { 355 let pair = pairs.next().unwrap(); 356 match self.pratt.ops.get(&pair.as_rule()) { 357 Some((Affix::Infix(assoc), prec)) => { 358 let rhs = match *assoc { 359 Assoc::Left => self.expr(pairs, *prec), 360 Assoc::Right => self.expr(pairs, *prec - 1), 361 }; 362 match self.infix.as_mut() { 363 Some(infix) => infix(lhs, pair, rhs), 364 None => panic!("Could not map {}, no `.map_infix(...)` specified", pair), 365 } 366 } 367 Some((Affix::Postfix, _)) => match self.postfix.as_mut() { 368 Some(postfix) => postfix(lhs, pair), 369 None => panic!("Could not map {}, no `.map_postfix(...)` specified", pair), 370 }, 371 _ => panic!("Expected postfix or infix expression, found {}", pair), 372 } 373 } 374 375 /// Left-Binding-Power 376 /// 377 /// "describes the symbol's precedence in infix form (most notably, operator precedence)" lbp<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> Prec378 fn lbp<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> Prec { 379 match pairs.peek() { 380 Some(pair) => match self.pratt.ops.get(&pair.as_rule()) { 381 Some((_, prec)) => *prec, 382 None => panic!("Expected operator, found {}", pair), 383 }, 384 None => 0, 385 } 386 } 387 } 388