1 //! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression)
2 //! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter.
3 //! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees.
4
5 use winnow::{
6 ascii::{alpha1, digit1, multispace0, multispace1},
7 combinator::alt,
8 combinator::repeat,
9 combinator::{cut_err, opt},
10 combinator::{delimited, preceded, terminated},
11 error::ContextError,
12 error::StrContext,
13 prelude::*,
14 token::one_of,
15 };
16
17 /// We start with a top-level function to tie everything together, letting
18 /// us call eval on a string directly
eval_from_str(src: &str) -> Result<Expr, String>19 pub fn eval_from_str(src: &str) -> Result<Expr, String> {
20 parse_expr
21 .parse(src)
22 .map_err(|e| e.to_string())
23 .and_then(|exp| eval_expression(exp).ok_or_else(|| "Eval failed".to_string()))
24 }
25
26 /// For parsing, we start by defining the types that define the shape of data that we want.
27 /// In this case, we want something tree-like
28
29 /// The remaining half is Lists. We implement these as recursive Expressions.
30 /// For a list of numbers, we have `'(1 2 3)`, which we'll parse to:
31 /// ```
32 /// Expr::Quote(vec![Expr::Constant(Atom::Num(1)),
33 /// Expr::Constant(Atom::Num(2)),
34 /// Expr::Constant(Atom::Num(3))])
35 /// Quote takes an S-expression and prevents evaluation of it, making it a data
36 /// structure that we can deal with programmatically. Thus any valid expression
37 /// is also a valid data structure in Lisp itself.
38 #[derive(Debug, Eq, PartialEq, Clone)]
39 pub enum Expr {
40 Constant(Atom),
41 /// (func-name arg1 arg2)
42 Application(Box<Expr>, Vec<Expr>),
43 /// (if predicate do-this)
44 If(Box<Expr>, Box<Expr>),
45 /// (if predicate do-this otherwise-do-this)
46 IfElse(Box<Expr>, Box<Expr>, Box<Expr>),
47 /// '(3 (if (+ 3 3) 4 5) 7)
48 Quote(Vec<Expr>),
49 }
50
51 /// We now wrap this type and a few other primitives into our Atom type.
52 /// Remember from before that Atoms form one half of our language.
53 #[derive(Debug, Eq, PartialEq, Clone)]
54 pub enum Atom {
55 Num(i32),
56 Keyword(String),
57 Boolean(bool),
58 BuiltIn(BuiltIn),
59 }
60
61 /// Now, the most basic type. We define some built-in functions that our lisp has
62 #[derive(Debug, Eq, PartialEq, Clone, Copy)]
63 pub enum BuiltIn {
64 Plus,
65 Minus,
66 Times,
67 Divide,
68 Equal,
69 Not,
70 }
71
72 /// With types defined, we move onto the top-level expression parser!
parse_expr(i: &mut &'_ str) -> PResult<Expr>73 fn parse_expr(i: &mut &'_ str) -> PResult<Expr> {
74 preceded(
75 multispace0,
76 alt((parse_constant, parse_application, parse_if, parse_quote)),
77 )
78 .parse_next(i)
79 }
80
81 /// We then add the Expr layer on top
parse_constant(i: &mut &'_ str) -> PResult<Expr>82 fn parse_constant(i: &mut &'_ str) -> PResult<Expr> {
83 parse_atom.map(Expr::Constant).parse_next(i)
84 }
85
86 /// Now we take all these simple parsers and connect them.
87 /// We can now parse half of our language!
parse_atom(i: &mut &'_ str) -> PResult<Atom>88 fn parse_atom(i: &mut &'_ str) -> PResult<Atom> {
89 alt((
90 parse_num,
91 parse_bool,
92 parse_builtin.map(Atom::BuiltIn),
93 parse_keyword,
94 ))
95 .parse_next(i)
96 }
97
98 /// Next up is number parsing. We're keeping it simple here by accepting any number (> 1)
99 /// of digits but ending the program if it doesn't fit into an i32.
parse_num(i: &mut &'_ str) -> PResult<Atom>100 fn parse_num(i: &mut &'_ str) -> PResult<Atom> {
101 alt((
102 digit1.try_map(|digit_str: &str| digit_str.parse::<i32>().map(Atom::Num)),
103 preceded("-", digit1).map(|digit_str: &str| Atom::Num(-digit_str.parse::<i32>().unwrap())),
104 ))
105 .parse_next(i)
106 }
107
108 /// Our boolean values are also constant, so we can do it the same way
parse_bool(i: &mut &'_ str) -> PResult<Atom>109 fn parse_bool(i: &mut &'_ str) -> PResult<Atom> {
110 alt((
111 "#t".map(|_| Atom::Boolean(true)),
112 "#f".map(|_| Atom::Boolean(false)),
113 ))
114 .parse_next(i)
115 }
116
parse_builtin(i: &mut &'_ str) -> PResult<BuiltIn>117 fn parse_builtin(i: &mut &'_ str) -> PResult<BuiltIn> {
118 // alt gives us the result of first parser that succeeds, of the series of
119 // parsers we give it
120 alt((
121 parse_builtin_op,
122 // map lets us process the parsed output, in this case we know what we parsed,
123 // so we ignore the input and return the BuiltIn directly
124 "not".map(|_| BuiltIn::Not),
125 ))
126 .parse_next(i)
127 }
128
129 /// Continuing the trend of starting from the simplest piece and building up,
130 /// we start by creating a parser for the built-in operator functions.
parse_builtin_op(i: &mut &'_ str) -> PResult<BuiltIn>131 fn parse_builtin_op(i: &mut &'_ str) -> PResult<BuiltIn> {
132 // one_of matches one of the characters we give it
133 let t = one_of(['+', '-', '*', '/', '=']).parse_next(i)?;
134
135 // because we are matching single character tokens, we can do the matching logic
136 // on the returned value
137 Ok(match t {
138 '+' => BuiltIn::Plus,
139 '-' => BuiltIn::Minus,
140 '*' => BuiltIn::Times,
141 '/' => BuiltIn::Divide,
142 '=' => BuiltIn::Equal,
143 _ => unreachable!(),
144 })
145 }
146
147 /// The next easiest thing to parse are keywords.
148 /// We introduce some error handling combinators: `context` for human readable errors
149 /// and `cut_err` to prevent back-tracking.
150 ///
151 /// Put plainly: `preceded(":", cut_err(alpha1))` means that once we see the `:`
152 /// character, we have to see one or more alphabetic characters or the input is invalid.
parse_keyword(i: &mut &'_ str) -> PResult<Atom>153 fn parse_keyword(i: &mut &'_ str) -> PResult<Atom> {
154 preceded(":", cut_err(alpha1))
155 .context(StrContext::Label("keyword"))
156 .map(|sym_str: &str| Atom::Keyword(sym_str.to_string()))
157 .parse_next(i)
158 }
159
160 /// We can now use our new combinator to define the rest of the `Expr`s.
161 ///
162 /// Starting with function application, we can see how the parser mirrors our data
163 /// definitions: our definition is `Application(Box<Expr>, Vec<Expr>)`, so we know
164 /// that we need to parse an expression and then parse 0 or more expressions, all
165 /// wrapped in an S-expression.
166 ///
167 /// tuples are themselves a parser, used to sequence parsers together, so we can translate this
168 /// directly and then map over it to transform the output into an `Expr::Application`
parse_application(i: &mut &'_ str) -> PResult<Expr>169 fn parse_application(i: &mut &'_ str) -> PResult<Expr> {
170 let application_inner = (parse_expr, repeat(0.., parse_expr))
171 .map(|(head, tail)| Expr::Application(Box::new(head), tail));
172 // finally, we wrap it in an s-expression
173 s_exp(application_inner).parse_next(i)
174 }
175
176 /// Because `Expr::If` and `Expr::IfElse` are so similar (we easily could have
177 /// defined `Expr::If` to have an `Option` for the else block), we parse both
178 /// in a single function.
179 ///
180 /// In fact, we define our parser as if `Expr::If` was defined with an Option in it,
181 /// we have the `opt` combinator which fits very nicely here.
parse_if(i: &mut &'_ str) -> PResult<Expr>182 fn parse_if(i: &mut &'_ str) -> PResult<Expr> {
183 let if_inner = preceded(
184 // here to avoid ambiguity with other names starting with `if`, if we added
185 // variables to our language, we say that if must be terminated by at least
186 // one whitespace character
187 terminated("if", multispace1),
188 cut_err((parse_expr, parse_expr, opt(parse_expr))),
189 )
190 .map(|(pred, true_branch, maybe_false_branch)| {
191 if let Some(false_branch) = maybe_false_branch {
192 Expr::IfElse(
193 Box::new(pred),
194 Box::new(true_branch),
195 Box::new(false_branch),
196 )
197 } else {
198 Expr::If(Box::new(pred), Box::new(true_branch))
199 }
200 })
201 .context(StrContext::Label("if expression"));
202 s_exp(if_inner).parse_next(i)
203 }
204
205 /// A quoted S-expression is list data structure.
206 ///
207 /// This example doesn't have the symbol atom, but by adding variables and changing
208 /// the definition of quote to not always be around an S-expression, we'd get them
209 /// naturally.
parse_quote(i: &mut &'_ str) -> PResult<Expr>210 fn parse_quote(i: &mut &'_ str) -> PResult<Expr> {
211 // this should look very straight-forward after all we've done:
212 // we find the `'` (quote) character, use cut_err to say that we're unambiguously
213 // looking for an s-expression of 0 or more expressions, and then parse them
214 preceded("'", cut_err(s_exp(repeat(0.., parse_expr))))
215 .context(StrContext::Label("quote"))
216 .map(Expr::Quote)
217 .parse_next(i)
218 }
219
220 /// Before continuing, we need a helper function to parse lists.
221 /// A list starts with `(` and ends with a matching `)`.
222 /// By putting whitespace and newline parsing here, we can avoid having to worry about it
223 /// in much of the rest of the parser.
224 //.parse_next/
225 /// Unlike the previous functions, this function doesn't take or consume input, instead it
226 /// takes a parsing function and returns a new parsing function.
s_exp<'a, O1, F>(inner: F) -> impl Parser<&'a str, O1, ContextError> where F: Parser<&'a str, O1, ContextError>,227 fn s_exp<'a, O1, F>(inner: F) -> impl Parser<&'a str, O1, ContextError>
228 where
229 F: Parser<&'a str, O1, ContextError>,
230 {
231 delimited(
232 '(',
233 preceded(multispace0, inner),
234 cut_err(preceded(multispace0, ')')).context(StrContext::Label("closing paren")),
235 )
236 }
237
238 /// And that's it!
239 /// We can now parse our entire lisp language.
240 ///
241 /// But in order to make it a little more interesting, we can hack together
242 /// a little interpreter to take an Expr, which is really an
243 /// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST),
244 /// and give us something back
245
246 /// This function tries to reduce the AST.
247 /// This has to return an Expression rather than an Atom because quoted `s_expressions`
248 /// can't be reduced
eval_expression(e: Expr) -> Option<Expr>249 fn eval_expression(e: Expr) -> Option<Expr> {
250 match e {
251 // Constants and quoted s-expressions are our base-case
252 Expr::Constant(_) | Expr::Quote(_) => Some(e),
253 // we then recursively `eval_expression` in the context of our special forms
254 // and built-in operators
255 Expr::If(pred, true_branch) => {
256 let reduce_pred = eval_expression(*pred)?;
257 if get_bool_from_expr(reduce_pred)? {
258 eval_expression(*true_branch)
259 } else {
260 None
261 }
262 }
263 Expr::IfElse(pred, true_branch, false_branch) => {
264 let reduce_pred = eval_expression(*pred)?;
265 if get_bool_from_expr(reduce_pred)? {
266 eval_expression(*true_branch)
267 } else {
268 eval_expression(*false_branch)
269 }
270 }
271 Expr::Application(head, tail) => {
272 let reduced_head = eval_expression(*head)?;
273 let reduced_tail = tail
274 .into_iter()
275 .map(eval_expression)
276 .collect::<Option<Vec<Expr>>>()?;
277 if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head {
278 Some(Expr::Constant(match bi {
279 BuiltIn::Plus => Atom::Num(
280 reduced_tail
281 .into_iter()
282 .map(get_num_from_expr)
283 .collect::<Option<Vec<i32>>>()?
284 .into_iter()
285 .sum(),
286 ),
287 BuiltIn::Times => Atom::Num(
288 reduced_tail
289 .into_iter()
290 .map(get_num_from_expr)
291 .collect::<Option<Vec<i32>>>()?
292 .into_iter()
293 .product(),
294 ),
295 BuiltIn::Equal => Atom::Boolean(
296 reduced_tail
297 .iter()
298 .zip(reduced_tail.iter().skip(1))
299 .all(|(a, b)| a == b),
300 ),
301 BuiltIn::Not => {
302 if reduced_tail.len() != 1 {
303 return None;
304 } else {
305 Atom::Boolean(!get_bool_from_expr(
306 reduced_tail.first().cloned().unwrap(),
307 )?)
308 }
309 }
310 BuiltIn::Minus => {
311 Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
312 let fe = get_num_from_expr(first_elem)?;
313 reduced_tail
314 .into_iter()
315 .map(get_num_from_expr)
316 .collect::<Option<Vec<i32>>>()?
317 .into_iter()
318 .skip(1)
319 .fold(fe, |a, b| a - b)
320 } else {
321 Default::default()
322 })
323 }
324 BuiltIn::Divide => {
325 Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
326 let fe = get_num_from_expr(first_elem)?;
327 reduced_tail
328 .into_iter()
329 .map(get_num_from_expr)
330 .collect::<Option<Vec<i32>>>()?
331 .into_iter()
332 .skip(1)
333 .fold(fe, |a, b| a / b)
334 } else {
335 Default::default()
336 })
337 }
338 }))
339 } else {
340 None
341 }
342 }
343 }
344 }
345
346 /// To start we define a couple of helper functions
get_num_from_expr(e: Expr) -> Option<i32>347 fn get_num_from_expr(e: Expr) -> Option<i32> {
348 if let Expr::Constant(Atom::Num(n)) = e {
349 Some(n)
350 } else {
351 None
352 }
353 }
354
get_bool_from_expr(e: Expr) -> Option<bool>355 fn get_bool_from_expr(e: Expr) -> Option<bool> {
356 if let Expr::Constant(Atom::Boolean(b)) = e {
357 Some(b)
358 } else {
359 None
360 }
361 }
362