1 //! # Elements of Programming Languages 2 //! 3 //! These are short recipes for accomplishing common tasks. 4 //! 5 //! * [Whitespace](#whitespace) 6 //! + [Wrapper combinators that eat whitespace before and after a parser](#wrapper-combinators-that-eat-whitespace-before-and-after-a-parser) 7 //! * [Comments](#comments) 8 //! + [`// C++/EOL-style comments`](#-ceol-style-comments) 9 //! + [`/* C-style comments */`](#-c-style-comments-) 10 //! * [Identifiers](#identifiers) 11 //! + [`Rust-Style Identifiers`](#rust-style-identifiers) 12 //! * [Literal Values](#literal-values) 13 //! + [Escaped Strings](#escaped-strings) 14 //! + [Integers](#integers) 15 //! - [Hexadecimal](#hexadecimal) 16 //! - [Octal](#octal) 17 //! - [Binary](#binary) 18 //! - [Decimal](#decimal) 19 //! + [Floating Point Numbers](#floating-point-numbers) 20 //! 21 //! ## Whitespace 22 //! 23 //! 24 //! 25 //! ### Wrapper combinators that eat whitespace before and after a parser 26 //! 27 //! ```rust 28 //! use winnow::prelude::*; 29 //! use winnow::{ 30 //! error::ParserError, 31 //! combinator::delimited, 32 //! ascii::multispace0, 33 //! }; 34 //! 35 //! /// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and 36 //! /// trailing whitespace, returning the output of `inner`. 37 //! fn ws<'a, F, O, E: ParserError<&'a str>>(inner: F) -> impl Parser<&'a str, O, E> 38 //! where 39 //! F: Parser<&'a str, O, E>, 40 //! { 41 //! delimited( 42 //! multispace0, 43 //! inner, 44 //! multispace0 45 //! ) 46 //! } 47 //! ``` 48 //! 49 //! To eat only trailing whitespace, replace `delimited(...)` with `terminated(&inner, multispace0)`. 50 //! Likewise, to eat only leading whitespace, replace `delimited(...)` with `preceded(multispace0, 51 //! &inner)`. You can use your own parser instead of `multispace0` if you want to skip a different set 52 //! of lexemes. 53 //! 54 //! ## Comments 55 //! 56 //! ### `// C++/EOL-style comments` 57 //! 58 //! 
This version uses `%` to start a comment, does not consume the newline character, and returns an 59 //! output of `()`. 60 //! 61 //! ```rust 62 //! use winnow::prelude::*; 63 //! use winnow::{ 64 //! error::ParserError, 65 //! token::take_till1, 66 //! }; 67 //! 68 //! pub fn peol_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<(), E> 69 //! { 70 //! ('%', take_till1(['\n', '\r'])) 71 //! .void() // Output is thrown away. 72 //! .parse_next(i) 73 //! } 74 //! ``` 75 //! 76 //! ### `/* C-style comments */` 77 //! 78 //! Inline comments surrounded with sentinel tags `(*` and `*)`. This version returns an output of `()` 79 //! and does not handle nested comments. 80 //! 81 //! ```rust 82 //! use winnow::prelude::*; 83 //! use winnow::{ 84 //! error::ParserError, 85 //! token::{tag, take_until}, 86 //! }; 87 //! 88 //! pub fn pinline_comment<'a, E: ParserError<&'a str>>(i: &mut &'a str) -> PResult<(), E> { 89 //! ( 90 //! "(*", 91 //! take_until(0.., "*)"), 92 //! "*)" 93 //! ) 94 //! .void() // Output is thrown away. 95 //! .parse_next(i) 96 //! } 97 //! ``` 98 //! 99 //! ## Identifiers 100 //! 101 //! ### `Rust-Style Identifiers` 102 //! 103 //! Identifiers that may start with a letter (or underscore) and may contain underscores, 104 //! letters and numbers can be parsed like this: 105 //! 106 //! ```rust 107 //! use winnow::prelude::*; 108 //! use winnow::{ 109 //! stream::AsChar, 110 //! token::take_while, 111 //! token::one_of, 112 //! }; 113 //! 114 //! pub fn identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { 115 //! ( 116 //! one_of(|c: char| c.is_alpha() || c == '_'), 117 //! take_while(0.., |c: char| c.is_alphanum() || c == '_') 118 //! ) 119 //! .recognize() 120 //! .parse_next(input) 121 //! } 122 //! ``` 123 //! 124 //! Let's say we apply this to the identifier `hello_world123abc`. The first element of the tuple 125 //! would use [`one_of`][crate::token::one_of] which would recognize `h`. The tuple ensures that 126 //! 
`ello_world123abc` will be piped to the next [`take_while`][crate::token::take_while] parser, 127 //! which recognizes every remaining character. However, the tuple returns a tuple of the results 128 //! of its sub-parsers. The [`recognize`][crate::Parser::recognize] parser produces a `&str` of the 129 //! input text that was parsed, which in this case is the entire `&str` `hello_world123abc`. 130 //! 131 //! ## Literal Values 132 //! 133 //! ### Escaped Strings 134 //! 135 //! ```rust 136 #![doc = include_str!("../../examples/string/parser.rs")] 137 //! ``` 138 //! 139 //! See also [`escaped`] and [`escaped_transform`]. 140 //! 141 //! ### Integers 142 //! 143 //! The following recipes all return string slices rather than integer values. How to obtain an 144 //! integer value instead is demonstrated for hexadecimal integers. The others are similar. 145 //! 146 //! The parsers allow the grouping character `_`, which allows one to group the digits by byte, for 147 //! example: `0xA4_3F_11_28`. If you prefer to exclude the `_` character, the lambda to convert from a 148 //! string slice to an integer value is slightly simpler. You can also strip the `_` from the string 149 //! slice that is returned, which is demonstrated in the second hexadecimal number parser. 150 //! 151 //! #### Hexadecimal 152 //! 153 //! The parser outputs the string slice of the digits without the leading `0x`/`0X`. 154 //! 155 //! ```rust 156 //! use winnow::prelude::*; 157 //! use winnow::{ 158 //! combinator::alt, 159 //! combinator::{repeat}, 160 //! combinator::{preceded, terminated}, 161 //! token::one_of, 162 //! token::tag, 163 //! }; 164 //! 165 //! fn hexadecimal<'s>(input: &mut &'s str) -> PResult<&'s str> { // <'a, E: ParserError<&'a str>> 166 //! preceded( 167 //! alt(("0x", "0X")), 168 //! repeat(1.., 169 //! terminated(one_of(('0'..='9', 'a'..='f', 'A'..='F')), repeat(0.., '_').map(|()| ())) 170 //! ).map(|()| ()).recognize() 171 //! ).parse_next(input) 172 //! } 173 //! 
``` 174 //! 175 //! If you want it to return the integer value instead, use map: 176 //! 177 //! ```rust 178 //! use winnow::prelude::*; 179 //! use winnow::{ 180 //! combinator::alt, 181 //! combinator::{repeat}, 182 //! combinator::{preceded, terminated}, 183 //! token::one_of, 184 //! token::tag, 185 //! }; 186 //! 187 //! fn hexadecimal_value(input: &mut &str) -> PResult<i64> { 188 //! preceded( 189 //! alt(("0x", "0X")), 190 //! repeat(1.., 191 //! terminated(one_of(('0'..='9', 'a'..='f', 'A'..='F')), repeat(0.., '_').map(|()| ())) 192 //! ).map(|()| ()).recognize() 193 //! ).try_map( 194 //! |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16) 195 //! ).parse_next(input) 196 //! } 197 //! ``` 198 //! 199 //! See also [`hex_uint`] 200 //! 201 //! #### Octal 202 //! 203 //! ```rust 204 //! use winnow::prelude::*; 205 //! use winnow::{ 206 //! combinator::alt, 207 //! combinator::{repeat}, 208 //! combinator::{preceded, terminated}, 209 //! token::one_of, 210 //! token::tag, 211 //! }; 212 //! 213 //! fn octal<'s>(input: &mut &'s str) -> PResult<&'s str> { 214 //! preceded( 215 //! alt(("0o", "0O")), 216 //! repeat(1.., 217 //! terminated(one_of('0'..='7'), repeat(0.., '_').map(|()| ())) 218 //! ).map(|()| ()).recognize() 219 //! ).parse_next(input) 220 //! } 221 //! ``` 222 //! 223 //! #### Binary 224 //! 225 //! ```rust 226 //! use winnow::prelude::*; 227 //! use winnow::{ 228 //! combinator::alt, 229 //! combinator::{repeat}, 230 //! combinator::{preceded, terminated}, 231 //! token::one_of, 232 //! token::tag, 233 //! }; 234 //! 235 //! fn binary<'s>(input: &mut &'s str) -> PResult<&'s str> { 236 //! preceded( 237 //! alt(("0b", "0B")), 238 //! repeat(1.., 239 //! terminated(one_of('0'..='1'), repeat(0.., '_').map(|()| ())) 240 //! ).map(|()| ()).recognize() 241 //! ).parse_next(input) 242 //! } 243 //! ``` 244 //! 245 //! #### Decimal 246 //! 247 //! ```rust 248 //! use winnow::prelude::*; 249 //! use winnow::{ 250 //! 
combinator::{repeat}, 251 //! combinator::terminated, 252 //! token::one_of, 253 //! }; 254 //! 255 //! fn decimal<'s>(input: &mut &'s str) -> PResult<&'s str> { 256 //! repeat(1.., 257 //! terminated(one_of('0'..='9'), repeat(0.., '_').map(|()| ())) 258 //! ).map(|()| ()) 259 //! .recognize() 260 //! .parse_next(input) 261 //! } 262 //! ``` 263 //! 264 //! See also [`dec_uint`] and [`dec_int`] 265 //! 266 //! ### Floating Point Numbers 267 //! 268 //! The following is adapted from [the Python parser by Valentin Lorentz](https://github.com/ProgVal/rust-python-parser/blob/master/src/numbers.rs). 269 //! 270 //! ```rust 271 //! use winnow::prelude::*; 272 //! use winnow::{ 273 //! combinator::alt, 274 //! combinator::{repeat}, 275 //! combinator::opt, 276 //! combinator::{preceded, terminated}, 277 //! token::one_of, 278 //! }; 279 //! 280 //! fn float<'s>(input: &mut &'s str) -> PResult<&'s str> { 281 //! alt(( 282 //! // Case one: .42 283 //! ( 284 //! '.', 285 //! decimal, 286 //! opt(( 287 //! one_of(['e', 'E']), 288 //! opt(one_of(['+', '-'])), 289 //! decimal 290 //! )) 291 //! ).recognize() 292 //! , // Case two: 42e42 and 42.42e42 293 //! ( 294 //! decimal, 295 //! opt(preceded( 296 //! '.', 297 //! decimal, 298 //! )), 299 //! one_of(['e', 'E']), 300 //! opt(one_of(['+', '-'])), 301 //! decimal 302 //! ).recognize() 303 //! , // Case three: 42. and 42.42 304 //! ( 305 //! decimal, 306 //! '.', 307 //! opt(decimal) 308 //! ).recognize() 309 //! )).parse_next(input) 310 //! } 311 //! 312 //! fn decimal<'s>(input: &mut &'s str) -> PResult<&'s str> { 313 //! repeat(1.., 314 //! terminated(one_of('0'..='9'), repeat(0.., '_').map(|()| ())) 315 //! ). 316 //! map(|()| ()) 317 //! .recognize() 318 //! .parse_next(input) 319 //! } 320 //! ``` 321 //! 322 //! 
See also [`float`] 323 324 #![allow(unused_imports)] 325 use crate::ascii::dec_int; 326 use crate::ascii::dec_uint; 327 use crate::ascii::escaped; 328 use crate::ascii::escaped_transform; 329 use crate::ascii::float; 330 use crate::ascii::hex_uint; 331