//! This crate contains parser combinators, roughly based on the Haskell libraries
//! [parsec](http://hackage.haskell.org/package/parsec) and
//! [attoparsec](https://hackage.haskell.org/package/attoparsec).
//!
//! A parser in this library can be described as a function which takes some input and, if it
//! is successful, returns a value together with the remaining input.
//! A parser combinator is a function which takes one or more parsers and returns a new parser.
//! For instance, the [`many`] parser can be used to convert a parser for single digits into one
//! that parses multiple digits. By modeling parsers in this way it becomes easy to compose complex
//! parsers in an almost declarative way.
//!
//! # Overview
//!
//! `combine` limits itself to creating [LL(1) parsers](https://en.wikipedia.org/wiki/LL_parser)
//! (it is possible to opt-in to LL(k) parsing using the [`attempt`] combinator) which makes the
//! parsers easy to reason about in both function and performance while sacrificing
//! some generality. Besides making the parsers you construct easier to reason about, `combine`
//! also uses the knowledge that it only builds LL parsers to automatically construct good error
//! messages.
//!
//! ```rust
//! extern crate combine;
//! use combine::{Parser, EasyParser};
//! use combine::stream::position;
//! use combine::parser::char::{digit, letter};
//! const MSG: &'static str = r#"Parse error at line: 1, column: 1
//! Unexpected `|`
//! Expected digit or letter
//! "#;
//!
//! fn main() {
//!     // Wrapping a `&str` with `position::Stream` provides automatic line and column tracking.
//!     // If `position::Stream` was not used the positions would instead only be pointers into
//!     // the `&str`
//!     if let Err(err) = digit().or(letter()).easy_parse(position::Stream::new("|")) {
//!         assert_eq!(MSG, format!("{}", err));
//!     }
//! }
//! ```
//!
//!
This library is currently split into a few core modules: 41 //! 42 //! * [`parser`][mod parser] is where you will find all the parsers that combine provides. It contains the core 43 //! [`Parser`] trait as well as several submodules such as `sequence` or `choice` which each 44 //! contain several parsers aimed at a specific niche. 45 //! 46 //! * [`stream`] contains the second most important trait next to [`Parser`]. Streams represent the 47 //! data source which is being parsed such as `&[u8]`, `&str` or iterators. 48 //! 49 //! * [`easy`] contains combine's default "easy" error and stream handling. If you use the 50 //! `easy_parse` method to start your parsing these are the types that are used. 51 //! 52 //! * [`error`] contains the types and traits that make up combine's error handling. Unless you 53 //! need to customize the errors your parsers return you should not need to use this module much. 54 //! 55 //! 56 //! # Examples 57 //! 58 //! ``` 59 //! extern crate combine; 60 //! use combine::parser::char::{spaces, digit, char}; 61 //! use combine::{many1, sep_by, Parser, EasyParser}; 62 //! use combine::stream::easy; 63 //! 64 //! fn main() { 65 //! //Parse spaces first and use the with method to only keep the result of the next parser 66 //! let integer = spaces() 67 //! //parse a string of digits into an i32 68 //! .with(many1(digit()).map(|string: String| string.parse::<i32>().unwrap())); 69 //! 70 //! //Parse integers separated by commas, skipping whitespace 71 //! let mut integer_list = sep_by(integer, spaces().skip(char(','))); 72 //! 73 //! //Call parse with the input to execute the parser 74 //! let input = "1234, 45,78"; 75 //! let result: Result<(Vec<i32>, &str), easy::ParseError<&str>> = 76 //! integer_list.easy_parse(input); 77 //! match result { 78 //! Ok((value, _remaining_input)) => println!("{:?}", value), 79 //! Err(err) => println!("{}", err) 80 //! } 81 //! } 82 //! ``` 83 //! 84 //! 
//! If we need a parser that is mutually recursive, or if we want to export a reusable parser, the
//! [`parser!`] macro can be used. In effect it makes it possible to return a parser without naming
//! the type of the parser (which can be very large due to combine's trait based approach). While
//! it is possible to avoid naming the type without the macro, those solutions require either
//! allocation (`Box<dyn Parser< Input, Output = O, PartialState = P>>`) or `impl Trait` in the
//! return position. The macro thus threads the needle and makes it possible to have
//! non-allocating, anonymous parsers on stable rust.
//!
//! ```
//! #[macro_use]
//! extern crate combine;
//! use combine::parser::char::{char, letter, spaces};
//! use combine::{between, choice, many1, parser, sep_by, Parser, EasyParser};
//! use combine::error::{ParseError, StdParseResult};
//! use combine::stream::{Stream, Positioned};
//! use combine::stream::position;
//!
//! #[derive(Debug, PartialEq)]
//! pub enum Expr {
//!     Id(String),
//!     Array(Vec<Expr>),
//!     Pair(Box<Expr>, Box<Expr>)
//! }
//!
//! // `impl Parser` can be used to create reusable parsers with zero overhead
//! fn expr_<Input>() -> impl Parser< Input, Output = Expr>
//!     where Input: Stream<Token = char>,
//! {
//!     let word = many1(letter());
//!
//!     // A parser which skips past whitespace.
//!     // Since we aren't interested in knowing that our expression parser
//!     // could have accepted additional whitespace between the tokens we also silence the error.
//!     let skip_spaces = || spaces().silent();
//!
//!     //Creates a parser which parses a char and skips any trailing whitespace
//!     let lex_char = |c| char(c).skip(skip_spaces());
//!
//!     let comma_list = sep_by(expr(), lex_char(','));
//!     let array = between(lex_char('['), lex_char(']'), comma_list);
//!
//!
//We can use tuples to run several parsers in sequence 126 //! //The resulting type is a tuple containing each parsers output 127 //! let pair = (lex_char('('), 128 //! expr(), 129 //! lex_char(','), 130 //! expr(), 131 //! lex_char(')')) 132 //! .map(|t| Expr::Pair(Box::new(t.1), Box::new(t.3))); 133 //! 134 //! choice(( 135 //! word.map(Expr::Id), 136 //! array.map(Expr::Array), 137 //! pair, 138 //! )) 139 //! .skip(skip_spaces()) 140 //! } 141 //! 142 //! // As this expression parser needs to be able to call itself recursively `impl Parser` can't 143 //! // be used on its own as that would cause an infinitely large type. We can avoid this by using 144 //! // the `parser!` macro which erases the inner type and the size of that type entirely which 145 //! // lets it be used recursively. 146 //! // 147 //! // (This macro does not use `impl Trait` which means it can be used in rust < 1.26 as well to 148 //! // emulate `impl Parser`) 149 //! parser!{ 150 //! fn expr[Input]()(Input) -> Expr 151 //! where [Input: Stream<Token = char>] 152 //! { 153 //! expr_() 154 //! } 155 //! } 156 //! 157 //! fn main() { 158 //! let result = expr() 159 //! .parse("[[], (hello, world), [rust]]"); 160 //! let expr = Expr::Array(vec![ 161 //! Expr::Array(Vec::new()) 162 //! , Expr::Pair(Box::new(Expr::Id("hello".to_string())), 163 //! Box::new(Expr::Id("world".to_string()))) 164 //! , Expr::Array(vec![Expr::Id("rust".to_string())]) 165 //! ]); 166 //! assert_eq!(result, Ok((expr, ""))); 167 //! } 168 //! ``` 169 //! 170 //! [`combinator`]: combinator/index.html 171 //! [mod parser]: parser/index.html 172 //! [`easy`]: easy/index.html 173 //! [`error`]: error/index.html 174 //! [`char`]: parser/char/index.html 175 //! [`byte`]: parser/byte/index.html 176 //! [`range`]: parser/range/index.html 177 //! [`many`]: parser/repeat/fn.many.html 178 //! [`attempt`]: parser/combinator/fn.attempt.html 179 //! [`satisfy`]: parser/token/fn.satisfy.html 180 //! 
//! [`or`]: parser/trait.Parser.html#method.or
//! [`Stream`]: stream/trait.Stream.html
//! [`RangeStream`]: stream/trait.RangeStream.html
//! [`Parser`]: parser/trait.Parser.html
//! [fn parser]: parser/function/fn.parser.html
//! [`parser!`]: macro.parser.html
// inline is only used on trivial functions returning parsers
#![allow(
    clippy::inline_always,
    clippy::type_complexity,
    clippy::too_many_arguments,
    clippy::match_like_matches_macro
)]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]

#[cfg(feature = "alloc")]
extern crate alloc;

// Re-export the most commonly used traits, types and parser constructors at the crate root so
// that users can write `combine::Parser`, `combine::many1`, ... directly.
#[doc(inline)]
pub use crate::error::{ParseError, ParseResult, StdParseResult};

// `EasyParser` is only re-exported when the `std` feature is enabled.
#[cfg(feature = "std")]
#[doc(inline)]
pub use crate::parser::EasyParser;

#[doc(inline)]
pub use crate::parser::Parser;

#[doc(inline)]
pub use crate::stream::{Positioned, RangeStream, RangeStreamOnce, Stream, StreamOnce};

#[doc(inline)]
pub use crate::parser::{
    choice::optional,
    combinator::{attempt, look_ahead, not_followed_by},
    error::{unexpected, unexpected_any},
    function::parser,
    repeat::{
        chainl1, chainr1, count, count_min_max, many, many1, sep_by, sep_by1, sep_end_by,
        sep_end_by1, skip_count, skip_count_min_max, skip_many, skip_many1,
    },
    sequence::between,
    token::{
        any, eof, none_of, one_of, position, produce, satisfy, satisfy_map, token, tokens, value,
    },
};

#[doc(inline)]
pub use crate::parser::choice::choice;

#[doc(inline)]
pub use crate::parser::combinator::from_str;

#[doc(inline)]
pub use crate::parser::token::tokens_cmp;

/// Declares a named parser which can easily be reused.
///
/// The expression which creates the parser should have no side effects as it may be called
/// multiple times even during a single parse attempt.
///
/// NOTE: You can use `impl Trait` in the return position instead. See the [json parser][] for an
/// example.
///
/// [json parser]:https://github.com/Marwes/combine/blob/master/benches/json.rs
///
/// # Accepted forms
///
/// An invocation consists of up to three parts, in this order:
///
/// * An optional `#[attributes] <vis> struct Name;` line which names the generated parser type.
///   When it is omitted the type gets the same name as the function and is marked
///   `#[doc(hidden)]`.
/// * An optional `type PartialState = <type>;` line. When it is omitted `()` is used.
/// * The function itself:
///   `#[attributes] <vis> fn name[<type params>](<args>)(<InputType>) -> <Output>
///   where [<bounds>] { <body> }`.
///
/// ```
/// #[macro_use]
/// extern crate combine;
/// use combine::parser::char::digit;
/// use combine::{any, choice, from_str, many1, Parser, EasyParser, Stream};
/// use combine::error::ParseError;
///
/// parser!{
///     /// `[Input]` represents the normal type parameter and lifetime declarations for the
///     /// function. It gets expanded to `<Input>`
///     fn integer[Input]()(Input) -> i32
///     where [
///         Input: Stream<Token = char>,
///         <Input::Error as ParseError<Input::Token, Input::Range, Input::Position>>::StreamError:
///             From<::std::num::ParseIntError>,
///     ]
///     {
///         // The body must be a block body ( `{ <block body> }`) which ends with an expression
///         // which evaluates to a parser
///         from_str(many1::<String, _, _>(digit()))
///     }
/// }
///
/// #[derive(Debug, PartialEq)]
/// pub enum IntOrString {
///     Int(i32),
///     String(String),
/// }
/// // prefix with `pub` to declare a public parser
/// parser!{
///     // Documentation comments work as well
///
///     /// Parses an integer or a string (any characters)
///     pub fn integer_or_string[Input]()(Input) -> IntOrString
///     where [
///         Input: Stream<Token = char>,
///         <Input::Error as ParseError<Input::Token, Input::Range, Input::Position>>::StreamError:
///             From<::std::num::ParseIntError>,
///     ]
///     {
///         choice!(
///             integer().map(IntOrString::Int),
///             many1(any()).map(IntOrString::String)
///         )
///     }
/// }
///
/// parser!{
///     // Give the created type a unique name
///     #[derive(Clone)]
///     pub struct Twice;
///     pub fn twice[Input, F, P](f: F)(Input) -> (P::Output, P::Output)
///         where [P: Parser<Input>,
///                F: FnMut() -> P]
///     {
///         (f(), f())
///     }
/// }
///
/// fn main() {
///     assert_eq!(integer().easy_parse("123"), Ok((123, "")));
///     assert!(integer().easy_parse("!").is_err());
///
///     assert_eq!(
///         integer_or_string().easy_parse("123"),
///         Ok((IntOrString::Int(123), ""))
///     );
///     assert_eq!(
///         integer_or_string().easy_parse("abc"),
///         Ok((IntOrString::String("abc".to_string()), ""))
///     );
///     assert_eq!(twice(|| digit()).parse("123"), Ok((('1', '2'), "3")));
/// }
/// ```
#[macro_export]
macro_rules! parser {
    // Form 1: explicit `PartialState`, no struct name.
    // The generated struct reuses the function's name (hence `non_camel_case_types`) and is
    // hidden from the documentation.
    (
        type PartialState = $partial_state: ty;
        $(#[$attr:meta])*
        $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),*)
            ($input_type: ty) -> $output_type: ty
            where [$($where_clause: tt)*]
        $parser: block
    ) => {
        $crate::combine_parser_impl!{
            #[allow(non_camel_case_types)]
            #[doc(hidden)]
            $fn_vis struct $name;
            (type PartialState = ($partial_state);)
            $(#[$attr])*
            $fn_vis fn $name [$($type_params)*]($($arg : $arg_type),*)($input_type) -> $output_type
                where [$($where_clause)*]
            $parser
        }
    };
    // Form 2: explicit struct name (with its own attributes and visibility) and explicit
    // `PartialState`.
    (
        $(#[$derive:meta])*
        $struct_vis: vis struct $type_name: ident;
        type PartialState = $partial_state: ty;
        $(#[$attr:meta])*
        $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),* )
            ($input_type: ty) -> $output_type: ty
            where [$($where_clause: tt)*]
        $parser: block
    ) => {
        $crate::combine_parser_impl!{
            $(#[$derive])*
            $struct_vis struct $type_name;
            (type PartialState = ($partial_state);)
            $(#[$attr])*
            $fn_vis fn $name [$($type_params)*]($($arg : $arg_type),*)($input_type) -> $output_type
                where [$($where_clause)*]
            $parser
        }
    };
    // Form 3: neither struct name nor `PartialState`.
    // Same as form 1 but with `PartialState = ()`.
    (
        $(#[$attr:meta])*
        $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),*)
            ($input_type: ty) -> $output_type: ty
            where [$($where_clause: tt)*]
        $parser: block
    ) => {
        $crate::combine_parser_impl!{
            #[allow(non_camel_case_types)]
            #[doc(hidden)]
            $fn_vis struct $name;
            (type PartialState = (());)
            $(#[$attr])*
            $fn_vis fn $name [$($type_params)*]($($arg : $arg_type),*)($input_type) -> $output_type
                where [$($where_clause)*]
            $parser
        }
    };
    // Form 4: explicit struct name, no `PartialState`.
    // Same as form 2 but with `PartialState = ()`.
    (
        $(#[$derive:meta])*
        $struct_vis: vis struct $type_name: ident;
        $(#[$attr:meta])*
        $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),* )
            ($input_type: ty) -> $output_type: ty
            where [$($where_clause: tt)*]
        $parser: block
    ) => {
        $crate::combine_parser_impl!{
            $(#[$derive])*
            $struct_vis struct $type_name;
            (type PartialState = (());)
            $(#[$attr])*
            $fn_vis fn $name [$($type_params)*]($($arg : $arg_type),*)($input_type) -> $output_type
                where [$($where_clause)*]
            $parser
        }
    };
}

// Helper for `combine_parser_impl!`: expands to the `parse_mode` call on the parser produced by
// the `$parser` block. The first (parenthesized) argument is the declared `PartialState` type.
#[doc(hidden)]
#[macro_export]
macro_rules! combine_parse_partial {
    // `PartialState = ()`: the caller supplied state is ignored and a fresh, default-constructed
    // state is handed to the inner parser on every call.
    ((()) $mode:ident $input:ident $state:ident $parser:block) => {{
        let _ = $state;
        let mut state = Default::default();
        let state = &mut state;
        $parser.parse_mode($mode, $input, state)
    }};
    // Any other `PartialState`: the caller supplied state is passed straight through.
    (($ignored:ty) $mode:ident $input:ident $state:ident $parser:block) => {
        $parser.parse_mode($mode, $input, $state)
    };
}

// Implementation detail of `parser!`: every `parser!` form is normalised into a single
// invocation of the macro declared below.
#[doc(hidden)]
#[macro_export]
macro_rules!
combine_parser_impl {
    // Input: the (already normalised) struct declaration, the `PartialState` type wrapped in
    // parentheses, and the function declaration exactly as accepted by `parser!`.
    //
    // Output, in order:
    //   1. a struct `$type_name` holding the function's arguments,
    //   2. a `Parser` implementation for that struct which evaluates the `$parser` block and
    //      forwards to the parser it produces,
    //   3. a constructor function `$name` returning the struct.
    (
        $(#[$derive:meta])*
        $struct_vis: vis struct $type_name: ident;
        (type PartialState = ($($partial_state: tt)*);)
        $(#[$attr:meta])*
        $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),*)
            ($input_type: ty) -> $output_type: ty
            where [$($where_clause: tt)*]
        $parser: block
    ) => {

        // 1. The parser type. Each function argument becomes a public field; the
        //    `PhantomData` marker mentions the input and output types without storing them.
        $(#[$derive])*
        $struct_vis struct $type_name<$($type_params)*>
            where <$input_type as $crate::stream::StreamOnce>::Error:
                $crate::error::ParseError<
                    <$input_type as $crate::stream::StreamOnce>::Token,
                    <$input_type as $crate::stream::StreamOnce>::Range,
                    <$input_type as $crate::stream::StreamOnce>::Position
                    >,
                $input_type: $crate::stream::Stream,
                $($where_clause)*
        {
            $(pub $arg : $arg_type,)*
            __marker: $crate::lib::marker::PhantomData<fn ($input_type) -> $output_type>
        }

        // 2. The `Parser` implementation.
        // We want this to work on older compilers, at least for a while
        #[allow(non_shorthand_field_patterns)]
        impl<$($type_params)*> $crate::Parser<$input_type> for $type_name<$($type_params)*>
            where <$input_type as $crate::stream::StreamOnce>::Error:
                $crate::error::ParseError<
                    <$input_type as $crate::stream::StreamOnce>::Token,
                    <$input_type as $crate::stream::StreamOnce>::Range,
                    <$input_type as $crate::stream::StreamOnce>::Position
                    >,
                $input_type: $crate::stream::Stream,
                $($where_clause)*
        {

            type Output = $output_type;
            type PartialState = $($partial_state)*;

            $crate::parse_mode!($input_type);
            #[inline]
            fn parse_mode_impl<M>(
                &mut self,
                mode: M,
                input: &mut $input_type,
                state: &mut Self::PartialState,
            ) -> $crate::error::ParseResult<$output_type, <$input_type as $crate::stream::StreamOnce>::Error>
            where M: $crate::parser::ParseMode
            {
                // Re-bind every argument under its original name (as `&mut`) so that the
                // user written `$parser` block can refer to them.
                let $type_name { $( $arg: ref mut $arg,)* .. } = *self;
                // The block is evaluated here, i.e. the inner parser is rebuilt on each call.
                $crate::combine_parse_partial!(($($partial_state)*) mode input state $parser)
            }

            #[inline]
            fn add_error(
                &mut self,
                errors: &mut $crate::error::Tracked<
                    <$input_type as $crate::stream::StreamOnce>::Error
                    >)
            {
                let $type_name { $( $arg : ref mut $arg,)* .. } = *self;
                let mut parser = $parser;
                {
                    // The binding is discarded; the annotation only makes the compiler check
                    // that the block produced a parser with the declared input and output.
                    let _: &mut dyn $crate::Parser< $input_type, Output = $output_type, PartialState = _> = &mut parser;
                }
                parser.add_error(errors)
            }

            fn add_committed_expected_error(
                &mut self,
                errors: &mut $crate::error::Tracked<
                    <$input_type as $crate::stream::StreamOnce>::Error
                    >)
            {
                let $type_name { $( $arg : ref mut $arg,)* .. } = *self;
                let mut parser = $parser;
                {
                    // Same compile-time check as in `add_error`.
                    let _: &mut dyn $crate::Parser< $input_type, Output = $output_type, PartialState = _> = &mut parser;
                }
                parser.add_committed_expected_error(errors)
            }
        }

        // 3. The constructor function carrying the user's attributes and visibility. It only
        //    stores the arguments; the `$parser` block is not evaluated here.
        $(#[$attr])*
        #[inline]
        $fn_vis fn $name< $($type_params)* >(
                $($arg : $arg_type),*
            ) -> $type_name<$($type_params)*>
            where <$input_type as $crate::stream::StreamOnce>::Error:
                $crate::error::ParseError<
                    <$input_type as $crate::stream::StreamOnce>::Token,
                    <$input_type as $crate::stream::StreamOnce>::Range,
                    <$input_type as $crate::stream::StreamOnce>::Position
                    >,
                $input_type: $crate::stream::Stream,
                $($where_clause)*
        {
            $type_name {
                $($arg,)*
                __marker: $crate::lib::marker::PhantomData
            }
        }
    };
}

/// Internal API. May break without a semver bump
///
/// Generates `Parser` method bodies which forward the call to a field of `self`.
macro_rules!
forward_parser {
    // `forward_parser!(Input, method1 method2 ..., field)`:
    // emit the forwarding method for the first name, then recurse on the remaining names.
    // Once the list of names is empty the recursive call only matches the final catch-all arm,
    // which expands to nothing and thereby ends the recursion.
    //
    // NOTE(review): `$field` is captured with `*` here but transcribed with `+` below; the
    // expansions only make sense with at least one field token -- confirm this is intended.
    ($input: ty, $method: ident $( $methods: ident)*, $($field: tt)*) => {
        forward_parser!($input, $method $($field)+);
        forward_parser!($input, $($methods)*, $($field)+);
    };
    // One arm per forwardable method. `$($field)+` is pasted after `self.` so it may be a field
    // name, a tuple index or a longer field path.
    //
    // NOTE(review): this arm maps the result with `|(a, _)| a`, unlike every other arm, and it
    // is not part of the default method list at the bottom -- confirm it is still used.
    ($input: ty, parse_mode $($field: tt)+) => {
        #[inline]
        fn parse_mode_impl<M>(
            &mut self,
            mode: M,
            input: &mut $input,
            state: &mut Self::PartialState,
        ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error>
        where
            M: ParseMode,
        {
            self.$($field)+.parse_mode(mode, input, state).map(|(a, _)| a)
        }
    };
    ($input: ty, parse_lazy $($field: tt)+) => {
        fn parse_lazy(
            &mut self,
            input: &mut $input,
        ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> {
            self.$($field)+.parse_lazy(input)
        }
    };
    ($input: ty, parse_first $($field: tt)+) => {
        fn parse_first(
            &mut self,
            input: &mut $input,
            state: &mut Self::PartialState,
        ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> {
            self.$($field)+.parse_first(input, state)
        }
    };
    ($input: ty, parse_partial $($field: tt)+) => {
        fn parse_partial(
            &mut self,
            input: &mut $input,
            state: &mut Self::PartialState,
        ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> {
            self.$($field)+.parse_partial(input, state)
        }
    };
    ($input: ty, add_error $($field: tt)+) => {

        fn add_error(&mut self, error: &mut $crate::error::Tracked<<$input as $crate::StreamOnce>::Error>) {
            self.$($field)+.add_error(error)
        }
    };
    ($input: ty, add_committed_expected_error $($field: tt)+) => {
        fn add_committed_expected_error(&mut self, error: &mut $crate::error::Tracked<<$input as $crate::StreamOnce>::Error>) {
            self.$($field)+.add_committed_expected_error(error)
        }
    };
    ($input: ty, parser_count $($field: tt)+) => {
        fn parser_count(&self) -> $crate::ErrorOffset {
            self.$($field)+.parser_count()
        }
    };
    // `forward_parser!(Input, field)`: shorthand which forwards the default set of methods.
    ($input: ty, $field: tt) => {
        forward_parser!($input, parse_lazy parse_first parse_partial add_error add_committed_expected_error parser_count, $field);
    };
    // Catch-all: anything not matched above expands to nothing. This is what terminates the
    // recursion of the first arm.
    ($input: ty, $($field: tt)+) => {
    };
}

// Facade over the core types we need
// Public but hidden to be accessible in macros
// (`combine_parser_impl!` refers to `$crate::lib::marker::PhantomData`, which resolves to `core`
// or `std` depending on the `std` feature.)
#[doc(hidden)]
pub mod lib {
    #[cfg(not(feature = "std"))]
    pub use core::*;

    #[cfg(feature = "std")]
    pub use std::*;
}

// The `easy` module is re-exported at the crate root when the `std` feature is enabled.
#[cfg(feature = "std")]
#[doc(inline)]
pub use crate::stream::easy;

/// Error types and traits which define what kind of errors combine parsers may emit
#[macro_use]
pub mod error;
#[macro_use]
pub mod stream;
#[macro_use]
pub mod parser;

#[cfg(feature = "futures-core-03")]
pub mod future_ext;

// Newtype over `u8`; it is the return type of the `parser_count` method generated by
// `forward_parser!` above.
#[doc(hidden)]
#[derive(Clone, PartialOrd, PartialEq, Debug, Copy)]
pub struct ErrorOffset(u8);

// Tests which only use `Parser::parse`, so they do not require the `std` feature.
#[cfg(test)]
mod tests {

    use crate::parser::char::{char, string};

    use super::*;

    /// `chainl1` must report an error when the operand after a separator fails to parse
    /// (`"ab"` after the `,` is not `"abc"`).
    #[test]
    fn chainl1_error_consume() {
        fn first<T, U>(t: T, _: U) -> T {
            t
        }
        let mut p = chainl1(string("abc"), char(',').map(|_| first));
        assert!(p.parse("abc,ab").is_err());
    }

    /// `choice` over a mutable reference to an array of parsers returns the matching
    /// alternative and leaves the rest of the input untouched.
    #[test]
    fn choice_strings() {
        let mut fruits = [
            attempt(string("Apple")),
            attempt(string("Banana")),
            attempt(string("Cherry")),
            attempt(string("Date")),
            attempt(string("Fig")),
            attempt(string("Grape")),
        ];
        let mut parser = choice(&mut fruits);
        assert_eq!(parser.parse("Apple"), Ok(("Apple", "")));
        assert_eq!(parser.parse("Banana"), Ok(("Banana", "")));
        assert_eq!(parser.parse("Cherry"), Ok(("Cherry", "")));
        assert_eq!(parser.parse("DateABC"), Ok(("Date", "ABC")));
        assert_eq!(parser.parse("Fig123"), Ok(("Fig", "123")));
        assert_eq!(parser.parse("GrapeApple"), Ok(("Grape", "Apple")));
    }
}

// Tests which use `easy_parse` / `easy::Errors` and therefore need the `std` feature.
#[cfg(all(feature = "std", test))]
mod std_tests { 663 664 use crate::{ 665 error::StdParseResult, 666 parser::char::{alpha_num, char, digit, letter, spaces, string}, 667 stream::{ 668 easy, 669 position::{self, SourcePosition}, 670 }, 671 }; 672 673 use super::{easy::Error, error::Commit, stream::IteratorStream, *}; 674 675 #[test] optional_error_consume()676 fn optional_error_consume() { 677 let mut p = optional(string("abc")); 678 let err = p.easy_parse(position::Stream::new("ab")).unwrap_err(); 679 assert_eq!(err.position, SourcePosition { line: 1, column: 1 }); 680 } 681 follow<Input>(input: &mut Input) -> StdParseResult<(), Input> where Input: Stream<Token = char, Error = easy::ParseError<Input>>, Input::Position: Default, Input::Error: std::fmt::Debug, Input::Token: PartialEq, Input::Range: PartialEq,682 fn follow<Input>(input: &mut Input) -> StdParseResult<(), Input> 683 where 684 Input: Stream<Token = char, Error = easy::ParseError<Input>>, 685 Input::Position: Default, 686 Input::Error: std::fmt::Debug, 687 Input::Token: PartialEq, 688 Input::Range: PartialEq, 689 { 690 let before = input.checkpoint(); 691 match input.uncons() { 692 Ok(c) => { 693 if c.is_alphanumeric() { 694 input.reset(before).unwrap(); 695 let e = Error::Unexpected(c.into()); 696 Err(Commit::Peek(easy::Errors::new(input.position(), e).into())) 697 } else { 698 Ok(((), Commit::Peek(()))) 699 } 700 } 701 Err(_) => Ok(((), Commit::Peek(()))), 702 } 703 } 704 integer<Input>(input: &mut Input) -> StdParseResult<i64, Input> where Input: Stream<Token = char>,705 fn integer<Input>(input: &mut Input) -> StdParseResult<i64, Input> 706 where 707 Input: Stream<Token = char>, 708 { 709 let (s, input) = many1::<String, _, _>(digit()) 710 .expected("integer") 711 .parse_stream(input) 712 .into_result()?; 713 let mut n = 0; 714 for c in s.chars() { 715 n = n * 10 + (c as i64 - '0' as i64); 716 } 717 Ok((n, input)) 718 } 719 720 #[test] test_integer()721 fn test_integer() { 722 let result = parser(integer).parse("123"); 723 
assert_eq!(result, Ok((123i64, ""))); 724 } 725 #[test] list()726 fn list() { 727 let mut p = sep_by(parser(integer), char(',')); 728 let result = p.parse("123,4,56"); 729 assert_eq!(result, Ok((vec![123i64, 4, 56], ""))); 730 } 731 732 #[test] iterator()733 fn iterator() { 734 let result = parser(integer) 735 .parse(position::Stream::new(IteratorStream::new("123".chars()))) 736 .map(|(i, mut input)| (i, input.uncons().is_err())); 737 assert_eq!(result, Ok((123i64, true))); 738 } 739 740 #[test] field()741 fn field() { 742 let word = || many(alpha_num()); 743 let c_decl = (word(), spaces(), char(':'), spaces(), word()) 744 .map(|t| (t.0, t.4)) 745 .parse("x: int"); 746 assert_eq!(c_decl, Ok((("x".to_string(), "int".to_string()), ""))); 747 } 748 749 #[test] source_position()750 fn source_position() { 751 let source = r" 752 123 753 "; 754 let mut parsed_state = position::Stream::with_positioner(source, SourcePosition::new()); 755 let result = (spaces(), parser(integer), spaces()) 756 .map(|t| t.1) 757 .parse_stream(&mut parsed_state) 758 .into_result(); 759 let state = Commit::Commit(position::Stream { 760 positioner: SourcePosition { line: 3, column: 1 }, 761 input: "", 762 }); 763 assert_eq!( 764 result.map(|(x, c)| (x, c.map(|_| parsed_state))), 765 Ok((123i64, state)) 766 ); 767 } 768 769 #[derive(Debug, PartialEq)] 770 pub enum Expr { 771 Id(String), 772 Int(i64), 773 Array(Vec<Expr>), 774 Plus(Box<Expr>, Box<Expr>), 775 Times(Box<Expr>, Box<Expr>), 776 } 777 778 parser! 
{ 779 fn expr[Input]()(Input) -> Expr 780 where 781 [Input: Stream<Token = char>,] 782 { 783 let word = many1(letter()).expected("identifier"); 784 let integer = parser(integer); 785 let array = between(char('['), char(']'), sep_by(expr(), char(','))).expected("["); 786 let paren_expr = between(char('('), char(')'), parser(term)).expected("("); 787 spaces() 788 .silent() 789 .with( 790 word.map(Expr::Id) 791 .or(integer.map(Expr::Int)) 792 .or(array.map(Expr::Array)) 793 .or(paren_expr), 794 ) 795 .skip(spaces().silent()) 796 } 797 } 798 799 #[test] expression_basic()800 fn expression_basic() { 801 let result = sep_by(expr(), char(',')).parse("int, 100, [[], 123]"); 802 let exprs = vec![ 803 Expr::Id("int".to_string()), 804 Expr::Int(100), 805 Expr::Array(vec![Expr::Array(vec![]), Expr::Int(123)]), 806 ]; 807 assert_eq!(result, Ok((exprs, ""))); 808 } 809 810 #[test] expression_error()811 fn expression_error() { 812 let input = r" 813 ,123 814 "; 815 let result = expr().easy_parse(position::Stream::new(input)); 816 let err = easy::Errors { 817 position: SourcePosition { line: 2, column: 1 }, 818 errors: vec![ 819 Error::Unexpected(','.into()), 820 Error::Expected("integer".into()), 821 Error::Expected("identifier".into()), 822 Error::Expected("[".into()), 823 Error::Expected("(".into()), 824 ], 825 }; 826 assert_eq!(result, Err(err)); 827 } 828 term<Input>(input: &mut Input) -> StdParseResult<Expr, Input> where Input: Stream<Token = char>,829 fn term<Input>(input: &mut Input) -> StdParseResult<Expr, Input> 830 where 831 Input: Stream<Token = char>, 832 { 833 fn times(l: Expr, r: Expr) -> Expr { 834 Expr::Times(Box::new(l), Box::new(r)) 835 } 836 fn plus(l: Expr, r: Expr) -> Expr { 837 Expr::Plus(Box::new(l), Box::new(r)) 838 } 839 let mul = char('*').map(|_| times); 840 let add = char('+').map(|_| plus); 841 let factor = chainl1(expr(), mul); 842 chainl1(factor, add).parse_stream(input).into() 843 } 844 845 #[test] operators()846 fn operators() { 847 let input = r" 
848 1 * 2 + 3 * test 849 "; 850 let (result, _) = parser(term).parse(position::Stream::new(input)).unwrap(); 851 852 let e1 = Expr::Times(Box::new(Expr::Int(1)), Box::new(Expr::Int(2))); 853 let e2 = Expr::Times( 854 Box::new(Expr::Int(3)), 855 Box::new(Expr::Id("test".to_string())), 856 ); 857 assert_eq!(result, Expr::Plus(Box::new(e1), Box::new(e2))); 858 } 859 860 #[test] error_position()861 fn error_position() { 862 let mut p = string("let") 863 .skip(parser(follow)) 864 .map(|x| x.to_string()) 865 .or(many1(digit())); 866 match p.easy_parse(position::Stream::new("le123")) { 867 Ok(_) => panic!(), 868 Err(err) => assert_eq!(err.position, SourcePosition { line: 1, column: 1 }), 869 } 870 match p.easy_parse(position::Stream::new("let1")) { 871 Ok(_) => panic!(), 872 Err(err) => assert_eq!(err.position, SourcePosition { line: 1, column: 4 }), 873 } 874 } 875 876 #[test] sep_by_error_consume()877 fn sep_by_error_consume() { 878 let mut p = sep_by::<Vec<_>, _, _, _>(string("abc"), char(',')); 879 let err = p.easy_parse(position::Stream::new("ab,abc")).unwrap_err(); 880 assert_eq!(err.position, SourcePosition { line: 1, column: 1 }); 881 } 882 883 #[test] inner_error_consume()884 fn inner_error_consume() { 885 let mut p = many::<Vec<_>, _, _>(between(char('['), char(']'), digit())); 886 let result = p.easy_parse(position::Stream::new("[1][2][]")); 887 assert!(result.is_err(), "{:?}", result); 888 let error = result.map(|x| format!("{:?}", x)).unwrap_err(); 889 assert_eq!(error.position, SourcePosition { line: 1, column: 8 }); 890 } 891 892 #[test] infinite_recursion_in_box_parser()893 fn infinite_recursion_in_box_parser() { 894 let _: Result<(Vec<_>, _), _> = (many(Box::new(digit()))).parse("1"); 895 } 896 897 #[test] unsized_parser()898 fn unsized_parser() { 899 let mut parser: Box<dyn Parser<_, Output = char, PartialState = _>> = Box::new(digit()); 900 let borrow_parser = &mut *parser; 901 assert_eq!(borrow_parser.parse("1"), Ok(('1', ""))); 902 } 903 904 #[test] 
std_error()905 fn std_error() { 906 use std::error::Error as StdError; 907 908 use std::fmt; 909 910 #[derive(Debug)] 911 struct Error; 912 impl fmt::Display for Error { 913 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 914 write!(f, "error") 915 } 916 } 917 impl StdError for Error { 918 fn description(&self) -> &str { 919 "error" 920 } 921 } 922 let result: Result<((), _), easy::Errors<char, &str, _>> = 923 EasyParser::easy_parse(&mut string("abc").and_then(|_| Err(Error)), "abc"); 924 assert!(result.is_err()); 925 // Test that ParseError can be coerced to a StdError 926 let _ = result.map_err(|err| { 927 let err: Box<dyn StdError> = Box::new(err); 928 err 929 }); 930 } 931 932 #[test] extract_std_error()933 fn extract_std_error() { 934 // The previous test verified that we could map a ParseError to a StdError by dropping 935 // the internal error details. 936 // This test verifies that we can map a ParseError to a StdError 937 // without dropping the internal error details. Consumers using `error-chain` will 938 // appreciate this. For technical reasons this is pretty janky; see the discussion in 939 // https://github.com/Marwes/combine/issues/86, and excuse the test with significant 940 // boilerplate! 
941 use std::error::Error as StdError; 942 943 use std::fmt; 944 945 #[derive(Clone, PartialEq, Debug)] 946 struct CloneOnly(String); 947 948 #[derive(Debug)] 949 struct DisplayVec<T>(Vec<T>); 950 951 #[derive(Debug)] 952 struct ExtractedError(usize, DisplayVec<Error<CloneOnly, DisplayVec<CloneOnly>>>); 953 954 impl StdError for ExtractedError { 955 fn description(&self) -> &str { 956 "extracted error" 957 } 958 } 959 960 impl fmt::Display for CloneOnly { 961 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 962 write!(f, "{}", self.0) 963 } 964 } 965 966 impl<T: fmt::Debug> fmt::Display for DisplayVec<T> { 967 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 968 write!(f, "[{:?}]", self.0) 969 } 970 } 971 972 impl fmt::Display for ExtractedError { 973 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 974 writeln!(f, "Parse error at {}", self.0)?; 975 Error::fmt_errors(&(self.1).0, f) 976 } 977 } 978 979 let input = &[CloneOnly("x".to_string()), CloneOnly("y".to_string())][..]; 980 let result = token(CloneOnly("z".to_string())) 981 .easy_parse(input) 982 .map_err(|e| e.map_position(|p| p.translate_position(input))) 983 .map_err(|e| { 984 ExtractedError( 985 e.position, 986 DisplayVec( 987 e.errors 988 .into_iter() 989 .map(|e| e.map_range(|r| DisplayVec(r.to_owned()))) 990 .collect(), 991 ), 992 ) 993 }); 994 995 assert!(result.is_err()); 996 // Test that the fresh ExtractedError is Display, so that the internal errors can be 997 // inspected by consuming code; and that the ExtractedError can be coerced to StdError. 998 let _ = result.map_err(|err| { 999 let s = format!("{}", err); 1000 assert!(s.starts_with("Parse error at 0")); 1001 assert!(s.contains("Expected")); 1002 let err: Box<dyn StdError> = Box::new(err); 1003 err 1004 }); 1005 } 1006 } 1007