1 //! Extensions to the parsing API with niche applicability. 2 3 use crate::buffer::Cursor; 4 use crate::error::Result; 5 use crate::parse::{inner_unexpected, ParseBuffer, Unexpected}; 6 use proc_macro2::extra::DelimSpan; 7 use proc_macro2::Delimiter; 8 use std::cell::Cell; 9 use std::mem; 10 use std::rc::Rc; 11 12 /// Extensions to the `ParseStream` API to support speculative parsing. 13 pub trait Speculative { 14 /// Advance this parse stream to the position of a forked parse stream. 15 /// 16 /// This is the opposite operation to [`ParseStream::fork`]. You can fork a 17 /// parse stream, perform some speculative parsing, then join the original 18 /// stream to the fork to "commit" the parsing from the fork to the main 19 /// stream. 20 /// 21 /// If you can avoid doing this, you should, as it limits the ability to 22 /// generate useful errors. That said, it is often the only way to parse 23 /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem 24 /// is that when the fork fails to parse an `A`, it's impossible to tell 25 /// whether that was because of a syntax error and the user meant to provide 26 /// an `A`, or that the `A`s are finished and it's time to start parsing 27 /// `B`s. Use with care. 28 /// 29 /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by 30 /// parsing `B*` and removing the leading members of `A` from the 31 /// repetition, bypassing the need to involve the downsides associated with 32 /// speculative parsing. 33 /// 34 /// [`ParseStream::fork`]: ParseBuffer::fork 35 /// 36 /// # Example 37 /// 38 /// There has been chatter about the possibility of making the colons in the 39 /// turbofish syntax like `path::to::<T>` no longer required by accepting 40 /// `path::to<T>` in expression position. Specifically, according to [RFC 41 /// 2544], [`PathSegment`] parsing should always try to consume a following 42 /// `<` token as the start of generic arguments, and reset to the `<` if 43 /// that fails (e.g. the token is acting as a less-than operator). 44 /// 45 /// This is the exact kind of parsing behavior which requires the "fork, 46 /// try, commit" behavior that [`ParseStream::fork`] discourages. With 47 /// `advance_to`, we can avoid having to parse the speculatively parsed 48 /// content a second time. 49 /// 50 /// This change in behavior can be implemented in syn by replacing just the 51 /// `Parse` implementation for `PathSegment`: 52 /// 53 /// ``` 54 /// # use syn::ext::IdentExt; 55 /// use syn::parse::discouraged::Speculative; 56 /// # use syn::parse::{Parse, ParseStream}; 57 /// # use syn::{Ident, PathArguments, Result, Token}; 58 /// 59 /// pub struct PathSegment { 60 /// pub ident: Ident, 61 /// pub arguments: PathArguments, 62 /// } 63 /// # 64 /// # impl<T> From<T> for PathSegment 65 /// # where 66 /// # T: Into<Ident>, 67 /// # { 68 /// # fn from(ident: T) -> Self { 69 /// # PathSegment { 70 /// # ident: ident.into(), 71 /// # arguments: PathArguments::None, 72 /// # } 73 /// # } 74 /// # } 75 /// 76 /// impl Parse for PathSegment { 77 /// fn parse(input: ParseStream) -> Result<Self> { 78 /// if input.peek(Token![super]) 79 /// || input.peek(Token![self]) 80 /// || input.peek(Token![Self]) 81 /// || input.peek(Token![crate]) 82 /// { 83 /// let ident = input.call(Ident::parse_any)?; 84 /// return Ok(PathSegment::from(ident)); 85 /// } 86 /// 87 /// let ident = input.parse()?; 88 /// if input.peek(Token![::]) && input.peek3(Token![<]) { 89 /// return Ok(PathSegment { 90 /// ident, 91 /// arguments: PathArguments::AngleBracketed(input.parse()?), 92 /// }); 93 /// } 94 /// if input.peek(Token![<]) && !input.peek(Token![<=]) { 95 /// let fork = input.fork(); 96 /// if let Ok(arguments) = fork.parse() { 97 /// input.advance_to(&fork); 98 /// return Ok(PathSegment { 99 /// ident, 100 /// arguments: PathArguments::AngleBracketed(arguments), 101 /// }); 102 /// } 103 /// } 104 /// Ok(PathSegment::from(ident)) 105 /// } 106 /// } 107 /// 108 /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap(); 109 /// ``` 110 /// 111 /// # Drawbacks 112 /// 113 /// The main drawback of this style of speculative parsing is in error 114 /// presentation. Even if the lookahead is the "correct" parse, the error 115 /// that is shown is that of the "fallback" parse. To use the same example 116 /// as the turbofish above, take the following unfinished "turbofish": 117 /// 118 /// ```text 119 /// let _ = f<&'a fn(), for<'a> serde::>(); 120 /// ``` 121 /// 122 /// If this is parsed as generic arguments, we can provide the error message 123 /// 124 /// ```text 125 /// error: expected identifier 126 /// --> src.rs:L:C 127 /// | 128 /// L | let _ = f<&'a fn(), for<'a> serde::>(); 129 /// | ^ 130 /// ``` 131 /// 132 /// but if parsed using the above speculative parsing, it falls back to 133 /// assuming that the `<` is a less-than when it fails to parse the generic 134 /// arguments, and tries to interpret the `&'a` as the start of a labelled 135 /// loop, resulting in the much less helpful error 136 /// 137 /// ```text 138 /// error: expected `:` 139 /// --> src.rs:L:C 140 /// | 141 /// L | let _ = f<&'a fn(), for<'a> serde::>(); 142 /// | ^^ 143 /// ``` 144 /// 145 /// This can be mitigated with various heuristics (two examples: show both 146 /// forks' parse errors, or show the one that consumed more tokens), but 147 /// when you can control the grammar, sticking to something that can be 148 /// parsed LL(3) and without the LL(*) speculative parsing this makes 149 /// possible, displaying reasonable errors becomes much more simple. 150 /// 151 /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544 152 /// [`PathSegment`]: crate::PathSegment 153 /// 154 /// # Performance 155 /// 156 /// This method performs a cheap fixed amount of work that does not depend 157 /// on how far apart the two streams are positioned. 158 /// 159 /// # Panics 160 /// 161 /// The forked stream in the argument of `advance_to` must have been 162 /// obtained by forking `self`. Attempting to advance to any other stream 163 /// will cause a panic. advance_to(&self, fork: &Self)164 fn advance_to(&self, fork: &Self); 165 } 166 167 impl<'a> Speculative for ParseBuffer<'a> { advance_to(&self, fork: &Self)168 fn advance_to(&self, fork: &Self) { 169 if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { 170 panic!("Fork was not derived from the advancing parse stream"); 171 } 172 173 let (self_unexp, self_sp) = inner_unexpected(self); 174 let (fork_unexp, fork_sp) = inner_unexpected(fork); 175 if !Rc::ptr_eq(&self_unexp, &fork_unexp) { 176 match (fork_sp, self_sp) { 177 // Unexpected set on the fork, but not on `self`, copy it over. 178 (Some(span), None) => { 179 self_unexp.set(Unexpected::Some(span)); 180 } 181 // Unexpected unset. Use chain to propagate errors from fork. 182 (None, None) => { 183 fork_unexp.set(Unexpected::Chain(self_unexp)); 184 185 // Ensure toplevel 'unexpected' tokens from the fork don't 186 // bubble up the chain by replacing the root `unexpected` 187 // pointer, only 'unexpected' tokens from existing group 188 // parsers should bubble. 189 fork.unexpected 190 .set(Some(Rc::new(Cell::new(Unexpected::None)))); 191 } 192 // Unexpected has been set on `self`. No changes needed. 193 (_, Some(_)) => {} 194 } 195 } 196 197 // See comment on `cell` in the struct definition. 198 self.cell 199 .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }); 200 } 201 } 202 203 /// Extensions to the `ParseStream` API to support manipulating invisible 204 /// delimiters the same as if they were visible. 205 pub trait AnyDelimiter { 206 /// Returns the delimiter, the span of the delimiter token, and the nested 207 /// contents for further parsing. parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>208 fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>; 209 } 210 211 impl<'a> AnyDelimiter for ParseBuffer<'a> { parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>212 fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> { 213 self.step(|cursor| { 214 if let Some((content, delimiter, span, rest)) = cursor.any_group() { 215 let scope = crate::buffer::close_span_of_group(*cursor); 216 let nested = crate::parse::advance_step_cursor(cursor, content); 217 let unexpected = crate::parse::get_unexpected(self); 218 let content = crate::parse::new_parse_buffer(scope, nested, unexpected); 219 Ok(((delimiter, span, content), rest)) 220 } else { 221 Err(cursor.error("expected any delimiter")) 222 } 223 }) 224 } 225 } 226