// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use alloc::format;
use alloc::rc::Rc;
#[cfg(feature = "pretty-print")]
use alloc::string::String;
use alloc::vec::Vec;
use core::borrow::Borrow;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::ptr;
use core::str;

#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::line_index::LineIndex;
use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::span::{self, Span};
use crate::RuleType;

/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    /// Token index into `queue`.
    start: usize,
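    /// Shared index used to compute line/column positions within `input`.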
    line_index: Rc<LineIndex>,
}

/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<'i, R: RuleType>(
    queue: Rc<Vec<QueueableToken<'i, R>>>,
    input: &'i str,
    line_index: Rc<LineIndex>,
    start: usize,
) -> Pair<'i, R> {
    Pair {
        queue,
        input,
        start,
        line_index,
    }
}

impl<'i, R: RuleType> Pair<'i, R> {
    /// Returns the `Rule` of the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_rule(), Rule::a);
    /// ```
    #[inline]
    pub fn as_rule(&self) -> R {
        match self.queue[self.pair()] {
            QueueableToken::End { rule, .. } => rule,
            _ => unreachable!(),
        }
    }

    /// Captures a slice from the `&str` defined by the token `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &'i str {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from `Position`s and are valid UTF-8 character boundaries.
        &self.input[start..end]
    }

    /// Returns the input string of the `Pair`.
    ///
    /// This is the full source string from which the `Pair` was parsed, returned as a `&str`.
    /// It can be used to inspect the surrounding context of the `Pair` or for further processing.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// // Example: Get input string from a Pair
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// assert_eq!(input, pair.get_input());
    /// ```
    pub fn get_input(&self) -> &'i str {
        self.input
    }

    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span<'i> {
        self.as_span()
    }

    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_span().as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_span(&self) -> Span<'i> {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from `Position`s and are valid UTF-8 character boundaries.
        unsafe { span::Span::new_unchecked(self.input, start, end) }
    }

    /// Returns the node tag of this `Pair`, or `None` if the pair carries no tag.
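    ///
    /// # Examples
    ///
    /// An illustrative, non-compiled sketch; it assumes a derive-based parser
    /// (`MyParser`, `Rule::expr`) whose grammar tags a sub-expression, e.g.
    /// `expr = { #left = term ~ "+" ~ #right = term }`.
    ///
    /// ```ignore
    /// let pair = MyParser::parse(Rule::expr, "a+b").unwrap().next().unwrap();
    /// let left = pair
    ///     .into_inner()
    ///     .find(|p| p.as_node_tag() == Some("left"));
    /// assert!(left.is_some());
    /// ```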
    #[inline]
    pub fn as_node_tag(&self) -> Option<&str> {
        match &self.queue[self.pair()] {
            QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
            _ => None,
        }
    }

    /// Returns the inner `Pairs` contained within this `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert!(pair.into_inner().next().is_none());
    /// ```
    #[inline]
    pub fn into_inner(self) -> Pairs<'i, R> {
        let pair = self.pair();

        pairs::new(
            self.queue,
            self.input,
            Some(self.line_index),
            self.start + 1,
            pair,
        )
    }

    /// Returns the `Tokens` for the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    /// let tokens: Vec<_> = pair.tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 2);
    /// ```
    #[inline]
    pub fn tokens(self) -> Tokens<'i, R> {
        let end = self.pair();

        tokens::new(self.queue, self.input, self.start, end + 1)
    }

    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
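    ///
    /// # Examples
    ///
    /// An illustrative, non-compiled sketch; it assumes the crate is built with the
    /// `pretty-print` feature and that `pair` was obtained as in the examples above.
    ///
    /// ```ignore
    /// let json = pair.to_json();
    /// assert!(json.contains("\"rule\""));
    /// ```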
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
    }

    /// Returns the `(line, col)` at which this pair starts.
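    ///
    /// # Examples
    ///
    /// A minimal sketch that mirrors the other doctests in this module; line and
    /// column numbers are assumed to be 1-based.
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.line_col(), (1, 1));
    /// ```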
    pub fn line_col(&self) -> (usize, usize) {
        let pos = self.pos(self.start);
        self.line_index.line_col(self.input, pos)
    }

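    /// Returns the queue index of the `End` token that closes the `Start` token at `self.start`.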
    fn pair(&self) -> usize {
        match self.queue[self.start] {
            QueueableToken::Start {
                end_token_index, ..
            } => end_token_index,
            _ => unreachable!(),
        }
    }

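    /// Returns the input byte offset recorded on the token at queue position `index`.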
    fn pos(&self, index: usize) -> usize {
        match self.queue[index] {
            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
                input_pos
            }
        }
    }
}

impl<'i, R: RuleType> Pairs<'i, R> {
    /// Creates a new `Pairs` iterator containing just the given `Pair`.
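    ///
    /// # Examples
    ///
    /// A minimal sketch that mirrors the other doctests in this module.
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// let pairs = pest::iterators::Pairs::single(pair);
    /// assert_eq!(pairs.count(), 1);
    /// ```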
    pub fn single(pair: Pair<'i, R>) -> Self {
        let end = pair.pair();
        pairs::new(
            pair.queue,
            pair.input,
            Some(pair.line_index),
            pair.start,
            end,
        )
    }
}

impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let pair = &mut f.debug_struct("Pair");
        pair.field("rule", &self.as_rule());
        // Only emit the node tag when present so existing `Debug` output stays compatible.
        if let Some(s) = self.as_node_tag() {
            pair.field("node_tag", &s);
        }
        pair.field("span", &self.as_span())
            .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
            .finish()
    }
}

impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rule = self.as_rule();
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let mut pairs = self.clone().into_inner().peekable();

        if pairs.peek().is_none() {
            write!(f, "{:?}({}, {})", rule, start, end)
        } else {
            write!(
                f,
                "{:?}({}, {}, [{}])",
                rule,
                start,
                end,
                pairs
                    .map(|pair| format!("{}", pair))
                    .collect::<Vec<_>>()
                    .join(", ")
            )
        }
    }
}

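// Equality is identity-based: two `Pair`s are equal when they share the same token queue
// and input and start at the same token index, not when their contents happen to match.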
impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
    fn eq(&self, other: &Pair<'i, R>) -> bool {
        Rc::ptr_eq(&self.queue, &other.queue)
            && ptr::eq(self.input, other.input)
            && self.start == other.start
    }
}

impl<'i, R: Eq> Eq for Pair<'i, R> {}

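// Hashing mirrors `PartialEq`: it hashes the queue pointer, the input pointer, and the
// start index, so equal pairs hash identically.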
impl<'i, R: Hash> Hash for Pair<'i, R> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
        (self.input as *const str).hash(state);
        self.start.hash(state);
    }
}

#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let rule = format!("{:?}", self.as_rule());
        let inner = self.clone().into_inner();

        let mut ser = serializer.serialize_struct("Pairs", 3)?;
        ser.serialize_field("pos", &(start, end))?;
        ser.serialize_field("rule", &rule)?;

        if inner.peek().is_none() {
            ser.serialize_field("inner", &self.as_str())?;
        } else {
            ser.serialize_field("inner", &inner)?;
        }

        ser.end()
    }
}

#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }

    #[test]
    fn get_input_of_pair() {
        let input = "abcde";
        let pair = AbcParser::parse(Rule::a, input).unwrap().next().unwrap();

        assert_eq!(input, pair.get_input());
    }
}