1 use std::collections::HashMap;
2 use std::str;
3 
4 use winnow::prelude::*;
5 use winnow::{
6     ascii::float,
7     combinator::cut_err,
8     combinator::empty,
9     combinator::fail,
10     combinator::peek,
11     combinator::{alt, dispatch},
12     combinator::{delimited, preceded, separated_pair, terminated},
13     combinator::{repeat, separated},
14     error::{AddContext, ParserError},
15     token::{any, none_of, take, take_while},
16 };
17 
18 use crate::json::JsonValue;
19 
/// Input type consumed by every parser in this module: a borrowed string slice.
pub type Stream<'i> = &'i str;
21 
22 /// The root element of a JSON parser is any value
23 ///
24 /// A parser has the following signature:
25 /// `&mut Stream -> PResult<Output, InputError>`, with `PResult` defined as:
26 /// `type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;`
27 ///
28 /// most of the times you can ignore the error type and use the default (but this
29 /// examples shows custom error types later on!)
30 ///
31 /// Here we use `&str` as input type, but parsers can be generic over
32 /// the input type, work directly with `&[u8]`, or any other type that
33 /// implements the required traits.
json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>34 pub fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
35     input: &mut Stream<'i>,
36 ) -> PResult<JsonValue, E> {
37     delimited(ws, json_value, ws).parse_next(input)
38 }
39 
40 /// `alt` is a combinator that tries multiple parsers one by one, until
41 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>42 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
43     input: &mut Stream<'i>,
44 ) -> PResult<JsonValue, E> {
45     // `dispatch` gives you `match`-like behavior compared to `alt` successively trying different
46     // implementations.
47     dispatch!(peek(any);
48         'n' => null.value(JsonValue::Null),
49         't' => true_.map(JsonValue::Boolean),
50         'f' => false_.map(JsonValue::Boolean),
51         '"' => string.map(JsonValue::Str),
52         '+' => float.map(JsonValue::Num),
53         '-' => float.map(JsonValue::Num),
54         '0'..='9' => float.map(JsonValue::Num),
55         '[' => array.map(JsonValue::Array),
56         '{' => object.map(JsonValue::Object),
57         _ => fail,
58     )
59     .parse_next(input)
60 }
61 
62 /// `tag(string)` generates a parser that recognizes the argument string.
63 ///
64 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>65 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
66     // This is a parser that returns `"null"` if it sees the string "null", and
67     // an error otherwise
68     "null".parse_next(input)
69 }
70 
71 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
72 /// success.
true_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>73 fn true_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
74     // This is a parser that returns `true` if it sees the string "true", and
75     // an error otherwise
76     "true".value(true).parse_next(input)
77 }
78 
79 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
80 /// success.
false_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>81 fn false_<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
82     // This is a parser that returns `false` if it sees the string "false", and
83     // an error otherwise
84     "false".value(false).parse_next(input)
85 }
86 
87 /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote
88 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<String, E>89 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
90     input: &mut Stream<'i>,
91 ) -> PResult<String, E> {
92     preceded(
93         '\"',
94         // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
95         // combinators like  `alt` that they should not try other parsers. We were in the
96         // right branch (since we found the `"` character) but encountered an error when
97         // parsing the string
98         cut_err(terminated(
99             repeat(0.., character).fold(String::new, |mut string, c| {
100                 string.push(c);
101                 string
102             }),
103             '\"',
104         )),
105     )
106     // `context` lets you add a static string to errors to provide more information in the
107     // error chain (to indicate which parser had an error)
108     .context("string")
109     .parse_next(input)
110 }
111 
112 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
113 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>114 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
115     let c = none_of('\"').parse_next(input)?;
116     if c == '\\' {
117         dispatch!(any;
118           '"' => empty.value('"'),
119           '\\' => empty.value('\\'),
120           '/'  => empty.value('/'),
121           'b' => empty.value('\x08'),
122           'f' => empty.value('\x0C'),
123           'n' => empty.value('\n'),
124           'r' => empty.value('\r'),
125           't' => empty.value('\t'),
126           'u' => unicode_escape,
127           _ => fail,
128         )
129         .parse_next(input)
130     } else {
131         Ok(c)
132     }
133 }
134 
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>135 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
136     alt((
137         // Not a surrogate
138         u16_hex
139             .verify(|cp| !(0xD800..0xE000).contains(cp))
140             .map(|cp| cp as u32),
141         // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
142         separated_pair(u16_hex, "\\u", u16_hex)
143             .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
144             .map(|(high, low)| {
145                 let high_ten = (high as u32) - 0xD800;
146                 let low_ten = (low as u32) - 0xDC00;
147                 (high_ten << 10) + low_ten + 0x10000
148             }),
149     ))
150     .verify_map(
151         // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
152         std::char::from_u32,
153     )
154     .parse_next(input)
155 }
156 
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>157 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
158     take(4usize)
159         .verify_map(|s| u16::from_str_radix(s, 16).ok())
160         .parse_next(input)
161 }
162 
163 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
164 /// accumulating results in a `Vec`, until it encounters an error.
165 /// If you want more control on the parser application, check out the `iterator`
166 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>167 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
168     input: &mut Stream<'i>,
169 ) -> PResult<Vec<JsonValue>, E> {
170     preceded(
171         ('[', ws),
172         cut_err(terminated(
173             separated(0.., json_value, (ws, ',', ws)),
174             (ws, ']'),
175         )),
176     )
177     .context("array")
178     .parse_next(input)
179 }
180 
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>181 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
182     input: &mut Stream<'i>,
183 ) -> PResult<HashMap<String, JsonValue>, E> {
184     preceded(
185         ('{', ws),
186         cut_err(terminated(
187             separated(0.., key_value, (ws, ',', ws)),
188             (ws, '}'),
189         )),
190     )
191     .context("object")
192     .parse_next(input)
193 }
194 
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>195 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
196     input: &mut Stream<'i>,
197 ) -> PResult<(String, JsonValue), E> {
198     separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
199 }
200 
201 /// Parser combinators are constructed from the bottom up:
202 /// first we write parsers for the smallest elements (here a space character),
203 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>204 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
205     // Combinators like `take_while` return a function. That function is the
206     // parser,to which we can pass the input
207     take_while(0.., WS).parse_next(input)
208 }
209 
210 const WS: &[char] = &[' ', '\t', '\r', '\n'];
211 
// Unit tests for the JSON parsers defined in the parent module.
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    use super::*;

    // Concrete error type used to instantiate the generic parsers under test.
    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    type Error<'i> = winnow::error::InputError<&'i str>;

    /// String parsing: empty and plain strings, every escape form, surrogate
    /// pairs, and a set of malformed inputs that must be rejected.
    #[test]
    fn json_string() {
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"\""),
            Ok(("", "".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"abc\""),
            Ok(("", "abc".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>
                .parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
            Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())),
        );
        // The surrogate pair D83D/DE10 encodes U+1F610. It is written as an
        // escape so the expectation survives any re-encoding of this file.
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"\\uD83D\\uDE10\""),
            Ok(("", "\u{1F610}".to_string()))
        );

        // Unterminated strings.
        assert!(string::<Error<'_>>.parse_peek("\"").is_err());
        assert!(string::<Error<'_>>.parse_peek("\"abc").is_err());
        assert!(string::<Error<'_>>.parse_peek("\"\\\"").is_err());
        // Too few hex digits in a `\u` escape.
        assert!(string::<Error<'_>>.parse_peek("\"\\u123\"").is_err());
        // Lone or mismatched surrogates.
        assert!(string::<Error<'_>>.parse_peek("\"\\uD800\"").is_err());
        assert!(string::<Error<'_>>
            .parse_peek("\"\\uD800\\uD800\"")
            .is_err());
        assert!(string::<Error<'_>>.parse_peek("\"\\uDC00\"").is_err());
    }

    /// A compact object with a number member and a string member.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}"#;

        let expected = Object(
            vec![
                ("a".to_string(), Num(42.0)),
                ("b".to_string(), Str("x".to_string())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected)));
    }

    /// A compact array with a number element and a string element.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]"#;

        let expected = Array(vec![Num(42.0), Str("x".to_string())]);

        assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected)));
    }

    /// Whitespace is tolerated around every token, including inside empty
    /// arrays and objects and around the whole document.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        let input = r#"
  {
    "null" : null,
    "true"  :true ,
    "false":  false  ,
    "number" : 123e4 ,
    "string" : " abc 123 " ,
    "array" : [ false , 1 , "two" ] ,
    "object" : { "a" : 1.0 , "b" : "c" } ,
    "empty_array" : [  ] ,
    "empty_object" : {   }
  }
  "#;

        assert_eq!(
            json::<Error<'_>>.parse_peek(input),
            Ok((
                "",
                Object(
                    vec![
                        ("null".to_string(), Null),
                        ("true".to_string(), Boolean(true)),
                        ("false".to_string(), Boolean(false)),
                        ("number".to_string(), Num(123e4)),
                        ("string".to_string(), Str(" abc 123 ".to_string())),
                        (
                            "array".to_string(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_string())])
                        ),
                        (
                            "object".to_string(),
                            Object(
                                vec![
                                    ("a".to_string(), Num(1.0)),
                                    ("b".to_string(), Str("c".to_string())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_string(), Array(vec![]),),
                        ("empty_object".to_string(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                )
            ))
        );
    }
}
336