1 use std::collections::HashMap;
2 use std::str;
3 
4 use winnow::prelude::*;
5 use winnow::{
6     ascii::float,
7     combinator::alt,
8     combinator::cut_err,
9     combinator::{delimited, preceded, separated_pair, terminated},
10     combinator::{repeat, separated},
11     error::{AddContext, ParserError},
12     token::{any, none_of, take, take_while},
13 };
14 
15 use crate::json::JsonValue;
16 
17 pub type Stream<'i> = &'i str;
18 
19 /// The root element of a JSON parser is any value
20 ///
21 /// A parser has the following signature:
22 /// `&mut Stream -> PResult<Output, InputError>`, with `PResult` defined as:
23 /// `type PResult<O, E = (I, ErrorKind)> = Result<O, Err<E>>;`
24 ///
25 /// most of the times you can ignore the error type and use the default (but this
26 /// examples shows custom error types later on!)
27 ///
28 /// Here we use `&str` as input type, but parsers can be generic over
29 /// the input type, work directly with `&[u8]`, or any other type that
30 /// implements the required traits.
json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>31 pub fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
32     input: &mut Stream<'i>,
33 ) -> PResult<JsonValue, E> {
34     delimited(ws, json_value, ws).parse_next(input)
35 }
36 
37 /// `alt` is a combinator that tries multiple parsers one by one, until
38 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>39 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
40     input: &mut Stream<'i>,
41 ) -> PResult<JsonValue, E> {
42     // `alt` combines the each value parser. It returns the result of the first
43     // successful parser, or an error
44     alt((
45         null.value(JsonValue::Null),
46         boolean.map(JsonValue::Boolean),
47         string.map(JsonValue::Str),
48         float.map(JsonValue::Num),
49         array.map(JsonValue::Array),
50         object.map(JsonValue::Object),
51     ))
52     .parse_next(input)
53 }
54 
55 /// `tag(string)` generates a parser that recognizes the argument string.
56 ///
57 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>58 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
59     // This is a parser that returns `"null"` if it sees the string "null", and
60     // an error otherwise
61     "null".parse_next(input)
62 }
63 
64 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
65 /// success.
boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>66 fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
67     // This is a parser that returns `true` if it sees the string "true", and
68     // an error otherwise
69     let parse_true = "true".value(true);
70 
71     // This is a parser that returns `false` if it sees the string "false", and
72     // an error otherwise
73     let parse_false = "false".value(false);
74 
75     alt((parse_true, parse_false)).parse_next(input)
76 }
77 
78 /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote
79 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<String, E>80 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
81     input: &mut Stream<'i>,
82 ) -> PResult<String, E> {
83     preceded(
84         '\"',
85         // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
86         // combinators like  `alt` that they should not try other parsers. We were in the
87         // right branch (since we found the `"` character) but encountered an error when
88         // parsing the string
89         cut_err(terminated(
90             repeat(0.., character).fold(String::new, |mut string, c| {
91                 string.push(c);
92                 string
93             }),
94             '\"',
95         )),
96     )
97     // `context` lets you add a static string to errors to provide more information in the
98     // error chain (to indicate which parser had an error)
99     .context("string")
100     .parse_next(input)
101 }
102 
103 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
104 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>105 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
106     let c = none_of('\"').parse_next(input)?;
107     if c == '\\' {
108         alt((
109             any.verify_map(|c| {
110                 Some(match c {
111                     '"' | '\\' | '/' => c,
112                     'b' => '\x08',
113                     'f' => '\x0C',
114                     'n' => '\n',
115                     'r' => '\r',
116                     't' => '\t',
117                     _ => return None,
118                 })
119             }),
120             preceded('u', unicode_escape),
121         ))
122         .parse_next(input)
123     } else {
124         Ok(c)
125     }
126 }
127 
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>128 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
129     alt((
130         // Not a surrogate
131         u16_hex
132             .verify(|cp| !(0xD800..0xE000).contains(cp))
133             .map(|cp| cp as u32),
134         // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
135         separated_pair(u16_hex, "\\u", u16_hex)
136             .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
137             .map(|(high, low)| {
138                 let high_ten = (high as u32) - 0xD800;
139                 let low_ten = (low as u32) - 0xDC00;
140                 (high_ten << 10) + low_ten + 0x10000
141             }),
142     ))
143     .verify_map(
144         // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
145         std::char::from_u32,
146     )
147     .parse_next(input)
148 }
149 
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>150 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
151     take(4usize)
152         .verify_map(|s| u16::from_str_radix(s, 16).ok())
153         .parse_next(input)
154 }
155 
156 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
157 /// accumulating results in a `Vec`, until it encounters an error.
158 /// If you want more control on the parser application, check out the `iterator`
159 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>160 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
161     input: &mut Stream<'i>,
162 ) -> PResult<Vec<JsonValue>, E> {
163     preceded(
164         ('[', ws),
165         cut_err(terminated(
166             separated(0.., json_value, (ws, ',', ws)),
167             (ws, ']'),
168         )),
169     )
170     .context("array")
171     .parse_next(input)
172 }
173 
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>174 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
175     input: &mut Stream<'i>,
176 ) -> PResult<HashMap<String, JsonValue>, E> {
177     preceded(
178         ('{', ws),
179         cut_err(terminated(
180             separated(0.., key_value, (ws, ',', ws)),
181             (ws, '}'),
182         )),
183     )
184     .context("object")
185     .parse_next(input)
186 }
187 
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>188 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
189     input: &mut Stream<'i>,
190 ) -> PResult<(String, JsonValue), E> {
191     separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
192 }
193 
194 /// Parser combinators are constructed from the bottom up:
195 /// first we write parsers for the smallest elements (here a space character),
196 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>197 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
198     // Combinators like `take_while` return a function. That function is the
199     // parser,to which we can pass the input
200     take_while(0.., WS).parse_next(input)
201 }
202 
203 const WS: &[char] = &[' ', '\t', '\r', '\n'];
204 
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // it's dead for benches
    use super::*;

    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // it's dead for benches
    type Error<'i> = winnow::error::InputError<&'i str>;

    /// Checks string parsing: empty and plain strings, every supported escape,
    /// surrogate pairs, and a set of malformed inputs that must be rejected.
    #[test]
    fn json_string() {
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"\""),
            Ok(("", "".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"abc\""),
            Ok(("", "abc".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>
                .parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
            Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())),
        );
        // The surrogate pair D83D/DE10 decodes to U+1F610:
        // ((0xD83D - 0xD800) << 10) + (0xDE10 - 0xDC00) + 0x10000.
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"\\uD83D\\uDE10\""),
            Ok(("", "\u{1F610}".to_string()))
        );

        // Unterminated strings.
        assert!(string::<Error<'_>>.parse_peek("\"").is_err());
        assert!(string::<Error<'_>>.parse_peek("\"abc").is_err());
        assert!(string::<Error<'_>>.parse_peek("\"\\\"").is_err());
        // Too few hex digits in a `\u` escape.
        assert!(string::<Error<'_>>.parse_peek("\"\\u123\"").is_err());
        // Lone or mismatched surrogates.
        assert!(string::<Error<'_>>.parse_peek("\"\\uD800\"").is_err());
        assert!(string::<Error<'_>>
            .parse_peek("\"\\uD800\\uD800\"")
            .is_err());
        assert!(string::<Error<'_>>.parse_peek("\"\\uDC00\"").is_err());
    }

    /// Checks that a compact object parses into the expected map.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}"#;

        let expected = Object(
            vec![
                ("a".to_string(), Num(42.0)),
                ("b".to_string(), Str("x".to_string())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected)));
    }

    /// Checks that a compact array parses into the expected vector.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]"#;

        let expected = Array(vec![Num(42.0), Str("x".to_string())]);

        assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected)));
    }

    /// Checks that whitespace is accepted around every structural token and
    /// that empty arrays and objects are handled.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        let input = r#"
  {
    "null" : null,
    "true"  :true ,
    "false":  false  ,
    "number" : 123e4 ,
    "string" : " abc 123 " ,
    "array" : [ false , 1 , "two" ] ,
    "object" : { "a" : 1.0 , "b" : "c" } ,
    "empty_array" : [  ] ,
    "empty_object" : {   }
  }
  "#;

        assert_eq!(
            json::<Error<'_>>.parse_peek(input),
            Ok((
                "",
                Object(
                    vec![
                        ("null".to_string(), Null),
                        ("true".to_string(), Boolean(true)),
                        ("false".to_string(), Boolean(false)),
                        ("number".to_string(), Num(123e4)),
                        ("string".to_string(), Str(" abc 123 ".to_string())),
                        (
                            "array".to_string(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_string())])
                        ),
                        (
                            "object".to_string(),
                            Object(
                                vec![
                                    ("a".to_string(), Num(1.0)),
                                    ("b".to_string(), Str("c".to_string())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_string(), Array(vec![]),),
                        ("empty_object".to_string(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                )
            ))
        );
    }
}
329