1 use std::collections::HashMap;
2 use std::str;
3
4 use winnow::prelude::*;
5 use winnow::{
6 ascii::float,
7 combinator::alt,
8 combinator::cut_err,
9 combinator::{delimited, preceded, separated_pair, terminated},
10 combinator::{repeat, separated},
11 error::{AddContext, ParserError},
12 token::{any, none_of, take, take_while},
13 };
14
15 use crate::json::JsonValue;
16
/// The input type consumed by every parser in this file: a borrowed string slice.
pub type Stream<'i> = &'i str;
18
19 /// The root element of a JSON parser is any value
20 ///
21 /// A parser has the following signature:
22 /// `&mut Stream -> PResult<Output, InputError>`, with `PResult` defined as:
23 /// `type PResult<O, E = (I, ErrorKind)> = Result<O, Err<E>>;`
24 ///
25 /// most of the times you can ignore the error type and use the default (but this
26 /// examples shows custom error types later on!)
27 ///
28 /// Here we use `&str` as input type, but parsers can be generic over
29 /// the input type, work directly with `&[u8]`, or any other type that
30 /// implements the required traits.
json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>31 pub fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
32 input: &mut Stream<'i>,
33 ) -> PResult<JsonValue, E> {
34 delimited(ws, json_value, ws).parse_next(input)
35 }
36
37 /// `alt` is a combinator that tries multiple parsers one by one, until
38 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>39 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
40 input: &mut Stream<'i>,
41 ) -> PResult<JsonValue, E> {
42 // `alt` combines the each value parser. It returns the result of the first
43 // successful parser, or an error
44 alt((
45 null.value(JsonValue::Null),
46 boolean.map(JsonValue::Boolean),
47 string.map(JsonValue::Str),
48 float.map(JsonValue::Num),
49 array.map(JsonValue::Array),
50 object.map(JsonValue::Object),
51 ))
52 .parse_next(input)
53 }
54
55 /// `tag(string)` generates a parser that recognizes the argument string.
56 ///
57 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>58 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
59 // This is a parser that returns `"null"` if it sees the string "null", and
60 // an error otherwise
61 "null".parse_next(input)
62 }
63
64 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
65 /// success.
boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>66 fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
67 // This is a parser that returns `true` if it sees the string "true", and
68 // an error otherwise
69 let parse_true = "true".value(true);
70
71 // This is a parser that returns `false` if it sees the string "false", and
72 // an error otherwise
73 let parse_false = "false".value(false);
74
75 alt((parse_true, parse_false)).parse_next(input)
76 }
77
78 /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote
79 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<String, E>80 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
81 input: &mut Stream<'i>,
82 ) -> PResult<String, E> {
83 preceded(
84 '\"',
85 // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
86 // combinators like `alt` that they should not try other parsers. We were in the
87 // right branch (since we found the `"` character) but encountered an error when
88 // parsing the string
89 cut_err(terminated(
90 repeat(0.., character).fold(String::new, |mut string, c| {
91 string.push(c);
92 string
93 }),
94 '\"',
95 )),
96 )
97 // `context` lets you add a static string to errors to provide more information in the
98 // error chain (to indicate which parser had an error)
99 .context("string")
100 .parse_next(input)
101 }
102
103 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
104 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>105 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
106 let c = none_of('\"').parse_next(input)?;
107 if c == '\\' {
108 alt((
109 any.verify_map(|c| {
110 Some(match c {
111 '"' | '\\' | '/' => c,
112 'b' => '\x08',
113 'f' => '\x0C',
114 'n' => '\n',
115 'r' => '\r',
116 't' => '\t',
117 _ => return None,
118 })
119 }),
120 preceded('u', unicode_escape),
121 ))
122 .parse_next(input)
123 } else {
124 Ok(c)
125 }
126 }
127
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>128 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
129 alt((
130 // Not a surrogate
131 u16_hex
132 .verify(|cp| !(0xD800..0xE000).contains(cp))
133 .map(|cp| cp as u32),
134 // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
135 separated_pair(u16_hex, "\\u", u16_hex)
136 .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
137 .map(|(high, low)| {
138 let high_ten = (high as u32) - 0xD800;
139 let low_ten = (low as u32) - 0xDC00;
140 (high_ten << 10) + low_ten + 0x10000
141 }),
142 ))
143 .verify_map(
144 // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
145 std::char::from_u32,
146 )
147 .parse_next(input)
148 }
149
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>150 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
151 take(4usize)
152 .verify_map(|s| u16::from_str_radix(s, 16).ok())
153 .parse_next(input)
154 }
155
156 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
157 /// accumulating results in a `Vec`, until it encounters an error.
158 /// If you want more control on the parser application, check out the `iterator`
159 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>160 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
161 input: &mut Stream<'i>,
162 ) -> PResult<Vec<JsonValue>, E> {
163 preceded(
164 ('[', ws),
165 cut_err(terminated(
166 separated(0.., json_value, (ws, ',', ws)),
167 (ws, ']'),
168 )),
169 )
170 .context("array")
171 .parse_next(input)
172 }
173
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>174 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
175 input: &mut Stream<'i>,
176 ) -> PResult<HashMap<String, JsonValue>, E> {
177 preceded(
178 ('{', ws),
179 cut_err(terminated(
180 separated(0.., key_value, (ws, ',', ws)),
181 (ws, '}'),
182 )),
183 )
184 .context("object")
185 .parse_next(input)
186 }
187
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>188 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
189 input: &mut Stream<'i>,
190 ) -> PResult<(String, JsonValue), E> {
191 separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
192 }
193
194 /// Parser combinators are constructed from the bottom up:
195 /// first we write parsers for the smallest elements (here a space character),
196 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>197 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
198 // Combinators like `take_while` return a function. That function is the
199 // parser,to which we can pass the input
200 take_while(0.., WS).parse_next(input)
201 }
202
203 const WS: &[char] = &[' ', '\t', '\r', '\n'];
204
/// Unit tests for the JSON parsers above.
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    use super::*;

    /// Concrete error type used to instantiate the generic parsers in these tests.
    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    type Error<'i> = winnow::error::InputError<&'i str>;

    /// Strings: empty, plain, every escape form, surrogate pairs, and malformed inputs.
    #[test]
    fn json_string() {
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"\""),
            Ok(("", "".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"abc\""),
            Ok(("", "abc".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>
                .parse_peek("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""),
            Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())),
        );
        // The surrogate pair D83D/DE10 decodes to the single code point U+1F610:
        // ((0xD83D - 0xD800) << 10) + (0xDE10 - 0xDC00) + 0x10000.
        assert_eq!(
            string::<Error<'_>>.parse_peek("\"\\uD83D\\uDE10\""),
            Ok(("", "\u{1F610}".to_string()))
        );

        // Unterminated strings.
        assert!(string::<Error<'_>>.parse_peek("\"").is_err());
        assert!(string::<Error<'_>>.parse_peek("\"abc").is_err());
        // An escaped quote does not close the string.
        assert!(string::<Error<'_>>.parse_peek("\"\\\"").is_err());
        // Too few hex digits.
        assert!(string::<Error<'_>>.parse_peek("\"\\u123\"").is_err());
        // Lone or mismatched surrogates.
        assert!(string::<Error<'_>>.parse_peek("\"\\uD800\"").is_err());
        assert!(string::<Error<'_>>
            .parse_peek("\"\\uD800\\uD800\"")
            .is_err());
        assert!(string::<Error<'_>>.parse_peek("\"\\uDC00\"").is_err());
    }

    /// A compact object with a number member and a string member.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}"#;

        let expected = Object(
            vec![
                ("a".to_string(), Num(42.0)),
                ("b".to_string(), Str("x".to_string())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected)));
    }

    /// A compact array with a number element and a string element.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]"#;

        let expected = Array(vec![Num(42.0), Str("x".to_string())]);

        assert_eq!(json::<Error<'_>>.parse_peek(input), Ok(("", expected)));
    }

    /// Whitespace is accepted around every token, including inside empty containers.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        // The raw literal is the parser input verbatim, so its lines are kept
        // flush-left rather than indented with the surrounding code.
        let input = r#"
{
"null" : null,
"true" :true ,
"false": false ,
"number" : 123e4 ,
"string" : " abc 123 " ,
"array" : [ false , 1 , "two" ] ,
"object" : { "a" : 1.0 , "b" : "c" } ,
"empty_array" : [ ] ,
"empty_object" : { }
}
"#;

        assert_eq!(
            json::<Error<'_>>.parse_peek(input),
            Ok((
                "",
                Object(
                    vec![
                        ("null".to_string(), Null),
                        ("true".to_string(), Boolean(true)),
                        ("false".to_string(), Boolean(false)),
                        ("number".to_string(), Num(123e4)),
                        ("string".to_string(), Str(" abc 123 ".to_string())),
                        (
                            "array".to_string(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_string())])
                        ),
                        (
                            "object".to_string(),
                            Object(
                                vec![
                                    ("a".to_string(), Num(1.0)),
                                    ("b".to_string(), Str("c".to_string())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_string(), Array(vec![]),),
                        ("empty_object".to_string(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                )
            ))
        );
    }
}
329