1 #[allow(unused, deprecated)]
2 use std::ascii::AsciiExt;
3 use std::error::Error;
4 use std::fmt;
5 use std::iter::Enumerate;
6 use std::str::Bytes;
7 
8 use super::{Mime, MimeIter, Source, ParamSource, Indexed, CHARSET, UTF_8};
9 
/// The ways in which parsing a media type string can fail.
#[derive(Debug)]
pub enum ParseError {
    /// The input ended before a `/` separating type and subtype was seen.
    MissingSlash,
    /// The input ended while reading a parameter name, before its `=`.
    MissingEqual,
    /// The input ended inside a quoted parameter value, before the closing `"`.
    MissingQuote,
    /// A byte that is not allowed at its position was encountered.
    InvalidToken {
        /// Byte offset of the offending byte within the parsed string.
        pos: usize,
        /// The offending byte itself.
        byte: u8,
    },
}
20 
21 impl ParseError {
s(&self) -> &str22     fn s(&self) -> &str {
23         use self::ParseError::*;
24 
25         match *self {
26             MissingSlash => "a slash (/) was missing between the type and subtype",
27             MissingEqual => "an equals sign (=) was missing between a parameter and its value",
28             MissingQuote => "a quote (\") was missing from a parameter value",
29             InvalidToken { .. } => "an invalid token was encountered",
30         }
31     }
32 }
33 
34 impl fmt::Display for ParseError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result35     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
36         if let ParseError::InvalidToken { pos, byte } = *self {
37             write!(f, "{}, {:X} at position {}", self.s(), byte, pos)
38         } else {
39             f.write_str(self.s())
40         }
41     }
42 }
43 
44 impl Error for ParseError {
45     // Minimum Rust is 1.15, Error::description was still required then
46     #[allow(deprecated)]
description(&self) -> &str47     fn description(&self) -> &str {
48         self.s()
49     }
50 }
51 
52 impl<'a> MimeIter<'a> {
53     /// A new iterator over mimes or media types
new(s: &'a str) -> Self54     pub fn new(s: &'a str) -> Self {
55         Self {
56             pos: 0,
57             source: s,
58         }
59     }
60 }
61 
62 impl<'a> Iterator for MimeIter<'a> {
63     type Item = Result<Mime, &'a str>;
64 
next(&mut self) -> Option<Self::Item>65     fn next(&mut self) -> Option<Self::Item> {
66         let start = self.pos;
67         let len = self.source.bytes().len();
68 
69         if start >= len {
70             return None
71         }
72 
73         // Try parsing the whole remaining slice, until the end
74         match parse(&self.source[start ..len]) {
75             Ok(value) => {
76                 self.pos = len;
77                 Some(Ok(value))
78             }
79             Err(ParseError::InvalidToken { pos, .. }) => {
80                 // The first token is immediately found to be wrong by `parse`. Skip it
81                 if pos == 0 {
82                     self.pos += 1;
83                     return self.next()
84                 }
85                 let slice = &self.source[start .. start + pos];
86                 // Try parsing the longest slice (until the first invalid token)
87                 return match parse(slice) {
88                     Ok(mime) => {
89                         self.pos = start + pos + 1;
90                         Some(Ok(mime))
91                     }
92                     Err(_) => {
93                         if start + pos < len {
94                             // Skip this invalid slice,
95                             // try parsing the remaining slice in the next iteration
96                             self.pos = start + pos;
97                             Some(Err(slice))
98                         } else {
99                             None
100                         }
101                     }
102                 }
103             }
104             // Do not process any other error condition: the slice is malformed and
105             // no character is found to be invalid: a character is missing
106             Err(_) => None,
107         }
108     }
109 }
110 
parse(s: &str) -> Result<Mime, ParseError>111 pub fn parse(s: &str) -> Result<Mime, ParseError> {
112     if s == "*/*" {
113         return Ok(::STAR_STAR);
114     }
115 
116     let mut iter = s.bytes().enumerate();
117     // toplevel
118     let mut start;
119     let slash;
120     loop {
121         match iter.next() {
122             Some((_, c)) if is_token(c) => (),
123             Some((i, b'/')) if i > 0 => {
124                 slash = i;
125                 start = i + 1;
126                 break;
127             },
128             None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime
129             Some((pos, byte)) => return Err(ParseError::InvalidToken {
130                 pos: pos,
131                 byte: byte,
132             })
133         };
134 
135     }
136 
137     // sublevel
138     let mut plus = None;
139     loop {
140         match iter.next() {
141             Some((i, b'+')) if i > start => {
142                 plus = Some(i);
143             },
144             Some((i, b';')) if i > start => {
145                 start = i;
146                 break;
147             },
148             Some((_, c)) if is_token(c) => (),
149             None => {
150                 return Ok(Mime {
151                     source: Source::Dynamic(s.to_ascii_lowercase()),
152                     slash: slash,
153                     plus: plus,
154                     params: ParamSource::None,
155                 });
156             },
157             Some((pos, byte)) => return Err(ParseError::InvalidToken {
158                 pos: pos,
159                 byte: byte,
160             })
161         };
162     }
163 
164     // params
165     let params = params_from_str(s, &mut iter, start)?;
166 
167     let src = match params {
168         ParamSource::Utf8(_)  => s.to_ascii_lowercase(),
169         ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices),
170         ParamSource::None => {
171             // Chop off the empty list
172             s[..start].to_ascii_lowercase()
173         }
174     };
175 
176     Ok(Mime {
177         source: Source::Dynamic(src),
178         slash: slash,
179         plus: plus,
180         params: params,
181     })
182 }
183 
184 
params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError>185 fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> {
186     let semicolon = start;
187     start += 1;
188     let mut params = ParamSource::None;
189     'params: while start < s.len() {
190         let name;
191         // name
192         'name: loop {
193             match iter.next() {
194                 Some((i, b' ')) if i == start => {
195                     start = i + 1;
196                     continue 'params;
197                 },
198                 Some((_, c)) if is_token(c) => (),
199                 Some((i, b'=')) if i > start => {
200                     name = Indexed(start, i);
201                     start = i + 1;
202                     break 'name;
203                 },
204                 None => return Err(ParseError::MissingEqual),
205                 Some((pos, byte)) => return Err(ParseError::InvalidToken {
206                     pos: pos,
207                     byte: byte,
208                 }),
209             }
210         }
211 
212         let value;
213         // values must be restrict-name-char or "anything goes"
214         let mut is_quoted = false;
215 
216         'value: loop {
217             if is_quoted {
218                 match iter.next() {
219                     Some((i, b'"')) if i > start => {
220                         value = Indexed(start, i);
221                         break 'value;
222                     },
223                     Some((_, c)) if is_restricted_quoted_char(c) => (),
224                     None => return Err(ParseError::MissingQuote),
225                     Some((pos, byte)) => return Err(ParseError::InvalidToken {
226                         pos: pos,
227                         byte: byte,
228                     }),
229                 }
230             } else {
231                 match iter.next() {
232                     Some((i, b'"')) if i == start => {
233                         is_quoted = true;
234                         start = i + 1;
235                     },
236                     Some((_, c)) if is_token(c) => (),
237                     Some((i, b';')) if i > start => {
238                         value = Indexed(start, i);
239                         start = i + 1;
240                         break 'value;
241                     }
242                     None => {
243                         value = Indexed(start, s.len());
244                         start = s.len();
245                         break 'value;
246                     },
247 
248                     Some((pos, byte)) => return Err(ParseError::InvalidToken {
249                         pos: pos,
250                         byte: byte,
251                     }),
252                 }
253             }
254         }
255 
256         if is_quoted {
257             'ws: loop {
258                 match iter.next() {
259                     Some((i, b';')) => {
260                         // next param
261                         start = i + 1;
262                         break 'ws;
263                     },
264                     Some((_, b' ')) => {
265                         // skip whitespace
266                     },
267                     None => {
268                         // eof
269                         start = s.len();
270                         break 'ws;
271                     },
272                     Some((pos, byte)) => return Err(ParseError::InvalidToken {
273                         pos: pos,
274                         byte: byte,
275                     }),
276                 }
277             }
278         }
279 
280         match params {
281             ParamSource::Utf8(i) => {
282                 let i = i + 2;
283                 let charset = Indexed(i, "charset".len() + i);
284                 let utf8 = Indexed(charset.1 + 1, charset.1 + "utf-8".len() + 1);
285                 params = ParamSource::Custom(semicolon, vec![
286                     (charset, utf8),
287                     (name, value),
288                 ]);
289             },
290             ParamSource::Custom(_, ref mut vec) => {
291                 vec.push((name, value));
292             },
293             ParamSource::None => {
294                 if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] {
295                     if UTF_8 == &s[value.0..value.1] {
296                         params = ParamSource::Utf8(semicolon);
297                         continue 'params;
298                     }
299                 }
300                 params = ParamSource::Custom(semicolon, vec![(name, value)]);
301             },
302         }
303     }
304     Ok(params)
305 }
306 
lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String307 fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String {
308     let mut owned = s.to_owned();
309     owned[..semi].make_ascii_lowercase();
310 
311     for &(ref name, ref value) in params {
312         owned[name.0..name.1].make_ascii_lowercase();
313         // Since we just converted this part of the string to lowercase,
314         // we can skip the `Name == &str` unicase check and do a faster
315         // memcmp instead.
316         if &owned[name.0..name.1] == CHARSET.source {
317             owned[value.0..value.1].make_ascii_lowercase();
318         }
319     }
320 
321     owned
322 }
323 
324 // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
325 //
326 // > All registered media types MUST be assigned top-level type and
327 // > subtype names.  The combination of these names serves to uniquely
328 // > identify the media type, and the subtype name facet (or the absence
329 // > of one) identifies the registration tree.  Both top-level type and
330 // > subtype names are case-insensitive.
331 // >
332 // > Type and subtype names MUST conform to the following ABNF:
333 // >
334 // >     type-name = restricted-name
335 // >     subtype-name = restricted-name
336 // >
337 // >     restricted-name = restricted-name-first *126restricted-name-chars
338 // >     restricted-name-first  = ALPHA / DIGIT
339 // >     restricted-name-chars  = ALPHA / DIGIT / "!" / "#" /
340 // >                              "$" / "&" / "-" / "^" / "_"
341 // >     restricted-name-chars =/ "." ; Characters before first dot always
342 // >                                  ; specify a facet name
343 // >     restricted-name-chars =/ "+" ; Characters after last plus always
344 // >                                  ; specify a structured syntax suffix
345 
346 // However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1):
347 //
348 // >     media-type = type "/" subtype *( OWS ";" OWS parameter )
349 // >     type       = token
350 // >     subtype    = token
351 // >     parameter  = token "=" ( token / quoted-string )
352 //
353 // Where token is defined as:
354 //
355 // >     token = 1*tchar
356 // >     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
357 // >        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
358 //
// So, clearly, ¯\_(ツ)_/¯
360 
// Builds a `bool` array from a comma-terminated list of integer flags:
// every non-zero flag becomes `true`, every zero becomes `false`.
macro_rules! byte_map {
    ($($bit:expr,)*) => {
        [$($bit != 0,)*]
    };
}
366 
// Lookup table indexed by byte value: an entry is `true` when that byte is
// accepted as a token character. One row covers 16 consecutive byte values.
static TOKEN_MAP: [bool; 256] = byte_map![
    // 0x00-0x1F: control characters
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20-0x2F: space ! " # $ % & ' ( ) * + , - . /
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
    // 0x30-0x3F: 0-9 : ; < = > ?
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    // 0x40-0x4F: @ A-O
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x50-0x5F: P-Z [ \ ] ^ _
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    // 0x60-0x6F: ` a-o
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70-0x7F: p-z { | } ~ DEL
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
    // 0x80-0xFF: non-ASCII bytes
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
];
385 
is_token(c: u8) -> bool386 fn is_token(c: u8) -> bool {
387     TOKEN_MAP[c as usize]
388 }
389 
/// Reports whether `c` may appear inside a quoted parameter value: every
/// byte except the values 0 through 31 and 127.
fn is_restricted_quoted_char(c: u8) -> bool {
    !(c < 32 || c == 127)
}
393 
// Checks every entry of `TOKEN_MAP` against the HTTP `tchar` set
// (letters, digits and a fixed list of punctuation characters).
#[test]
#[allow(warnings)]
fn test_lookup_tables() {
    // The punctuation characters permitted in a token.
    let punctuation: &[u8] = b"!#$%&'*+-.^_`|~";

    for (idx, &valid) in TOKEN_MAP.iter().enumerate() {
        let byte = idx as u8;
        let is_lower = b'a' <= byte && byte <= b'z';
        let is_upper = b'A' <= byte && byte <= b'Z';
        let is_digit = b'0' <= byte && byte <= b'9';
        let should = is_lower || is_upper || is_digit || punctuation.contains(&byte);
        assert_eq!(valid, should, "{:?} ({}) should be {}", byte as char, byte, should);
    }
}
423 
// Valid inputs: the iterator yields each media type and then ends.
#[test]
fn test_parse_iterator() {
    let json = parse("application/json").unwrap();

    // (input, number of `application/json` items expected before `None`)
    let cases: [(&str, usize); 3] = [
        ("application/json, application/json", 2),
        ("application/json", 1),
        ("application/json;  ", 1),
    ];

    for &(input, expected_items) in cases.iter() {
        let mut mimes = MimeIter::new(input);
        for _ in 0..expected_items {
            assert_eq!(mimes.next().unwrap().unwrap(), json);
        }
        assert_eq!(mimes.next(), None);
    }
}
439 
// An invalid entry between two valid ones is reported as `Err` carrying the
// rejected text, and iteration continues afterwards.
#[test]
fn test_parse_iterator_invalid() {
    let json = parse("application/json").unwrap();
    let mut mimes = MimeIter::new("application/json, invalid, application/json");

    let first = mimes.next().unwrap();
    assert_eq!(first.unwrap(), json);

    let second = mimes.next().unwrap();
    assert_eq!(second.unwrap_err(), "invalid");

    let third = mimes.next().unwrap();
    assert_eq!(third.unwrap(), json);

    assert_eq!(mimes.next(), None);
}
448 
// Every entry is invalid (no slash between type and subtype). The first one
// is reported as `Err` with the rejected text. The last one fails with a
// "missing" error rather than an invalid byte, so the iterator ends instead
// of reporting it.
#[test]
fn test_parse_iterator_all_invalid() {
    // The entries must really be invalid: with `application/json, text/html`
    // the first item is `Ok` and `unwrap_err` would panic.
    let mut iter = MimeIter::new("application-json, text-html");
    assert_eq!(iter.next().unwrap().unwrap_err(), "application-json");
    assert_eq!(iter.next(), None);
}
455