1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10 //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11 //! as used by HTML forms.
12 //!
//! Converts between a string (such as a URL’s query string)
14 //! and a sequence of (name, value) pairs.
15 #![no_std]
16 
17 // For forwards compatibility
18 #[cfg(feature = "std")]
19 extern crate std as _;
20 
21 extern crate alloc;
22 
23 #[cfg(not(feature = "alloc"))]
24 compile_error!("the `alloc` feature must currently be enabled");
25 
26 use alloc::borrow::{Borrow, Cow, ToOwned};
27 use alloc::string::String;
28 use core::str;
29 use percent_encoding::{percent_decode, percent_encode_byte};
30 
31 /// Convert a byte string in the `application/x-www-form-urlencoded` syntax
32 /// into a iterator of (name, value) pairs.
33 ///
34 /// Use `parse(input.as_bytes())` to parse a `&str` string.
35 ///
36 /// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
37 /// converted to `[("#first", "%try%")]`.
38 #[inline]
parse(input: &[u8]) -> Parse<'_>39 pub fn parse(input: &[u8]) -> Parse<'_> {
40     Parse { input }
41 }
/// The return type of `parse()`.
///
/// An iterator over the (name, value) pairs found in the borrowed input.
/// It is `Copy`, so a parser can be duplicated cheaply to iterate the same input twice.
#[derive(Copy, Clone, Debug)]
pub struct Parse<'a> {
    /// The bytes that have not been consumed by the iterator yet.
    input: &'a [u8],
}
47 
48 impl<'a> Iterator for Parse<'a> {
49     type Item = (Cow<'a, str>, Cow<'a, str>);
50 
next(&mut self) -> Option<Self::Item>51     fn next(&mut self) -> Option<Self::Item> {
52         loop {
53             if self.input.is_empty() {
54                 return None;
55             }
56             let mut split2 = self.input.splitn(2, |&b| b == b'&');
57             let sequence = split2.next().unwrap();
58             self.input = split2.next().unwrap_or(&[][..]);
59             if sequence.is_empty() {
60                 continue;
61             }
62             let mut split2 = sequence.splitn(2, |&b| b == b'=');
63             let name = split2.next().unwrap();
64             let value = split2.next().unwrap_or(&[][..]);
65             return Some((decode(name), decode(value)));
66         }
67     }
68 }
69 
decode(input: &[u8]) -> Cow<'_, str>70 fn decode(input: &[u8]) -> Cow<'_, str> {
71     let replaced = replace_plus(input);
72     decode_utf8_lossy(match percent_decode(&replaced).into() {
73         Cow::Owned(vec) => Cow::Owned(vec),
74         Cow::Borrowed(_) => replaced,
75     })
76 }
77 
/// Replace every b'+' with b' '.
///
/// Input without any `+` is returned borrowed; otherwise an owned copy with
/// the replacements applied is returned.
fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
    let first_plus = match input.iter().position(|&b| b == b'+') {
        Some(index) => index,
        None => return Cow::Borrowed(input),
    };

    let mut owned = input.to_owned();
    // Nothing before `first_plus` can be a `+`, so only the tail is scanned.
    for byte in owned[first_plus..].iter_mut().filter(|byte| **byte == b'+') {
        *byte = b' ';
    }
    Cow::Owned(owned)
}
94 
95 impl<'a> Parse<'a> {
96     /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
into_owned(self) -> ParseIntoOwned<'a>97     pub fn into_owned(self) -> ParseIntoOwned<'a> {
98         ParseIntoOwned { inner: self }
99     }
100 }
101 
102 /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
103 pub struct ParseIntoOwned<'a> {
104     inner: Parse<'a>,
105 }
106 
107 impl<'a> Iterator for ParseIntoOwned<'a> {
108     type Item = (String, String);
109 
next(&mut self) -> Option<Self::Item>110     fn next(&mut self) -> Option<Self::Item> {
111         self.inner
112             .next()
113             .map(|(k, v)| (k.into_owned(), v.into_owned()))
114     }
115 }
116 
117 /// The [`application/x-www-form-urlencoded` byte serializer](
118 /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
119 ///
120 /// Return an iterator of `&str` slices.
byte_serialize(input: &[u8]) -> ByteSerialize<'_>121 pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
122     ByteSerialize { bytes: input }
123 }
124 
/// Return value of `byte_serialize()`.
///
/// An iterator over the serialized chunks of the borrowed input. Cloning it
/// is cheap (it only copies a slice reference) and gives an independent
/// iterator over the remaining bytes.
#[derive(Clone, Debug)]
pub struct ByteSerialize<'a> {
    /// The bytes that have not been serialized yet.
    bytes: &'a [u8],
}
130 
/// Whether `byte` is emitted as-is by the byte serializer: ASCII letters,
/// ASCII digits, and `*`, `-`, `.`, `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'*' | b'-' | b'.' | b'_')
}
134 
135 impl<'a> Iterator for ByteSerialize<'a> {
136     type Item = &'a str;
137 
next(&mut self) -> Option<&'a str>138     fn next(&mut self) -> Option<&'a str> {
139         if let Some((&first, tail)) = self.bytes.split_first() {
140             if !byte_serialized_unchanged(first) {
141                 self.bytes = tail;
142                 return Some(if first == b' ' {
143                     "+"
144                 } else {
145                     percent_encode_byte(first)
146                 });
147             }
148             let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
149             let (unchanged_slice, remaining) = match position {
150                 // 1 for first_byte + i unchanged in tail
151                 Some(i) => self.bytes.split_at(1 + i),
152                 None => (self.bytes, &[][..]),
153             };
154             self.bytes = remaining;
155             // This unsafe is appropriate because we have already checked these
156             // bytes in byte_serialized_unchanged, which checks for a subset
157             // of UTF-8. So we know these bytes are valid UTF-8, and doing
158             // another UTF-8 check would be wasteful.
159             Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
160         } else {
161             None
162         }
163     }
164 
size_hint(&self) -> (usize, Option<usize>)165     fn size_hint(&self) -> (usize, Option<usize>) {
166         if self.bytes.is_empty() {
167             (0, Some(0))
168         } else {
169             (1, Some(self.bytes.len()))
170         }
171     }
172 }
173 
174 /// The [`application/x-www-form-urlencoded` serializer](
175 /// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
176 pub struct Serializer<'a, T: Target> {
177     target: Option<T>,
178     start_position: usize,
179     encoding: EncodingOverride<'a>,
180 }
181 
/// An output that a `Serializer` can write into.
///
/// A target gives mutable access to a `String` while serializing and is
/// consumed into `Finished` when serialization is done.
pub trait Target {
    /// The value produced by `finish()`.
    type Finished;

    /// Borrow the `String` that serialized output is appended to.
    fn as_mut_string(&mut self) -> &mut String;

    /// Consume the target and return the finished value.
    fn finish(self) -> Self::Finished;
}
187 
188 impl Target for String {
as_mut_string(&mut self) -> &mut String189     fn as_mut_string(&mut self) -> &mut String {
190         self
191     }
finish(self) -> Self192     fn finish(self) -> Self {
193         self
194     }
195     type Finished = Self;
196 }
197 
198 impl<'a> Target for &'a mut String {
as_mut_string(&mut self) -> &mut String199     fn as_mut_string(&mut self) -> &mut String {
200         self
201     }
finish(self) -> Self202     fn finish(self) -> Self {
203         self
204     }
205     type Finished = Self;
206 }
207 
208 impl<'a, T: Target> Serializer<'a, T> {
209     /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
210     ///
211     /// If the target is non-empty,
212     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
new(target: T) -> Self213     pub fn new(target: T) -> Self {
214         Self::for_suffix(target, 0)
215     }
216 
217     /// Create a new `application/x-www-form-urlencoded` serializer
218     /// for a suffix of the given target.
219     ///
220     /// If that suffix is non-empty,
221     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
for_suffix(mut target: T, start_position: usize) -> Self222     pub fn for_suffix(mut target: T, start_position: usize) -> Self {
223         if target.as_mut_string().len() < start_position {
224             panic!(
225                 "invalid length {} for target of length {}",
226                 start_position,
227                 target.as_mut_string().len()
228             );
229         }
230 
231         Serializer {
232             target: Some(target),
233             start_position,
234             encoding: None,
235         }
236     }
237 
238     /// Remove any existing name/value pair.
239     ///
240     /// Panics if called after `.finish()`.
clear(&mut self) -> &mut Self241     pub fn clear(&mut self) -> &mut Self {
242         string(&mut self.target).truncate(self.start_position);
243         self
244     }
245 
246     /// Set the character encoding to be used for names and values before percent-encoding.
encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self247     pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
248         self.encoding = new;
249         self
250     }
251 
252     /// Serialize and append a name/value pair.
253     ///
254     /// Panics if called after `.finish()`.
append_pair(&mut self, name: &str, value: &str) -> &mut Self255     pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
256         append_pair(
257             string(&mut self.target),
258             self.start_position,
259             self.encoding,
260             name,
261             value,
262         );
263         self
264     }
265 
266     /// Serialize and append a name of parameter without any value.
267     ///
268     /// Panics if called after `.finish()`.
append_key_only(&mut self, name: &str) -> &mut Self269     pub fn append_key_only(&mut self, name: &str) -> &mut Self {
270         append_key_only(
271             string(&mut self.target),
272             self.start_position,
273             self.encoding,
274             name,
275         );
276         self
277     }
278 
279     /// Serialize and append a number of name/value pairs.
280     ///
281     /// This simply calls `append_pair` repeatedly.
282     /// This can be more convenient, so the user doesn’t need to introduce a block
283     /// to limit the scope of `Serializer`’s borrow of its string.
284     ///
285     /// Panics if called after `.finish()`.
extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,286     pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
287     where
288         I: IntoIterator,
289         I::Item: Borrow<(K, V)>,
290         K: AsRef<str>,
291         V: AsRef<str>,
292     {
293         {
294             let string = string(&mut self.target);
295             for pair in iter {
296                 let (k, v) = pair.borrow();
297                 append_pair(
298                     string,
299                     self.start_position,
300                     self.encoding,
301                     k.as_ref(),
302                     v.as_ref(),
303                 );
304             }
305         }
306         self
307     }
308 
309     /// Serialize and append a number of names without values.
310     ///
311     /// This simply calls `append_key_only` repeatedly.
312     /// This can be more convenient, so the user doesn’t need to introduce a block
313     /// to limit the scope of `Serializer`’s borrow of its string.
314     ///
315     /// Panics if called after `.finish()`.
extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self where I: IntoIterator, I::Item: Borrow<K>, K: AsRef<str>,316     pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
317     where
318         I: IntoIterator,
319         I::Item: Borrow<K>,
320         K: AsRef<str>,
321     {
322         {
323             let string = string(&mut self.target);
324             for key in iter {
325                 let k = key.borrow().as_ref();
326                 append_key_only(string, self.start_position, self.encoding, k);
327             }
328         }
329         self
330     }
331 
332     /// If this serializer was constructed with a string, take and return that string.
333     ///
334     /// ```rust
335     /// use form_urlencoded;
336     /// let encoded: String = form_urlencoded::Serializer::new(String::new())
337     ///     .append_pair("foo", "bar & baz")
338     ///     .append_pair("saison", "Été+hiver")
339     ///     .finish();
340     /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
341     /// ```
342     ///
343     /// Panics if called more than once.
finish(&mut self) -> T::Finished344     pub fn finish(&mut self) -> T::Finished {
345         self.target
346             .take()
347             .expect("url::form_urlencoded::Serializer double finish")
348             .finish()
349     }
350 }
351 
/// Push a `&` separator when something has already been written after `start_position`.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    let has_content = string.len() > start_position;
    if has_content {
        string.push('&');
    }
}
357 
string<T: Target>(target: &mut Option<T>) -> &mut String358 fn string<T: Target>(target: &mut Option<T>) -> &mut String {
359     target
360         .as_mut()
361         .expect("url::form_urlencoded::Serializer finished")
362         .as_mut_string()
363 }
364 
append_pair( string: &mut String, start_position: usize, encoding: EncodingOverride<'_>, name: &str, value: &str, )365 fn append_pair(
366     string: &mut String,
367     start_position: usize,
368     encoding: EncodingOverride<'_>,
369     name: &str,
370     value: &str,
371 ) {
372     append_separator_if_needed(string, start_position);
373     append_encoded(name, string, encoding);
374     string.push('=');
375     append_encoded(value, string, encoding);
376 }
377 
append_key_only( string: &mut String, start_position: usize, encoding: EncodingOverride, name: &str, )378 fn append_key_only(
379     string: &mut String,
380     start_position: usize,
381     encoding: EncodingOverride,
382     name: &str,
383 ) {
384     append_separator_if_needed(string, start_position);
385     append_encoded(name, string, encoding);
386 }
387 
append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>)388 fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) {
389     string.extend(byte_serialize(&encode(encoding, s)))
390 }
391 
encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]>392 pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> {
393     if let Some(o) = encoding_override {
394         return o(input);
395     }
396     input.as_bytes().into()
397 }
398 
/// Convert bytes to a string, replacing invalid UTF-8 sequences lossily.
///
/// Borrowed input is passed straight to `String::from_utf8_lossy`. Owned
/// input that is already valid UTF-8 is turned into a `String` without
/// copying.
pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.
    let bytes = match input {
        Cow::Borrowed(slice) => return String::from_utf8_lossy(slice),
        Cow::Owned(vec) => vec,
    };

    match String::from_utf8_lossy(&bytes) {
        // Invalid sequences were replaced, so a fresh string was built.
        Cow::Owned(lossy) => Cow::Owned(lossy),
        Cow::Borrowed(utf8) => {
            // `from_utf8_lossy` only borrows when nothing had to be replaced,
            // i.e. when `bytes` is entirely valid UTF-8. In that case the
            // borrow must point at `bytes` itself, which is checked below.
            let raw_utf8: *const [u8] = utf8.as_bytes();
            debug_assert!(raw_utf8 == &*bytes as *const [u8]);

            // SAFETY: `bytes` was just validated as UTF-8 (see above), and we
            // own it, so it can back the `String` directly without a copy.
            Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
        }
    }
}
426 
/// An optional custom encoder for names and values.
///
/// When set, the function receives each name or value as a `&str` and returns
/// the bytes that are then byte-serialized; when `None`, the UTF-8 bytes of
/// the string are used as-is.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;
428