1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 use std::cmp;
10 use std::fmt::{self, Formatter};
11 use std::net::{Ipv4Addr, Ipv6Addr};
12 
13 use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
14 #[cfg(feature = "serde")]
15 use serde::{Deserialize, Serialize};
16 
17 use crate::parser::{ParseError, ParseResult};
18 
/// Compact, `Copy`-able record of which kind of host a URL has.
///
/// Unlike [`Host`], the `Domain` variant carries no string; only the IP
/// variants store their address inline.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub(crate) enum HostInternal {
    /// No host; the conversion from `Host` produces this for an empty domain.
    None,
    /// A domain host (the domain text itself is not stored here).
    Domain,
    /// An IPv4 address host.
    Ipv4(Ipv4Addr),
    /// An IPv6 address host.
    Ipv6(Ipv6Addr),
}
27 
28 impl From<Host<String>> for HostInternal {
from(host: Host<String>) -> HostInternal29     fn from(host: Host<String>) -> HostInternal {
30         match host {
31             Host::Domain(ref s) if s.is_empty() => HostInternal::None,
32             Host::Domain(_) => HostInternal::Domain,
33             Host::Ipv4(address) => HostInternal::Ipv4(address),
34             Host::Ipv6(address) => HostInternal::Ipv6(address),
35         }
36     }
37 }
38 
39 /// The host name of an URL.
40 #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
41 #[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
42 pub enum Host<S = String> {
43     /// A DNS domain name, as '.' dot-separated labels.
44     /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
45     /// a special URL, or percent encoded for non-special URLs. Hosts for
46     /// non-special URLs are also called opaque hosts.
47     Domain(S),
48 
49     /// An IPv4 address.
50     /// `Url::host_str` returns the serialization of this address,
51     /// as four decimal integers separated by `.` dots.
52     Ipv4(Ipv4Addr),
53 
54     /// An IPv6 address.
55     /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
56     /// in the format per [RFC 5952 *A Recommendation
57     /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
58     /// lowercase hexadecimal with maximal `::` compression.
59     Ipv6(Ipv6Addr),
60 }
61 
62 impl<'a> Host<&'a str> {
63     /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
to_owned(&self) -> Host<String>64     pub fn to_owned(&self) -> Host<String> {
65         match *self {
66             Host::Domain(domain) => Host::Domain(domain.to_owned()),
67             Host::Ipv4(address) => Host::Ipv4(address),
68             Host::Ipv6(address) => Host::Ipv6(address),
69         }
70     }
71 }
72 
73 impl Host<String> {
74     /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
75     ///
76     /// <https://url.spec.whatwg.org/#host-parsing>
parse(input: &str) -> Result<Self, ParseError>77     pub fn parse(input: &str) -> Result<Self, ParseError> {
78         if input.starts_with('[') {
79             if !input.ends_with(']') {
80                 return Err(ParseError::InvalidIpv6Address);
81             }
82             return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
83         }
84         let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
85 
86         let domain = Self::domain_to_ascii(&domain)?;
87 
88         if domain.is_empty() {
89             return Err(ParseError::EmptyHost);
90         }
91 
92         let is_invalid_domain_char = |c| {
93             matches!(
94                 c,
95                 '\0'..='\u{001F}'
96                     | ' '
97                     | '#'
98                     | '%'
99                     | '/'
100                     | ':'
101                     | '<'
102                     | '>'
103                     | '?'
104                     | '@'
105                     | '['
106                     | '\\'
107                     | ']'
108                     | '^'
109                     | '\u{007F}'
110                     | '|'
111             )
112         };
113 
114         if domain.find(is_invalid_domain_char).is_some() {
115             Err(ParseError::InvalidDomainCharacter)
116         } else if ends_in_a_number(&domain) {
117             let address = parse_ipv4addr(&domain)?;
118             Ok(Host::Ipv4(address))
119         } else {
120             Ok(Host::Domain(domain))
121         }
122     }
123 
124     // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
parse_opaque(input: &str) -> Result<Self, ParseError>125     pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
126         if input.starts_with('[') {
127             if !input.ends_with(']') {
128                 return Err(ParseError::InvalidIpv6Address);
129             }
130             return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
131         }
132 
133         let is_invalid_host_char = |c| {
134             matches!(
135                 c,
136                 '\0' | '\t'
137                     | '\n'
138                     | '\r'
139                     | ' '
140                     | '#'
141                     | '/'
142                     | ':'
143                     | '<'
144                     | '>'
145                     | '?'
146                     | '@'
147                     | '['
148                     | '\\'
149                     | ']'
150                     | '^'
151                     | '|'
152             )
153         };
154 
155         if input.find(is_invalid_host_char).is_some() {
156             Err(ParseError::InvalidDomainCharacter)
157         } else {
158             Ok(Host::Domain(
159                 utf8_percent_encode(input, CONTROLS).to_string(),
160             ))
161         }
162     }
163 
164     /// convert domain with idna
domain_to_ascii(domain: &str) -> Result<String, ParseError>165     fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
166         idna::domain_to_ascii(domain).map_err(Into::into)
167     }
168 }
169 
170 impl<S: AsRef<str>> fmt::Display for Host<S> {
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result171     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
172         match *self {
173             Host::Domain(ref domain) => domain.as_ref().fmt(f),
174             Host::Ipv4(ref addr) => addr.fmt(f),
175             Host::Ipv6(ref addr) => {
176                 f.write_str("[")?;
177                 write_ipv6(addr, f)?;
178                 f.write_str("]")
179             }
180         }
181     }
182 }
183 
184 impl<S, T> PartialEq<Host<T>> for Host<S>
185 where
186     S: PartialEq<T>,
187 {
eq(&self, other: &Host<T>) -> bool188     fn eq(&self, other: &Host<T>) -> bool {
189         match (self, other) {
190             (Host::Domain(a), Host::Domain(b)) => a == b,
191             (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
192             (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
193             (_, _) => false,
194         }
195     }
196 }
197 
write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result198 fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
199     let segments = addr.segments();
200     let (compress_start, compress_end) = longest_zero_sequence(&segments);
201     let mut i = 0;
202     while i < 8 {
203         if i == compress_start {
204             f.write_str(":")?;
205             if i == 0 {
206                 f.write_str(":")?;
207             }
208             if compress_end < 8 {
209                 i = compress_end;
210             } else {
211                 break;
212             }
213         }
214         write!(f, "{:x}", segments[i as usize])?;
215         if i < 7 {
216             f.write_str(":")?;
217         }
218         i += 1;
219     }
220     Ok(())
221 }
222 
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
//
// Locates the first longest run of zero pieces and returns it as a half-open
// `(start, end)` index pair. When no run is at least two pieces long, the
// sentinel pair `(-1, -2)` is returned instead.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = -1;
    let mut run_start: Option<isize> = None;

    // A trailing non-zero sentinel closes a run that reaches the last piece,
    // so every run is finished by the same branch.
    let with_sentinel = pieces.iter().copied().chain(std::iter::once(1));
    for (position, piece) in with_sentinel.enumerate() {
        let position = position as isize;
        if piece == 0 {
            if run_start.is_none() {
                run_start = Some(position);
            }
        } else if let Some(begin) = run_start.take() {
            let run_len = position - begin;
            // Strictly greater: on a tie the earliest run is kept.
            if run_len > best_len {
                best_start = begin;
                best_len = run_len;
            }
        }
    }

    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    if best_len >= 2 {
        (best_start, best_start + best_len)
    } else {
        (-1, -2)
    }
}
258 
259 /// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
ends_in_a_number(input: &str) -> bool260 fn ends_in_a_number(input: &str) -> bool {
261     let mut parts = input.rsplit('.');
262     let last = parts.next().unwrap();
263     let last = if last.is_empty() {
264         if let Some(last) = parts.next() {
265             last
266         } else {
267             return false;
268         }
269     } else {
270         last
271     };
272     if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) {
273         return true;
274     }
275 
276     parse_ipv4number(last).is_ok()
277 }
278 
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
///
/// Reads one IPv4 part as hexadecimal (`0x` / `0X` prefix), octal (a leading
/// `0` followed by at least one more character) or decimal.
///
/// Returns `Err(())` when the input is empty or contains a character that is
/// not a digit of the chosen radix.
/// Ok(None) means the input is a valid number, but it overflows a `u32`.
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    let radix = if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        16
    } else if input.len() >= 2 && input.starts_with('0') {
        input = &input[1..];
        8
    } else {
        10
    };

    // Nothing is left after the prefix: the part counts as zero.
    if input.is_empty() {
        return Ok(Some(0));
    }

    // Check the digits explicitly rather than relying on `from_str_radix`,
    // which would also accept a leading `+` sign.
    if !input.chars().all(|c| c.is_digit(radix)) {
        return Err(());
    }

    // Every character is a valid digit, so the only possible conversion
    // failure left is an integer overflow.
    Ok(u32::from_str_radix(input, radix).ok())
}
317 
318 /// <https://url.spec.whatwg.org/#concept-ipv4-parser>
parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr>319 fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
320     let mut parts: Vec<&str> = input.split('.').collect();
321     if parts.last() == Some(&"") {
322         parts.pop();
323     }
324     if parts.len() > 4 {
325         return Err(ParseError::InvalidIpv4Address);
326     }
327     let mut numbers: Vec<u32> = Vec::new();
328     for part in parts {
329         match parse_ipv4number(part) {
330             Ok(Some(n)) => numbers.push(n),
331             Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
332             Err(()) => return Err(ParseError::InvalidIpv4Address),
333         };
334     }
335     let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
336     // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
337     if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
338         return Err(ParseError::InvalidIpv4Address);
339     }
340     if numbers.iter().any(|x| *x > 255) {
341         return Err(ParseError::InvalidIpv4Address);
342     }
343     for (counter, n) in numbers.iter().enumerate() {
344         ipv4 += n << (8 * (3 - counter as u32))
345     }
346     Ok(Ipv4Addr::from(ipv4))
347 }
348 
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
///
/// Parses the text of an IPv6 address (without the surrounding brackets)
/// into an `Ipv6Addr`. The address may contain one `::` compression and may
/// end in a dotted-decimal IPv4 address occupying the last two pieces.
///
/// Every failure is reported as `ParseError::InvalidIpv6Address`.
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
    let input = input.as_bytes();
    let len = input.len();
    let mut is_ip_v4 = false;
    // The eight 16-bit pieces of the address being built.
    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
    // Index of the next piece to fill.
    let mut piece_pointer = 0;
    // Piece index at which a `::` compression was seen, if any.
    let mut compress_pointer = None;
    // Byte offset of the parser within `input`.
    let mut i = 0;

    // Anything shorter than two bytes is rejected up front; this also makes
    // the `input[1]` access below safe.
    if len < 2 {
        return Err(ParseError::InvalidIpv6Address);
    }

    // A leading ':' is only valid as the first half of a leading "::".
    if input[0] == b':' {
        if input[1] != b':' {
            return Err(ParseError::InvalidIpv6Address);
        }
        i = 2;
        piece_pointer = 1;
        compress_pointer = Some(1);
    }

    while i < len {
        // More input remains but all eight pieces are already filled.
        if piece_pointer == 8 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // A ':' at the start of a piece is the second colon of a "::".
        if input[i] == b':' {
            // Only one compression is allowed per address.
            if compress_pointer.is_some() {
                return Err(ParseError::InvalidIpv6Address);
            }
            i += 1;
            piece_pointer += 1;
            compress_pointer = Some(piece_pointer);
            continue;
        }
        // Read up to four hexadecimal digits into `value`.
        let start = i;
        let end = cmp::min(len, start + 4);
        let mut value = 0u16;
        while i < end {
            match (input[i] as char).to_digit(16) {
                Some(digit) => {
                    value = value * 0x10 + digit as u16;
                    i += 1;
                }
                None => break,
            }
        }
        if i < len {
            match input[i] {
                b'.' => {
                    // A '.' means this piece starts an embedded IPv4 address.
                    // It needs at least one digit before the dot and two free
                    // pieces; rewind so the IPv4 loop re-reads it as decimal.
                    if i == start {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    i = start;
                    if piece_pointer > 6 {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    is_ip_v4 = true;
                }
                b':' => {
                    // Piece separator; the address may not end right after it.
                    i += 1;
                    if i == len {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                }
                _ => return Err(ParseError::InvalidIpv6Address),
            }
        }
        // The hexadecimal value read above is discarded for an IPv4 tail.
        if is_ip_v4 {
            break;
        }
        pieces[piece_pointer] = value;
        piece_pointer += 1;
    }

    if is_ip_v4 {
        if piece_pointer > 6 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // Count of dotted-decimal numbers consumed so far (exactly four are
        // required).
        let mut numbers_seen = 0;
        while i < len {
            // Every number after the first must be preceded by a '.'.
            if numbers_seen > 0 {
                if numbers_seen < 4 && (i < len && input[i] == b'.') {
                    i += 1
                } else {
                    return Err(ParseError::InvalidIpv6Address);
                }
            }

            // Read one decimal number in the range 0..=255.
            let mut ipv4_piece = None;
            while i < len {
                let digit = match input[i] {
                    c @ b'0'..=b'9' => c - b'0',
                    _ => break,
                };
                match ipv4_piece {
                    None => ipv4_piece = Some(digit as u16),
                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
                    Some(ref mut v) => {
                        *v = *v * 10 + digit as u16;
                        if *v > 255 {
                            return Err(ParseError::InvalidIpv6Address);
                        }
                    }
                }
                i += 1;
            }

            // Two IPv4 numbers are packed into each 16-bit piece; a missing
            // number (no digit read) is an error.
            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
                pieces[piece_pointer] * 0x100 + v
            } else {
                return Err(ParseError::InvalidIpv6Address);
            };
            numbers_seen += 1;

            // Move to the next piece after every second number.
            if numbers_seen == 2 || numbers_seen == 4 {
                piece_pointer += 1;
            }
        }

        if numbers_seen != 4 {
            return Err(ParseError::InvalidIpv6Address);
        }
    }

    // Reject any input left unconsumed.
    if i < len {
        return Err(ParseError::InvalidIpv6Address);
    }

    match compress_pointer {
        Some(compress_pointer) => {
            // Move the pieces parsed after the "::" to the end of the array,
            // leaving the zeros of the compressed run in between.
            let mut swaps = piece_pointer - compress_pointer;
            piece_pointer = 7;
            while swaps > 0 {
                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
                swaps -= 1;
                piece_pointer -= 1;
            }
        }
        _ => {
            // Without compression all eight pieces must have been given.
            if piece_pointer != 8 {
                return Err(ParseError::InvalidIpv6Address);
            }
        }
    }
    Ok(Ipv6Addr::new(
        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
    ))
}
499