1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 use std::cmp;
10 use std::fmt::{self, Formatter};
11 use std::net::{Ipv4Addr, Ipv6Addr};
12
13 use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
14 #[cfg(feature = "serde")]
15 use serde::{Deserialize, Serialize};
16
17 use crate::parser::{ParseError, ParseResult};
18
/// Compact, `Copy`-able description of which kind of host a URL carries.
///
/// Unlike [`Host`], the `Domain` variant does not hold the domain text itself;
/// only IP addresses are stored inline.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum HostInternal {
    /// No host (this is what an empty domain converts to).
    None,
    /// A non-empty domain; the text is not stored in this value.
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
27
28 impl From<Host<String>> for HostInternal {
from(host: Host<String>) -> HostInternal29 fn from(host: Host<String>) -> HostInternal {
30 match host {
31 Host::Domain(ref s) if s.is_empty() => HostInternal::None,
32 Host::Domain(_) => HostInternal::Domain,
33 Host::Ipv4(address) => HostInternal::Ipv4(address),
34 Host::Ipv6(address) => HostInternal::Ipv6(address),
35 }
36 }
37 }
38
39 /// The host name of an URL.
40 #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
41 #[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
42 pub enum Host<S = String> {
43 /// A DNS domain name, as '.' dot-separated labels.
44 /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
45 /// a special URL, or percent encoded for non-special URLs. Hosts for
46 /// non-special URLs are also called opaque hosts.
47 Domain(S),
48
49 /// An IPv4 address.
50 /// `Url::host_str` returns the serialization of this address,
51 /// as four decimal integers separated by `.` dots.
52 Ipv4(Ipv4Addr),
53
54 /// An IPv6 address.
55 /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
56 /// in the format per [RFC 5952 *A Recommendation
57 /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
58 /// lowercase hexadecimal with maximal `::` compression.
59 Ipv6(Ipv6Addr),
60 }
61
62 impl<'a> Host<&'a str> {
63 /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
to_owned(&self) -> Host<String>64 pub fn to_owned(&self) -> Host<String> {
65 match *self {
66 Host::Domain(domain) => Host::Domain(domain.to_owned()),
67 Host::Ipv4(address) => Host::Ipv4(address),
68 Host::Ipv6(address) => Host::Ipv6(address),
69 }
70 }
71 }
72
73 impl Host<String> {
74 /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
75 ///
76 /// <https://url.spec.whatwg.org/#host-parsing>
parse(input: &str) -> Result<Self, ParseError>77 pub fn parse(input: &str) -> Result<Self, ParseError> {
78 if input.starts_with('[') {
79 if !input.ends_with(']') {
80 return Err(ParseError::InvalidIpv6Address);
81 }
82 return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
83 }
84 let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
85
86 let domain = Self::domain_to_ascii(&domain)?;
87
88 if domain.is_empty() {
89 return Err(ParseError::EmptyHost);
90 }
91
92 let is_invalid_domain_char = |c| {
93 matches!(
94 c,
95 '\0'..='\u{001F}'
96 | ' '
97 | '#'
98 | '%'
99 | '/'
100 | ':'
101 | '<'
102 | '>'
103 | '?'
104 | '@'
105 | '['
106 | '\\'
107 | ']'
108 | '^'
109 | '\u{007F}'
110 | '|'
111 )
112 };
113
114 if domain.find(is_invalid_domain_char).is_some() {
115 Err(ParseError::InvalidDomainCharacter)
116 } else if ends_in_a_number(&domain) {
117 let address = parse_ipv4addr(&domain)?;
118 Ok(Host::Ipv4(address))
119 } else {
120 Ok(Host::Domain(domain))
121 }
122 }
123
124 // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
parse_opaque(input: &str) -> Result<Self, ParseError>125 pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
126 if input.starts_with('[') {
127 if !input.ends_with(']') {
128 return Err(ParseError::InvalidIpv6Address);
129 }
130 return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
131 }
132
133 let is_invalid_host_char = |c| {
134 matches!(
135 c,
136 '\0' | '\t'
137 | '\n'
138 | '\r'
139 | ' '
140 | '#'
141 | '/'
142 | ':'
143 | '<'
144 | '>'
145 | '?'
146 | '@'
147 | '['
148 | '\\'
149 | ']'
150 | '^'
151 | '|'
152 )
153 };
154
155 if input.find(is_invalid_host_char).is_some() {
156 Err(ParseError::InvalidDomainCharacter)
157 } else {
158 Ok(Host::Domain(
159 utf8_percent_encode(input, CONTROLS).to_string(),
160 ))
161 }
162 }
163
164 /// convert domain with idna
domain_to_ascii(domain: &str) -> Result<String, ParseError>165 fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
166 idna::domain_to_ascii(domain).map_err(Into::into)
167 }
168 }
169
170 impl<S: AsRef<str>> fmt::Display for Host<S> {
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result171 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
172 match *self {
173 Host::Domain(ref domain) => domain.as_ref().fmt(f),
174 Host::Ipv4(ref addr) => addr.fmt(f),
175 Host::Ipv6(ref addr) => {
176 f.write_str("[")?;
177 write_ipv6(addr, f)?;
178 f.write_str("]")
179 }
180 }
181 }
182 }
183
184 impl<S, T> PartialEq<Host<T>> for Host<S>
185 where
186 S: PartialEq<T>,
187 {
eq(&self, other: &Host<T>) -> bool188 fn eq(&self, other: &Host<T>) -> bool {
189 match (self, other) {
190 (Host::Domain(a), Host::Domain(b)) => a == b,
191 (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
192 (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
193 (_, _) => false,
194 }
195 }
196 }
197
write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result198 fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
199 let segments = addr.segments();
200 let (compress_start, compress_end) = longest_zero_sequence(&segments);
201 let mut i = 0;
202 while i < 8 {
203 if i == compress_start {
204 f.write_str(":")?;
205 if i == 0 {
206 f.write_str(":")?;
207 }
208 if compress_end < 8 {
209 i = compress_end;
210 } else {
211 break;
212 }
213 }
214 write!(f, "{:x}", segments[i as usize])?;
215 if i < 7 {
216 f.write_str(":")?;
217 }
218 i += 1;
219 }
220 Ok(())
221 }
222
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
//
// Returns `(start, end)` (end exclusive) of the longest run of zero pieces.
// When several runs share the maximal length the first one wins. Runs shorter
// than two pieces are ignored; in that case the result is `(-1, -2)`, which no
// piece index can match.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = -1;

    let mut idx = 0usize;
    while idx < pieces.len() {
        if pieces[idx] != 0 {
            idx += 1;
            continue;
        }
        // Found the start of a zero run: advance to its end.
        let run_start = idx;
        while idx < pieces.len() && pieces[idx] == 0 {
            idx += 1;
        }
        let run_len = (idx - run_start) as isize;
        // Strict comparison keeps the earliest run on ties.
        if run_len > best_len {
            best_start = run_start as isize;
            best_len = run_len;
        }
    }

    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    if best_len >= 2 {
        (best_start, best_start + best_len)
    } else {
        (-1, -2)
    }
}
258
259 /// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
ends_in_a_number(input: &str) -> bool260 fn ends_in_a_number(input: &str) -> bool {
261 let mut parts = input.rsplit('.');
262 let last = parts.next().unwrap();
263 let last = if last.is_empty() {
264 if let Some(last) = parts.next() {
265 last
266 } else {
267 return false;
268 }
269 } else {
270 last
271 };
272 if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) {
273 return true;
274 }
275
276 parse_ipv4number(last).is_ok()
277 }
278
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
///
/// Accepts decimal numbers, hexadecimal numbers prefixed with `0x`/`0X`, and
/// octal numbers prefixed with `0`. A bare prefix (`"0x"`) counts as zero.
///
/// Returns `Err(())` when the input is empty or contains a character that is
/// not a digit of the detected radix.
/// Ok(None) means the input is a valid number, but it overflows a `u32`.
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    if input.is_empty() {
        return Err(());
    }

    // Pick the radix from the prefix and strip that prefix.
    let radix = if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        16
    } else if input.len() >= 2 && input.starts_with('0') {
        input = &input[1..];
        8
    } else {
        10
    };

    // Nothing left after the prefix: the number is zero.
    if input.is_empty() {
        return Ok(Some(0));
    }

    if !input.chars().all(|c| c.is_digit(radix)) {
        return Err(());
    }

    // Every character was validated above, so the only way the conversion can
    // fail here is an integer overflow, which is reported as `None`.
    Ok(u32::from_str_radix(input, radix).ok())
}
317
318 /// <https://url.spec.whatwg.org/#concept-ipv4-parser>
parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr>319 fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
320 let mut parts: Vec<&str> = input.split('.').collect();
321 if parts.last() == Some(&"") {
322 parts.pop();
323 }
324 if parts.len() > 4 {
325 return Err(ParseError::InvalidIpv4Address);
326 }
327 let mut numbers: Vec<u32> = Vec::new();
328 for part in parts {
329 match parse_ipv4number(part) {
330 Ok(Some(n)) => numbers.push(n),
331 Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
332 Err(()) => return Err(ParseError::InvalidIpv4Address),
333 };
334 }
335 let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
336 // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
337 if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
338 return Err(ParseError::InvalidIpv4Address);
339 }
340 if numbers.iter().any(|x| *x > 255) {
341 return Err(ParseError::InvalidIpv4Address);
342 }
343 for (counter, n) in numbers.iter().enumerate() {
344 ipv4 += n << (8 * (3 - counter as u32))
345 }
346 Ok(Ipv4Addr::from(ipv4))
347 }
348
349 /// <https://url.spec.whatwg.org/#concept-ipv6-parser>
parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr>350 fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
351 let input = input.as_bytes();
352 let len = input.len();
353 let mut is_ip_v4 = false;
354 let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
355 let mut piece_pointer = 0;
356 let mut compress_pointer = None;
357 let mut i = 0;
358
359 if len < 2 {
360 return Err(ParseError::InvalidIpv6Address);
361 }
362
363 if input[0] == b':' {
364 if input[1] != b':' {
365 return Err(ParseError::InvalidIpv6Address);
366 }
367 i = 2;
368 piece_pointer = 1;
369 compress_pointer = Some(1);
370 }
371
372 while i < len {
373 if piece_pointer == 8 {
374 return Err(ParseError::InvalidIpv6Address);
375 }
376 if input[i] == b':' {
377 if compress_pointer.is_some() {
378 return Err(ParseError::InvalidIpv6Address);
379 }
380 i += 1;
381 piece_pointer += 1;
382 compress_pointer = Some(piece_pointer);
383 continue;
384 }
385 let start = i;
386 let end = cmp::min(len, start + 4);
387 let mut value = 0u16;
388 while i < end {
389 match (input[i] as char).to_digit(16) {
390 Some(digit) => {
391 value = value * 0x10 + digit as u16;
392 i += 1;
393 }
394 None => break,
395 }
396 }
397 if i < len {
398 match input[i] {
399 b'.' => {
400 if i == start {
401 return Err(ParseError::InvalidIpv6Address);
402 }
403 i = start;
404 if piece_pointer > 6 {
405 return Err(ParseError::InvalidIpv6Address);
406 }
407 is_ip_v4 = true;
408 }
409 b':' => {
410 i += 1;
411 if i == len {
412 return Err(ParseError::InvalidIpv6Address);
413 }
414 }
415 _ => return Err(ParseError::InvalidIpv6Address),
416 }
417 }
418 if is_ip_v4 {
419 break;
420 }
421 pieces[piece_pointer] = value;
422 piece_pointer += 1;
423 }
424
425 if is_ip_v4 {
426 if piece_pointer > 6 {
427 return Err(ParseError::InvalidIpv6Address);
428 }
429 let mut numbers_seen = 0;
430 while i < len {
431 if numbers_seen > 0 {
432 if numbers_seen < 4 && (i < len && input[i] == b'.') {
433 i += 1
434 } else {
435 return Err(ParseError::InvalidIpv6Address);
436 }
437 }
438
439 let mut ipv4_piece = None;
440 while i < len {
441 let digit = match input[i] {
442 c @ b'0'..=b'9' => c - b'0',
443 _ => break,
444 };
445 match ipv4_piece {
446 None => ipv4_piece = Some(digit as u16),
447 Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
448 Some(ref mut v) => {
449 *v = *v * 10 + digit as u16;
450 if *v > 255 {
451 return Err(ParseError::InvalidIpv6Address);
452 }
453 }
454 }
455 i += 1;
456 }
457
458 pieces[piece_pointer] = if let Some(v) = ipv4_piece {
459 pieces[piece_pointer] * 0x100 + v
460 } else {
461 return Err(ParseError::InvalidIpv6Address);
462 };
463 numbers_seen += 1;
464
465 if numbers_seen == 2 || numbers_seen == 4 {
466 piece_pointer += 1;
467 }
468 }
469
470 if numbers_seen != 4 {
471 return Err(ParseError::InvalidIpv6Address);
472 }
473 }
474
475 if i < len {
476 return Err(ParseError::InvalidIpv6Address);
477 }
478
479 match compress_pointer {
480 Some(compress_pointer) => {
481 let mut swaps = piece_pointer - compress_pointer;
482 piece_pointer = 7;
483 while swaps > 0 {
484 pieces.swap(piece_pointer, compress_pointer + swaps - 1);
485 swaps -= 1;
486 piece_pointer -= 1;
487 }
488 }
489 _ => {
490 if piece_pointer != 8 {
491 return Err(ParseError::InvalidIpv6Address);
492 }
493 }
494 }
495 Ok(Ipv6Addr::new(
496 pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
497 ))
498 }
499