1 // Copyright 2013-2014 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 use crate::test::TestFn;
10 use std::char;
11 use std::fmt::Write;
12 
13 use idna::Errors;
14 
collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F)15 pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
16     // https://www.unicode.org/Public/idna/13.0.0/IdnaTestV2.txt
17     for (i, line) in include_str!("IdnaTestV2.txt").lines().enumerate() {
18         if line.is_empty() || line.starts_with('#') {
19             continue;
20         }
21 
22         // Remove comments
23         let line = match line.find('#') {
24             Some(index) => &line[0..index],
25             None => line,
26         };
27 
28         let mut pieces = line.split(';').map(|x| x.trim()).collect::<Vec<&str>>();
29         let source = unescape(pieces.remove(0));
30 
31         // ToUnicode
32         let mut to_unicode = unescape(pieces.remove(0));
33         if to_unicode.is_empty() {
34             to_unicode = source.clone();
35         }
36         let to_unicode_status = status(pieces.remove(0));
37 
38         // ToAsciiN
39         let to_ascii_n = pieces.remove(0);
40         let to_ascii_n = if to_ascii_n.is_empty() {
41             to_unicode.clone()
42         } else {
43             to_ascii_n.to_owned()
44         };
45         let to_ascii_n_status = pieces.remove(0);
46         let to_ascii_n_status = if to_ascii_n_status.is_empty() {
47             to_unicode_status.clone()
48         } else {
49             status(to_ascii_n_status)
50         };
51 
52         // ToAsciiT
53         let to_ascii_t = pieces.remove(0);
54         let to_ascii_t = if to_ascii_t.is_empty() {
55             to_ascii_n.clone()
56         } else {
57             to_ascii_t.to_owned()
58         };
59         let to_ascii_t_status = pieces.remove(0);
60         let to_ascii_t_status = if to_ascii_t_status.is_empty() {
61             to_ascii_n_status.clone()
62         } else {
63             status(to_ascii_t_status)
64         };
65 
66         let test_name = format!("UTS #46 line {}", i + 1);
67         add_test(
68             test_name,
69             TestFn::DynTestFn(Box::new(move || {
70                 let config = idna::Config::default()
71                     .use_std3_ascii_rules(true)
72                     .verify_dns_length(true)
73                     .check_hyphens(true);
74 
75                 // http://unicode.org/reports/tr46/#Deviations
76                 // applications that perform IDNA2008 lookup are not required to check
77                 // for these contexts, so we skip all tests annotated with C*
78 
79                 // Everybody ignores V2
80                 // https://github.com/servo/rust-url/pull/240
81                 // https://github.com/whatwg/url/issues/53#issuecomment-181528158
82                 // http://www.unicode.org/review/pri317/
83 
84                 // "The special error codes X3 and X4_2 are now returned where a toASCII error code
85                 // was formerly being generated in toUnicode due to an empty label."
86                 // This is not implemented yet, so we skip toUnicode X4_2 tests for now, too.
87 
88                 let (to_unicode_value, to_unicode_result) =
89                     config.transitional_processing(false).to_unicode(&source);
90                 let to_unicode_result = to_unicode_result.map(|()| to_unicode_value);
91                 check(
92                     &source,
93                     (&to_unicode, &to_unicode_status),
94                     to_unicode_result,
95                     |e| e.starts_with('C') || e == "V2" || e == "X4_2",
96                 );
97 
98                 let to_ascii_n_result = config.transitional_processing(false).to_ascii(&source);
99                 check(
100                     &source,
101                     (&to_ascii_n, &to_ascii_n_status),
102                     to_ascii_n_result,
103                     |e| e.starts_with('C') || e == "V2",
104                 );
105 
106                 let to_ascii_t_result = config.transitional_processing(true).to_ascii(&source);
107                 check(
108                     &source,
109                     (&to_ascii_t, &to_ascii_t_status),
110                     to_ascii_t_result,
111                     |e| e.starts_with('C') || e == "V2",
112                 );
113             })),
114         )
115     }
116 }
117 
118 #[allow(clippy::redundant_clone)]
check<F>(source: &str, expected: (&str, &[&str]), actual: Result<String, Errors>, ignore: F) where F: Fn(&str) -> bool,119 fn check<F>(source: &str, expected: (&str, &[&str]), actual: Result<String, Errors>, ignore: F)
120 where
121     F: Fn(&str) -> bool,
122 {
123     if !expected.1.is_empty() {
124         if !expected.1.iter().copied().any(ignore) {
125             let res = actual.ok();
126             assert_eq!(
127                 res.clone(),
128                 None,
129                 "Expected error {:?}. result: {} | source: {}",
130                 expected.1,
131                 res.unwrap(),
132                 source,
133             );
134         }
135     } else {
136         assert!(
137             actual.is_ok(),
138             "Couldn't parse {} | error: {:?}",
139             source,
140             actual.err().unwrap(),
141         );
142         assert_eq!(actual.unwrap(), expected.0, "source: {}", source);
143     }
144 }
145 
/// Decodes the backslash escapes used in the test data.
///
/// `\\` becomes a single backslash and `\uXXXX` (exactly four hex digits)
/// becomes the corresponding character. When the four digits do not form a
/// valid `char` (for example a lone surrogate), the escape is written back
/// as `\u` followed by the digits in upper-case hex. All other characters are
/// copied unchanged.
///
/// # Panics
///
/// Panics on a trailing backslash, on an escape other than `\\` or `\u`, and
/// on a `\u` escape that is not followed by four hex digits.
fn unescape(input: &str) -> String {
    let mut output = String::new();
    let mut chars = input.chars();

    while let Some(current) = chars.next() {
        if current != '\\' {
            output.push(current);
            continue;
        }

        match chars.next().unwrap() {
            '\\' => output.push('\\'),
            'u' => {
                // Read the four hex digits in order, most significant first.
                let mut digits = [0u32; 4];
                for digit in digits.iter_mut() {
                    *digit = chars.next().unwrap().to_digit(16).unwrap();
                }
                let code_point = digits.iter().fold(0, |acc, &digit| acc * 16 + digit);

                if let Some(decoded) = char::from_u32(code_point) {
                    output.push(decoded);
                } else {
                    // Not a valid scalar value: keep the escape in textual form.
                    write!(
                        &mut output,
                        "\\u{:X}{:X}{:X}{:X}",
                        digits[0], digits[1], digits[2], digits[3]
                    )
                    .expect("Could not write to output");
                }
            }
            _ => panic!("Invalid test data input"),
        }
    }

    output
}
178 
/// Parses a status column such as `[B1, V6]` into its individual codes.
///
/// An empty column and the literal `[]` both yield an empty list. Otherwise
/// the surrounding brackets are removed and the remainder is split on `", "`.
///
/// # Panics
///
/// Panics if a non-empty column does not start with `[` or, once that leading
/// bracket is removed, does not end with `]`.
fn status(status: &str) -> Vec<&str> {
    match status {
        "" | "[]" => Vec::new(),
        bracketed => {
            assert!(bracketed.starts_with('['));
            let without_open = &bracketed[1..];

            assert!(without_open.ends_with(']'));
            let inner = &without_open[..without_open.len() - 1];

            inner.split(", ").collect()
        }
    }
}
195