1 // Copyright 2013-2014 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 use crate::test::TestFn;
10 use std::char;
11 use std::fmt::Write;
12
13 use idna::Errors;
14
collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F)15 pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
16 // https://www.unicode.org/Public/idna/13.0.0/IdnaTestV2.txt
17 for (i, line) in include_str!("IdnaTestV2.txt").lines().enumerate() {
18 if line.is_empty() || line.starts_with('#') {
19 continue;
20 }
21
22 // Remove comments
23 let line = match line.find('#') {
24 Some(index) => &line[0..index],
25 None => line,
26 };
27
28 let mut pieces = line.split(';').map(|x| x.trim()).collect::<Vec<&str>>();
29 let source = unescape(pieces.remove(0));
30
31 // ToUnicode
32 let mut to_unicode = unescape(pieces.remove(0));
33 if to_unicode.is_empty() {
34 to_unicode = source.clone();
35 }
36 let to_unicode_status = status(pieces.remove(0));
37
38 // ToAsciiN
39 let to_ascii_n = pieces.remove(0);
40 let to_ascii_n = if to_ascii_n.is_empty() {
41 to_unicode.clone()
42 } else {
43 to_ascii_n.to_owned()
44 };
45 let to_ascii_n_status = pieces.remove(0);
46 let to_ascii_n_status = if to_ascii_n_status.is_empty() {
47 to_unicode_status.clone()
48 } else {
49 status(to_ascii_n_status)
50 };
51
52 // ToAsciiT
53 let to_ascii_t = pieces.remove(0);
54 let to_ascii_t = if to_ascii_t.is_empty() {
55 to_ascii_n.clone()
56 } else {
57 to_ascii_t.to_owned()
58 };
59 let to_ascii_t_status = pieces.remove(0);
60 let to_ascii_t_status = if to_ascii_t_status.is_empty() {
61 to_ascii_n_status.clone()
62 } else {
63 status(to_ascii_t_status)
64 };
65
66 let test_name = format!("UTS #46 line {}", i + 1);
67 add_test(
68 test_name,
69 TestFn::DynTestFn(Box::new(move || {
70 let config = idna::Config::default()
71 .use_std3_ascii_rules(true)
72 .verify_dns_length(true)
73 .check_hyphens(true);
74
75 // http://unicode.org/reports/tr46/#Deviations
76 // applications that perform IDNA2008 lookup are not required to check
77 // for these contexts, so we skip all tests annotated with C*
78
79 // Everybody ignores V2
80 // https://github.com/servo/rust-url/pull/240
81 // https://github.com/whatwg/url/issues/53#issuecomment-181528158
82 // http://www.unicode.org/review/pri317/
83
84 // "The special error codes X3 and X4_2 are now returned where a toASCII error code
85 // was formerly being generated in toUnicode due to an empty label."
86 // This is not implemented yet, so we skip toUnicode X4_2 tests for now, too.
87
88 let (to_unicode_value, to_unicode_result) =
89 config.transitional_processing(false).to_unicode(&source);
90 let to_unicode_result = to_unicode_result.map(|()| to_unicode_value);
91 check(
92 &source,
93 (&to_unicode, &to_unicode_status),
94 to_unicode_result,
95 |e| e.starts_with('C') || e == "V2" || e == "X4_2",
96 );
97
98 let to_ascii_n_result = config.transitional_processing(false).to_ascii(&source);
99 check(
100 &source,
101 (&to_ascii_n, &to_ascii_n_status),
102 to_ascii_n_result,
103 |e| e.starts_with('C') || e == "V2",
104 );
105
106 let to_ascii_t_result = config.transitional_processing(true).to_ascii(&source);
107 check(
108 &source,
109 (&to_ascii_t, &to_ascii_t_status),
110 to_ascii_t_result,
111 |e| e.starts_with('C') || e == "V2",
112 );
113 })),
114 )
115 }
116 }
117
118 #[allow(clippy::redundant_clone)]
check<F>(source: &str, expected: (&str, &[&str]), actual: Result<String, Errors>, ignore: F) where F: Fn(&str) -> bool,119 fn check<F>(source: &str, expected: (&str, &[&str]), actual: Result<String, Errors>, ignore: F)
120 where
121 F: Fn(&str) -> bool,
122 {
123 if !expected.1.is_empty() {
124 if !expected.1.iter().copied().any(ignore) {
125 let res = actual.ok();
126 assert_eq!(
127 res.clone(),
128 None,
129 "Expected error {:?}. result: {} | source: {}",
130 expected.1,
131 res.unwrap(),
132 source,
133 );
134 }
135 } else {
136 assert!(
137 actual.is_ok(),
138 "Couldn't parse {} | error: {:?}",
139 source,
140 actual.err().unwrap(),
141 );
142 assert_eq!(actual.unwrap(), expected.0, "source: {}", source);
143 }
144 }
145
/// Decodes the backslash escapes used by the test data.
///
/// * A doubled backslash becomes a single backslash.
/// * A backslash followed by `u` and exactly four hexadecimal digits becomes the
///   character with that code point. When the value is not a valid `char`
///   (`char::from_u32` returns `None`), the escape is written back as literal
///   text with upper-case hex digits.
/// * Every other character is copied unchanged.
///
/// # Panics
///
/// Panics when a backslash is the last character, is followed by anything other
/// than a backslash or `u`, or when a `u` escape is not followed by four
/// hexadecimal digits.
fn unescape(input: &str) -> String {
    let mut output = String::new();
    let mut rest = input.chars();

    while let Some(ch) = rest.next() {
        if ch != '\\' {
            output.push(ch);
            continue;
        }

        match rest.next().unwrap() {
            '\\' => output.push('\\'),
            'u' => {
                // Read the four hex digits one after another, most significant first.
                let mut digits = [0u32; 4];
                for digit in digits.iter_mut() {
                    *digit = rest.next().unwrap().to_digit(16).unwrap();
                }
                let code_point = digits.iter().fold(0u32, |acc, &digit| acc * 16 + digit);

                match char::from_u32(code_point) {
                    Some(decoded) => output.push(decoded),
                    None => {
                        write!(
                            &mut output,
                            "\\u{:X}{:X}{:X}{:X}",
                            digits[0], digits[1], digits[2], digits[3]
                        )
                        .expect("Could not write to output");
                    }
                }
            }
            _ => panic!("Invalid test data input"),
        }
    }

    output
}
178
/// Parses a bracketed status list such as `[P1, V6]` into its entries.
///
/// An empty string and `[]` both produce an empty vector. Otherwise the
/// surrounding brackets are removed and the remainder is split on `", "`; the
/// returned slices borrow from `status`.
///
/// # Panics
///
/// Panics when a non-empty `status` does not start with `[` or does not end
/// with `]`.
fn status(status: &str) -> Vec<&str> {
    if status.is_empty() || status == "[]" {
        return Vec::new();
    }

    assert!(status.starts_with('['));
    assert!(status.ends_with(']'));

    // Both brackets are single-byte characters and are distinct characters of
    // the string here, so this slice is always in bounds. The separator cannot
    // overlap a bracket, so splitting the inner text yields the same entries as
    // splitting first and trimming the brackets afterwards.
    let inner = &status[1..status.len() - 1];
    inner.split(", ").collect()
}
195