1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 use super::UnicodeSegmentation;
12 
13 use std::prelude::v1::*;
14 
/// Verifies grapheme cluster segmentation in both extended and legacy mode.
///
/// The `TEST_SAME` / `TEST_DIFF` tables from `crate::testdata`, plus a few
/// extra cases declared here, are run through the forward and reverse
/// grapheme iterators. Afterwards `grapheme_indices` (offsets and
/// `size_hint`) and reverse iteration over a string starting with `"\n"` are
/// spot-checked.
#[test]
fn test_graphemes() {
    use crate::testdata::{TEST_DIFF, TEST_SAME};

    // Each entry: (input, expected extended clusters, expected legacy clusters).
    const EXTRA_DIFF: &[(&str, &[&str], &[&str])] = &[
        // Official test suite doesn't include two Prepend chars between two other chars.
        (
            "\u{20}\u{600}\u{600}\u{20}",
            &["\u{20}", "\u{600}\u{600}\u{20}"],
            &["\u{20}", "\u{600}", "\u{600}", "\u{20}"],
        ),
        // Test for Prepend followed by two Any chars
        (
            "\u{600}\u{20}\u{20}",
            &["\u{600}\u{20}", "\u{20}"],
            &["\u{600}", "\u{20}", "\u{20}"],
        ),
    ];

    // Each entry: (input, clusters expected in both modes).
    const EXTRA_SAME: &[(&str, &[&str])] = &[
        // family emoji (more than two emoji joined by ZWJ)
        (
            "\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
            &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"],
        ),
        // cartwheel emoji followed by two fitzpatrick skin tone modifiers
        // (test case from issue #19)
        (
            "\u{1F938}\u{1F3FE}\u{1F3FE}",
            &["\u{1F938}\u{1F3FE}\u{1F3FE}"],
        ),
    ];

    // Checks the forward and the reverse grapheme iterator of `s` against
    // `expected`. Both sides are collected into vectors so that a failure
    // prints the offending input together with the actual and expected
    // clusters instead of a bare "assertion failed".
    fn check(s: &str, is_extended: bool, expected: &[&str]) {
        let mode = if is_extended { "extended" } else { "legacy" };

        let forward = UnicodeSegmentation::graphemes(s, is_extended).collect::<Vec<_>>();
        assert_eq!(
            forward, expected,
            "Forward {} graphemes test for testcase {:?} failed.",
            mode, s
        );

        let reverse = UnicodeSegmentation::graphemes(s, is_extended)
            .rev()
            .collect::<Vec<_>>();
        let expected_rev = expected.iter().rev().cloned().collect::<Vec<_>>();
        assert_eq!(
            reverse, expected_rev,
            "Reverse {} graphemes test for testcase {:?} failed.",
            mode, s
        );
    }

    // Cases whose segmentation is identical in both modes.
    for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
        check(s, true, g);
        check(s, false, g);
    }

    // Cases whose extended (`gt`) and legacy (`gf`) segmentations differ.
    for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
        check(s, true, gt);
        check(s, false, gf);
    }

    // test the indices iterators
    // Combining marks are spelled as escapes so the byte offsets below can be
    // verified by eye: "a\u{310}" and "e\u{301}" are 3 bytes each,
    // "o\u{308}\u{332}" is 5 bytes and "\r\n" is 2, for 13 bytes in total.
    let s = "a\u{310}e\u{301}o\u{308}\u{332}\r\n";
    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[
        (0, "a\u{310}"),
        (3, "e\u{301}"),
        (6, "o\u{308}\u{332}"),
        (11, "\r\n"),
    ];
    assert_eq!(gr_inds, b);

    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true)
        .rev()
        .collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[
        (11, "\r\n"),
        (6, "o\u{308}\u{332}"),
        (3, "e\u{301}"),
        (0, "a\u{310}"),
    ];
    assert_eq!(gr_inds, b);

    // size_hint before and after draining the iterator: the upper bound
    // starts at the byte length of `s` and drops to zero once exhausted.
    let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
    assert_eq!(gr_inds_iter.size_hint(), (1, Some(13)));
    // `by_ref` keeps the iterator usable after `count` consumes its items.
    assert_eq!(gr_inds_iter.by_ref().count(), 4);
    assert_eq!(gr_inds_iter.size_hint(), (0, Some(0)));

    // make sure the reverse iterator does the right thing with "\n" at beginning of string
    let s = "\n\r\n\r";
    let gr = UnicodeSegmentation::graphemes(s, true)
        .rev()
        .collect::<Vec<&str>>();
    let b: &[_] = &["\r", "\r\n", "\n"];
    assert_eq!(gr, b);
}
109 
/// Verifies word-boundary segmentation, forwards and backwards.
///
/// Every case from `crate::testdata::TEST_WORD` and from the extra table
/// below is checked four ways: the forward and reverse `split_word_bounds`
/// iterators, and the start offsets reported by the forward and reverse
/// `split_word_bound_indices` iterators.
#[test]
fn test_words() {
    use crate::testdata::TEST_WORD;

    // Unicode's official tests don't really test longer chains of flag emoji
    // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
    //
    // Regional indicators and emoji are spelled as `\u{...}` escapes so the
    // data cannot be damaged by a lossy re-encoding of this file.
    // U+1F1E6..=U+1F1FF are the regional indicator symbols A..=Z.
    const EXTRA_TESTS: &[(&str, &[&str])] = &[
        // seven complete flags: AF AX AL DZ AS AD AO
        (
            "\u{1F1E6}\u{1F1EB}\u{1F1E6}\u{1F1FD}\u{1F1E6}\u{1F1F1}\u{1F1E9}\u{1F1FF}\
             \u{1F1E6}\u{1F1F8}\u{1F1E6}\u{1F1E9}\u{1F1E6}\u{1F1F4}",
            &[
                "\u{1F1E6}\u{1F1EB}",
                "\u{1F1E6}\u{1F1FD}",
                "\u{1F1E6}\u{1F1F1}",
                "\u{1F1E9}\u{1F1FF}",
                "\u{1F1E6}\u{1F1F8}",
                "\u{1F1E6}\u{1F1E9}",
                "\u{1F1E6}\u{1F1F4}",
            ],
        ),
        // six complete flags followed by a lone regional indicator
        (
            "\u{1F1E6}\u{1F1EB}\u{1F1E6}\u{1F1FD}\u{1F1E6}\u{1F1F1}\u{1F1E9}\u{1F1FF}\
             \u{1F1E6}\u{1F1F8}\u{1F1E6}\u{1F1E9}\u{1F1E6}",
            &[
                "\u{1F1E6}\u{1F1EB}",
                "\u{1F1E6}\u{1F1FD}",
                "\u{1F1E6}\u{1F1F1}",
                "\u{1F1E9}\u{1F1FF}",
                "\u{1F1E6}\u{1F1F8}",
                "\u{1F1E6}\u{1F1E9}",
                "\u{1F1E6}",
            ],
        ),
        // ASCII letters interrupting the chain restart the pairing
        (
            "\u{1F1E6}a\u{1F1EB}\u{1F1E6}\u{1F1FD}a\u{1F1E6}\u{1F1F1}\u{1F1E9}\u{1F1FF}\
             \u{1F1E6}\u{1F1F8}\u{1F1E6}\u{1F1E9}\u{1F1E6}",
            &[
                "\u{1F1E6}",
                "a",
                "\u{1F1EB}\u{1F1E6}",
                "\u{1F1FD}",
                "a",
                "\u{1F1E6}\u{1F1F1}",
                "\u{1F1E9}\u{1F1FF}",
                "\u{1F1E6}\u{1F1F8}",
                "\u{1F1E6}\u{1F1E9}",
                "\u{1F1E6}",
            ],
        ),
        (
            "\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}",
            &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"],
        ),
        // emoji followed by an emoji carrying a skin tone modifier
        (
            "\u{1F60C}\u{1F44E}\u{1F3FC}",
            &["\u{1F60C}", "\u{1F44E}\u{1F3FC}"],
        ),
        // perhaps wrong, spaces should not be included?
        ("hello world", &["hello", " ", "world"]),
        // flags CA CH ZM, a lone regional indicator, then ASCII text
        (
            "\u{1F1E8}\u{1F1E6}\u{1F1E8}\u{1F1ED}\u{1F1FF}\u{1F1F2}\u{1F1FF} hi",
            &[
                "\u{1F1E8}\u{1F1E6}",
                "\u{1F1E8}\u{1F1ED}",
                "\u{1F1FF}\u{1F1F2}",
                "\u{1F1FF}",
                " ",
                "hi",
            ],
        ),
    ];

    for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) {
        // Defined inside the loop so the failure message can name the
        // current test case (`s`, `w`).
        macro_rules! assert_ {
            ($test:expr, $exp:expr, $name:expr) => {
                // collect into vector for better diagnostics in failure case
                let testing = $test.collect::<Vec<_>>();
                let expected = $exp.collect::<Vec<_>>();
                assert_eq!(
                    testing, expected,
                    "{} test for testcase ({:?}, {:?}) failed.",
                    $name, s, w
                )
            };
        }

        // test forward iterator
        assert_!(
            s.split_word_bounds(),
            w.iter().cloned(),
            "Forward word boundaries"
        );

        // test reverse iterator
        assert_!(
            s.split_word_bounds().rev(),
            w.iter().rev().cloned(),
            "Reverse word boundaries"
        );

        // generate offsets from word string lengths: each word starts where
        // the running total of the preceding word lengths ends
        let indices: Vec<usize> = w
            .iter()
            .scan(0, |offset, word| {
                let start = *offset;
                *offset += word.len();
                Some(start)
            })
            .collect();

        // test forward indices iterator
        assert_!(
            s.split_word_bound_indices().map(|(l, _)| l),
            indices.iter().cloned(),
            "Forward word indices"
        );

        // test backward indices iterator
        assert_!(
            s.split_word_bound_indices().rev().map(|(l, _)| l),
            indices.iter().rev().cloned(),
            "Reverse word indices"
        );
    }
}
188 
/// Verifies forward sentence-boundary segmentation.
///
/// For every `(input, expected sentences)` pair in
/// `crate::testdata::TEST_SENTENCE`, the output of `split_sentence_bounds`
/// must equal the expected pieces, in order.
#[test]
fn test_sentences() {
    use crate::testdata::TEST_SENTENCE;

    for &(text, sentences) in TEST_SENTENCE.iter() {
        // Materialise both sides so that a failing `assert_eq!` prints the
        // complete actual and expected sequences.
        let actual: Vec<_> = text.split_sentence_bounds().collect();
        let wanted: Vec<_> = sentences.iter().cloned().collect();

        assert_eq!(
            actual, wanted,
            "{} test for testcase ({:?}, {:?}) failed.",
            "Forward sentence boundaries", text, sentences
        );
    }
}
214 
215 quickcheck! {
216     fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
217         let a = s.graphemes(true).collect::<Vec<_>>();
218         let mut b = s.graphemes(true).rev().collect::<Vec<_>>();
219         b.reverse();
220         a == b
221     }
222 
223     fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool {
224         let a = s.graphemes(false).collect::<Vec<_>>();
225         let mut b = s.graphemes(false).rev().collect::<Vec<_>>();
226         b.reverse();
227         a == b
228     }
229 
230     fn quickcheck_join_graphemes(s: String) -> bool {
231         let a = s.graphemes(true).collect::<String>();
232         let b = s.graphemes(false).collect::<String>();
233         a == s && b == s
234     }
235 
236     fn quickcheck_forward_reverse_words(s: String) -> bool {
237         let a = s.split_word_bounds().collect::<Vec<_>>();
238         let mut b = s.split_word_bounds().rev().collect::<Vec<_>>();
239         b.reverse();
240         a == b
241     }
242 
243     fn quickcheck_join_words(s: String) -> bool {
244         let a = s.split_word_bounds().collect::<String>();
245         a == s
246     }
247 }
248