1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 use super::UnicodeSegmentation;
12
13 use std::prelude::v1::*;
14
#[test]
fn test_graphemes() {
    use crate::testdata::{TEST_DIFF, TEST_SAME};

    // Cases whose expected clusters differ between the two modes:
    // (input, expected for `graphemes(s, true)`, expected for `graphemes(s, false)`).
    const EXTRA_DIFF: &[(&str, &[&str], &[&str])] = &[
        // Official test suite doesn't include two Prepend chars between two other chars.
        (
            "\u{20}\u{600}\u{600}\u{20}",
            &["\u{20}", "\u{600}\u{600}\u{20}"],
            &["\u{20}", "\u{600}", "\u{600}", "\u{20}"],
        ),
        // Test for Prepend followed by two Any chars
        (
            "\u{600}\u{20}\u{20}",
            &["\u{600}\u{20}", "\u{20}"],
            &["\u{600}", "\u{20}", "\u{20}"],
        ),
    ];

    // Cases whose expected clusters are the same in both modes: (input, expected).
    const EXTRA_SAME: &[(&str, &[&str])] = &[
        // family emoji (more than two emoji joined by ZWJ)
        (
            "\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
            &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"],
        ),
        // cartwheel emoji followed by two fitzpatrick skin tone modifiers
        // (test case from issue #19)
        (
            "\u{1F938}\u{1F3FE}\u{1F3FE}",
            &["\u{1F938}\u{1F3FE}\u{1F3FE}"],
        ),
    ];

    // Runs `graphemes(s, extended)` forwards and backwards and compares both
    // walks against `expected`. The results are collected first so that a
    // failure prints the offending input and the clusters actually produced.
    fn check(s: &str, extended: bool, expected: &[&str]) {
        let forward: Vec<&str> = UnicodeSegmentation::graphemes(s, extended).collect();
        assert_eq!(
            forward, expected,
            "forward graphemes (extended = {}) failed for {:?}",
            extended, s
        );

        // Undo the reversal so the comparison is against `expected` as written.
        let mut backward: Vec<&str> = UnicodeSegmentation::graphemes(s, extended)
            .rev()
            .collect();
        backward.reverse();
        assert_eq!(
            backward, expected,
            "reverse graphemes (extended = {}) failed for {:?}",
            extended, s
        );
    }

    for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
        check(s, true, g);
        check(s, false, g);
    }

    for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
        check(s, true, gt);
        check(s, false, gf);
    }

    // test the indices iterators
    //
    // The combining marks are spelled as escapes on purpose: the byte offsets
    // asserted below (3, 6, 11 and the 13-byte upper bound) only hold for the
    // decomposed spelling, which is indistinguishable from the precomposed one
    // when the characters are typed literally.
    let s = "a\u{310}e\u{301}o\u{308}\u{332}\r\n";
    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[
        (0, "a\u{310}"),
        (3, "e\u{301}"),
        (6, "o\u{308}\u{332}"),
        (11, "\r\n"),
    ];
    assert_eq!(gr_inds, b);
    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true)
        .rev()
        .collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[
        (11, "\r\n"),
        (6, "o\u{308}\u{332}"),
        (3, "e\u{301}"),
        (0, "a\u{310}"),
    ];
    assert_eq!(gr_inds, b);

    // size_hint before and after the iterator has been drained through `by_ref`
    let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
    {
        let gr_inds = gr_inds_iter.by_ref();
        let e1 = gr_inds.size_hint();
        assert_eq!(e1, (1, Some(13)));
        let c = gr_inds.count();
        assert_eq!(c, 4);
    }
    let e2 = gr_inds_iter.size_hint();
    assert_eq!(e2, (0, Some(0)));

    // make sure the reverse iterator does the right thing with "\n" at beginning of string
    let s = "\n\r\n\r";
    let gr = UnicodeSegmentation::graphemes(s, true)
        .rev()
        .collect::<Vec<&str>>();
    let b: &[_] = &["\r", "\r\n", "\n"];
    assert_eq!(gr, b);
}
109
#[test]
fn test_words() {
    use crate::testdata::TEST_WORD;

    // Unicode's official tests don't really test longer chains of flag emoji
    // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
    //
    // Each entry is (input, expected pieces from `split_word_bounds`).
    // Regional-indicator symbols and emoji are written as `\u{..}` escapes so
    // the data cannot be silently dropped or re-encoded by tooling; with the
    // characters missing, every input degenerates to "" and the expectations
    // to lists of empty strings, which can never match.
    const EXTRA_TESTS: &[(&str, &[&str])] = &[
        // seven complete flags (regional-indicator pairs): AF AX AL DZ AS AD AO
        (
            "\u{1f1e6}\u{1f1eb}\u{1f1e6}\u{1f1fd}\u{1f1e6}\u{1f1f1}\u{1f1e9}\u{1f1ff}\
             \u{1f1e6}\u{1f1f8}\u{1f1e6}\u{1f1e9}\u{1f1e6}\u{1f1f4}",
            &[
                "\u{1f1e6}\u{1f1eb}",
                "\u{1f1e6}\u{1f1fd}",
                "\u{1f1e6}\u{1f1f1}",
                "\u{1f1e9}\u{1f1ff}",
                "\u{1f1e6}\u{1f1f8}",
                "\u{1f1e6}\u{1f1e9}",
                "\u{1f1e6}\u{1f1f4}",
            ],
        ),
        // six complete flags followed by one unpaired regional indicator
        (
            "\u{1f1e6}\u{1f1eb}\u{1f1e6}\u{1f1fd}\u{1f1e6}\u{1f1f1}\u{1f1e9}\u{1f1ff}\
             \u{1f1e6}\u{1f1f8}\u{1f1e6}\u{1f1e9}\u{1f1e6}",
            &[
                "\u{1f1e6}\u{1f1eb}",
                "\u{1f1e6}\u{1f1fd}",
                "\u{1f1e6}\u{1f1f1}",
                "\u{1f1e9}\u{1f1ff}",
                "\u{1f1e6}\u{1f1f8}",
                "\u{1f1e6}\u{1f1e9}",
                "\u{1f1e6}",
            ],
        ),
        // ASCII letters interrupting the chain restart the pairing
        (
            "\u{1f1e6}a\u{1f1eb}\u{1f1e6}\u{1f1fd}a\u{1f1e6}\u{1f1f1}\u{1f1e9}\u{1f1ff}\
             \u{1f1e6}\u{1f1f8}\u{1f1e6}\u{1f1e9}\u{1f1e6}",
            &[
                "\u{1f1e6}",
                "a",
                "\u{1f1eb}\u{1f1e6}",
                "\u{1f1fd}",
                "a",
                "\u{1f1e6}\u{1f1f1}",
                "\u{1f1e9}\u{1f1ff}",
                "\u{1f1e6}\u{1f1f8}",
                "\u{1f1e6}\u{1f1e9}",
                "\u{1f1e6}",
            ],
        ),
        (
            "\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}",
            &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"],
        ),
        // an emoji followed by an emoji carrying a skin tone modifier
        (
            "\u{1f60c}\u{1f44e}\u{1f3fc}",
            &["\u{1f60c}", "\u{1f44e}\u{1f3fc}"],
        ),
        // perhaps wrong, spaces should not be included?
        ("hello world", &["hello", " ", "world"]),
        // three flags, one unpaired regional indicator, then plain text
        (
            "\u{1f1e8}\u{1f1e6}\u{1f1e8}\u{1f1ed}\u{1f1ff}\u{1f1f2}\u{1f1ff} hi",
            &[
                "\u{1f1e8}\u{1f1e6}",
                "\u{1f1e8}\u{1f1ed}",
                "\u{1f1ff}\u{1f1f2}",
                "\u{1f1ff}",
                " ",
                "hi",
            ],
        ),
    ];

    for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) {
        // Defined inside the loop body so that the failure message can refer
        // to the current `s` and `w`.
        macro_rules! assert_ {
            ($test:expr, $exp:expr, $name:expr) => {
                // collect into vector for better diagnostics in failure case
                let testing = $test.collect::<Vec<_>>();
                let expected = $exp.collect::<Vec<_>>();
                assert_eq!(
                    testing, expected,
                    "{} test for testcase ({:?}, {:?}) failed.",
                    $name, s, w
                )
            };
        }

        // test forward iterator
        assert_!(
            s.split_word_bounds(),
            w.iter().cloned(),
            "Forward word boundaries"
        );

        // test reverse iterator
        assert_!(
            s.split_word_bounds().rev(),
            w.iter().rev().cloned(),
            "Reverse word boundaries"
        );

        // generate offsets from word string lengths: each expected piece
        // starts where the running total of the preceding lengths ends
        let indices: Vec<usize> = w
            .iter()
            .scan(0, |offset, word| {
                let start = *offset;
                *offset += word.len();
                Some(start)
            })
            .collect();

        // test forward indices iterator
        assert_!(
            s.split_word_bound_indices().map(|(l, _)| l),
            indices.iter().cloned(),
            "Forward word indices"
        );

        // test backward indices iterator
        assert_!(
            s.split_word_bound_indices().rev().map(|(l, _)| l),
            indices.iter().rev().cloned(),
            "Reverse word indices"
        );
    }
}
188
#[test]
fn test_sentences() {
    use crate::testdata::TEST_SENTENCE;

    // Each table entry pairs an input string with the pieces that
    // `split_sentence_bounds` is expected to yield, in order.
    for &(text, sentences) in TEST_SENTENCE.iter() {
        // Materialise both sides so a failure prints the full sequences.
        let produced: Vec<_> = text.split_sentence_bounds().collect();
        let wanted: Vec<_> = sentences.iter().cloned().collect();

        assert_eq!(
            produced, wanted,
            "{} test for testcase ({:?}, {:?}) failed.",
            "Forward sentence boundaries", text, sentences
        );
    }
}
214
215 quickcheck! {
216 fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
217 let a = s.graphemes(true).collect::<Vec<_>>();
218 let mut b = s.graphemes(true).rev().collect::<Vec<_>>();
219 b.reverse();
220 a == b
221 }
222
223 fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool {
224 let a = s.graphemes(false).collect::<Vec<_>>();
225 let mut b = s.graphemes(false).rev().collect::<Vec<_>>();
226 b.reverse();
227 a == b
228 }
229
230 fn quickcheck_join_graphemes(s: String) -> bool {
231 let a = s.graphemes(true).collect::<String>();
232 let b = s.graphemes(false).collect::<String>();
233 a == s && b == s
234 }
235
236 fn quickcheck_forward_reverse_words(s: String) -> bool {
237 let a = s.split_word_bounds().collect::<Vec<_>>();
238 let mut b = s.split_word_bounds().rev().collect::<Vec<_>>();
239 b.reverse();
240 a == b
241 }
242
243 fn quickcheck_join_words(s: String) -> bool {
244 let a = s.split_word_bounds().collect::<String>();
245 a == s
246 }
247 }
248