1 //! Word splitting functionality.
2 //!
3 //! To wrap text into lines, long words sometimes need to be split
4 //! across lines. The [`WordSplitter`] enum defines this
5 //! functionality.
6
7 use crate::core::{display_width, Word};
8
/// The `WordSplitter` enum describes where words can be split.
///
/// If the textwrap crate has been compiled with the `hyphenation`
/// Cargo feature enabled, you will find a
/// [`WordSplitter::Hyphenation`] variant. Use this variant for
/// language-aware hyphenation:
///
/// ```
/// #[cfg(feature = "hyphenation")] {
///     use hyphenation::{Language, Load, Standard};
///     use textwrap::{wrap, Options, WordSplitter};
///
///     let text = "Oxidation is the loss of electrons.";
///     let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
///     let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary));
///     assert_eq!(wrap(text, &options), vec!["Oxida-",
///                                           "tion is",
///                                           "the loss",
///                                           "of elec-",
///                                           "trons."]);
/// }
/// ```
///
/// Please see the documentation for the [hyphenation] crate for more
/// details.
///
/// [hyphenation]: https://docs.rs/hyphenation/
#[derive(Clone)]
pub enum WordSplitter {
    /// Use this as a [`Options.word_splitter`] to avoid any kind of
    /// hyphenation:
    ///
    /// ```
    /// use textwrap::{wrap, Options, WordSplitter};
    ///
    /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
    /// assert_eq!(wrap("foo bar-baz", &options),
    ///            vec!["foo", "bar-baz"]);
    /// ```
    ///
    /// [`Options.word_splitter`]: super::Options::word_splitter
    NoHyphenation,

    /// `HyphenSplitter` is the default `WordSplitter` used by
    /// [`Options::new`](super::Options::new). It will split words on
    /// existing hyphens in the word.
    ///
    /// It will only use hyphens that are surrounded by alphanumeric
    /// characters, which prevents a word like `"--foo-bar"` from
    /// being split into `"--"` and `"foo-bar"`.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    ///
    /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"),
    ///            vec![6]);
    /// ```
    HyphenSplitter,

    /// Use a custom function as the word splitter.
    ///
    /// This variant lets you implement a custom word splitter using
    /// your own function.
    ///
    /// The function receives the word and returns the byte indices at
    /// which it may be split. `split_words` slices the word at these
    /// indices in the order given, so they should be ascending, lie on
    /// UTF-8 character boundaries, and not exceed the length of the
    /// word; string slicing panics otherwise.
    ///
    /// Note that two `Custom` splitters never compare equal with `==`,
    /// not even when they wrap the same function.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    ///
    /// fn split_at_underscore(word: &str) -> Vec<usize> {
    ///     word.match_indices('_').map(|(idx, _)| idx + 1).collect()
    /// }
    ///
    /// let word_splitter = WordSplitter::Custom(split_at_underscore);
    /// assert_eq!(word_splitter.split_points("a_long_identifier"),
    ///            vec![2, 7]);
    /// ```
    Custom(fn(word: &str) -> Vec<usize>),

    /// A hyphenation dictionary can be used to do language-specific
    /// hyphenation using patterns from the [hyphenation] crate.
    ///
    /// **Note:** Only available when the `hyphenation` Cargo feature is
    /// enabled.
    ///
    /// [hyphenation]: https://docs.rs/hyphenation/
    #[cfg(feature = "hyphenation")]
    Hyphenation(hyphenation::Standard),
}
100
101 impl std::fmt::Debug for WordSplitter {
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result102 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103 match self {
104 WordSplitter::NoHyphenation => f.write_str("NoHyphenation"),
105 WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"),
106 WordSplitter::Custom(_) => f.write_str("Custom(...)"),
107 #[cfg(feature = "hyphenation")]
108 WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()),
109 }
110 }
111 }
112
113 impl PartialEq<WordSplitter> for WordSplitter {
eq(&self, other: &WordSplitter) -> bool114 fn eq(&self, other: &WordSplitter) -> bool {
115 match (self, other) {
116 (WordSplitter::NoHyphenation, WordSplitter::NoHyphenation) => true,
117 (WordSplitter::HyphenSplitter, WordSplitter::HyphenSplitter) => true,
118 #[cfg(feature = "hyphenation")]
119 (WordSplitter::Hyphenation(this_dict), WordSplitter::Hyphenation(other_dict)) => {
120 this_dict.language() == other_dict.language()
121 }
122 (_, _) => false,
123 }
124 }
125 }
126
127 impl WordSplitter {
128 /// Return all possible indices where `word` can be split.
129 ///
130 /// The indices are in the range `0..word.len()`. They point to
131 /// the index _after_ the split point, i.e., after `-` if
132 /// splitting on hyphens. This way, `word.split_at(idx)` will
133 /// break the word into two well-formed pieces.
134 ///
135 /// # Examples
136 ///
137 /// ```
138 /// use textwrap::WordSplitter;
139 /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]);
140 /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
141 /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]);
142 /// ```
split_points(&self, word: &str) -> Vec<usize>143 pub fn split_points(&self, word: &str) -> Vec<usize> {
144 match self {
145 WordSplitter::NoHyphenation => Vec::new(),
146 WordSplitter::HyphenSplitter => {
147 let mut splits = Vec::new();
148
149 for (idx, _) in word.match_indices('-') {
150 // We only use hyphens that are surrounded by alphanumeric
151 // characters. This is to avoid splitting on repeated hyphens,
152 // such as those found in --foo-bar.
153 let prev = word[..idx].chars().next_back();
154 let next = word[idx + 1..].chars().next();
155
156 if prev.filter(|ch| ch.is_alphanumeric()).is_some()
157 && next.filter(|ch| ch.is_alphanumeric()).is_some()
158 {
159 splits.push(idx + 1); // +1 due to width of '-'.
160 }
161 }
162
163 splits
164 }
165 WordSplitter::Custom(splitter_func) => splitter_func(word),
166 #[cfg(feature = "hyphenation")]
167 WordSplitter::Hyphenation(dictionary) => {
168 use hyphenation::Hyphenator;
169 dictionary.hyphenate(word).breaks
170 }
171 }
172 }
173 }
174
175 /// Split words into smaller words according to the split points given
176 /// by `word_splitter`.
177 ///
178 /// Note that we split all words, regardless of their length. This is
179 /// to more cleanly separate the business of splitting (including
180 /// automatic hyphenation) from the business of word wrapping.
split_words<'a, I>( words: I, word_splitter: &'a WordSplitter, ) -> impl Iterator<Item = Word<'a>> where I: IntoIterator<Item = Word<'a>>,181 pub fn split_words<'a, I>(
182 words: I,
183 word_splitter: &'a WordSplitter,
184 ) -> impl Iterator<Item = Word<'a>>
185 where
186 I: IntoIterator<Item = Word<'a>>,
187 {
188 words.into_iter().flat_map(move |word| {
189 let mut prev = 0;
190 let mut split_points = word_splitter.split_points(&word).into_iter();
191 std::iter::from_fn(move || {
192 if let Some(idx) = split_points.next() {
193 let need_hyphen = !word[..idx].ends_with('-');
194 let w = Word {
195 word: &word.word[prev..idx],
196 width: display_width(&word[prev..idx]),
197 whitespace: "",
198 penalty: if need_hyphen { "-" } else { "" },
199 };
200 prev = idx;
201 return Some(w);
202 }
203
204 if prev < word.word.len() || prev == 0 {
205 let w = Word {
206 word: &word.word[prev..],
207 width: display_width(&word[prev..]),
208 whitespace: word.whitespace,
209 penalty: word.penalty,
210 };
211 prev = word.word.len() + 1;
212 return Some(w);
213 }
214
215 None
216 })
217 })
218 }
219
// Unit tests for `split_words`, exercised with the hyphen splitter, the
// no-op splitter and a custom splitter.
#[cfg(test)]
mod tests {
    use super::*;

    // Like assert_eq!, but the left expression is an iterator.
    macro_rules! assert_iter_eq {
        ($left:expr, $right:expr) => {
            assert_eq!($left.collect::<Vec<_>>(), $right);
        };
    }

    // No input words means no output words.
    #[test]
    fn split_words_no_words() {
        assert_iter_eq!(split_words(vec![], &WordSplitter::HyphenSplitter), vec![]);
    }

    // A word built from whitespace only must come back unchanged.
    // NOTE(review): presumably `Word::from` moves the whitespace into the
    // `whitespace` field and leaves `word` empty -- confirm in `core::Word`.
    #[test]
    fn split_words_empty_word() {
        assert_iter_eq!(
            split_words(vec![Word::from(" ")], &WordSplitter::HyphenSplitter),
            vec![Word::from(" ")]
        );
    }

    // A word without hyphens has no split points and is passed through.
    #[test]
    fn split_words_single_word() {
        assert_iter_eq!(
            split_words(vec![Word::from("foobar")], &WordSplitter::HyphenSplitter),
            vec![Word::from("foobar")]
        );
    }

    // The hyphen stays attached to the left-hand fragment.
    #[test]
    fn split_words_hyphen_splitter() {
        assert_iter_eq!(
            split_words(vec![Word::from("foo-bar")], &WordSplitter::HyphenSplitter),
            vec![Word::from("foo-"), Word::from("bar")]
        );
    }

    // With `NoHyphenation`, even a hyphenated word is left in one piece.
    #[test]
    fn split_words_no_hyphenation() {
        assert_iter_eq!(
            split_words(vec![Word::from("foo-bar")], &WordSplitter::NoHyphenation),
            vec![Word::from("foo-bar")]
        );
    }

    // A fragment produced by a split gets a "-" penalty, unless the text
    // before the split point already ends in a hyphen.
    #[test]
    fn split_words_adds_penalty() {
        // Always split after the third byte, whatever the word is.
        let fixed_split_point = |_: &str| vec![3];

        // "foo" does not end in a hyphen, so it receives the "-" penalty.
        assert_iter_eq!(
            split_words(
                vec![Word::from("foobar")].into_iter(),
                &WordSplitter::Custom(fixed_split_point)
            ),
            vec![
                Word {
                    word: "foo",
                    width: 3,
                    whitespace: "",
                    penalty: "-"
                },
                Word {
                    word: "bar",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                }
            ]
        );

        // "fo-" already ends in a hyphen, so no extra penalty is added.
        assert_iter_eq!(
            split_words(
                vec![Word::from("fo-bar")].into_iter(),
                &WordSplitter::Custom(fixed_split_point)
            ),
            vec![
                Word {
                    word: "fo-",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                },
                Word {
                    word: "bar",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                }
            ]
        );
    }
}
315