1 // Copyright 2023 The Mozilla Foundation. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 use super::TextSource;
11
12 use alloc::borrow::Cow;
13 use alloc::vec::Vec;
14 use core::char;
15 use core::ops::Range;
16
17 use crate::{
18 compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
19 reorder_visual, visual_runs_for_line,
20 };
21 use crate::{BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo};
22
23 #[cfg(feature = "hardcoded-data")]
24 use crate::HardcodedBidiData;
25
/// Initial bidi information of the text (UTF-16 version).
///
/// Contains the text paragraphs and `BidiClass` of its characters.
#[derive(PartialEq, Debug)]
pub struct InitialInfo<'text> {
    /// The text, borrowed as a slice of UTF-16 code units.
    pub text: &'text [u16],

    /// The BidiClass of the character at each code unit in the text.
    /// If a character is multiple code units, its class will appear multiple times in the vector.
    pub original_classes: Vec<BidiClass>,

    /// The boundaries and level of each paragraph within the text.
    pub paragraphs: Vec<ParagraphInfo>,
}
41
42 impl<'text> InitialInfo<'text> {
43 /// Find the paragraphs and BidiClasses in a string of text.
44 ///
45 /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
46 ///
47 /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
48 /// character is found before the matching PDI. If no strong character is found, the class will
49 /// remain FSI, and it's up to later stages to treat these as LRI when needed.
50 ///
51 /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
52 #[cfg_attr(feature = "flame_it", flamer::flame)]
53 #[cfg(feature = "hardcoded-data")]
new(text: &[u16], default_para_level: Option<Level>) -> InitialInfo<'_>54 pub fn new(text: &[u16], default_para_level: Option<Level>) -> InitialInfo<'_> {
55 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
56 }
57
58 /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
59 /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
60 /// instead (enabled with tbe default `hardcoded-data` Cargo feature)
61 ///
62 /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
63 ///
64 /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
65 /// character is found before the matching PDI. If no strong character is found, the class will
66 /// remain FSI, and it's up to later stages to treat these as LRI when needed.
67 #[cfg_attr(feature = "flame_it", flamer::flame)]
new_with_data_source<'a, D: BidiDataSource>( data_source: &D, text: &'a [u16], default_para_level: Option<Level>, ) -> InitialInfo<'a>68 pub fn new_with_data_source<'a, D: BidiDataSource>(
69 data_source: &D,
70 text: &'a [u16],
71 default_para_level: Option<Level>,
72 ) -> InitialInfo<'a> {
73 InitialInfoExt::new_with_data_source(data_source, text, default_para_level).base
74 }
75 }
76
/// Extended version of InitialInfo (not public API).
///
/// Pairs the public [`InitialInfo`] with one extra flag per paragraph.
#[derive(PartialEq, Debug)]
struct InitialInfoExt<'text> {
    /// The base InitialInfo for the text, recording its paragraphs and bidi classes.
    base: InitialInfo<'text>,

    /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
    /// requires no further bidi processing (i.e. there are no RTL characters or bidi
    /// control codes present).
    pure_ltr: Vec<bool>,
}
88
89 impl<'text> InitialInfoExt<'text> {
90 /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`]
91 /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`]
92 /// instead (enabled with tbe default `hardcoded-data` Cargo feature)
93 ///
94 /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
95 ///
96 /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong
97 /// character is found before the matching PDI. If no strong character is found, the class will
98 /// remain FSI, and it's up to later stages to treat these as LRI when needed.
99 #[cfg_attr(feature = "flame_it", flamer::flame)]
new_with_data_source<'a, D: BidiDataSource>( data_source: &D, text: &'a [u16], default_para_level: Option<Level>, ) -> InitialInfoExt<'a>100 pub fn new_with_data_source<'a, D: BidiDataSource>(
101 data_source: &D,
102 text: &'a [u16],
103 default_para_level: Option<Level>,
104 ) -> InitialInfoExt<'a> {
105 let mut paragraphs = Vec::<ParagraphInfo>::new();
106 let mut pure_ltr = Vec::<bool>::new();
107 let (original_classes, _, _) = compute_initial_info(
108 data_source,
109 text,
110 default_para_level,
111 Some((&mut paragraphs, &mut pure_ltr)),
112 );
113
114 InitialInfoExt {
115 base: InitialInfo {
116 text,
117 original_classes,
118 paragraphs,
119 },
120 pure_ltr,
121 }
122 }
123 }
124
/// Bidi information of the text (UTF-16 version).
///
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text. If a
/// character is multiple code units wide, then its class and level will appear multiple times in these
/// vectors.
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
#[derive(Debug, PartialEq)]
pub struct BidiInfo<'text> {
    /// The text, borrowed as a slice of UTF-16 code units.
    pub text: &'text [u16],

    /// The BidiClass of the character at each code unit in the text.
    pub original_classes: Vec<BidiClass>,

    /// The directional embedding level of each code unit in the text.
    pub levels: Vec<Level>,

    /// The boundaries and paragraph embedding level of each paragraph within the text.
    ///
    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
    /// Or just don't include the first paragraph, which always starts at 0?
    pub paragraphs: Vec<ParagraphInfo>,
}
148
149 impl<'text> BidiInfo<'text> {
150 /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
151 ///
152 ///
153 /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
154 ///
155 /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
156 /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
157 ///
158 /// TODO: Support auto-RTL base direction
159 #[cfg_attr(feature = "flame_it", flamer::flame)]
160 #[cfg(feature = "hardcoded-data")]
161 #[inline]
new(text: &[u16], default_para_level: Option<Level>) -> BidiInfo<'_>162 pub fn new(text: &[u16], default_para_level: Option<Level>) -> BidiInfo<'_> {
163 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
164 }
165
166 /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
167 /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
168 /// instead (enabled with tbe default `hardcoded-data` Cargo feature).
169 ///
170 /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
171 /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
172 ///
173 /// TODO: Support auto-RTL base direction
174 #[cfg_attr(feature = "flame_it", flamer::flame)]
new_with_data_source<'a, D: BidiDataSource>( data_source: &D, text: &'a [u16], default_para_level: Option<Level>, ) -> BidiInfo<'a>175 pub fn new_with_data_source<'a, D: BidiDataSource>(
176 data_source: &D,
177 text: &'a [u16],
178 default_para_level: Option<Level>,
179 ) -> BidiInfo<'a> {
180 let InitialInfoExt { base, pure_ltr, .. } =
181 InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
182
183 let mut levels = Vec::<Level>::with_capacity(text.len());
184 let mut processing_classes = base.original_classes.clone();
185
186 for (para, is_pure_ltr) in base.paragraphs.iter().zip(pure_ltr.iter()) {
187 let text = &text[para.range.clone()];
188 let original_classes = &base.original_classes[para.range.clone()];
189
190 compute_bidi_info_for_para(
191 data_source,
192 para,
193 *is_pure_ltr,
194 text,
195 original_classes,
196 &mut processing_classes,
197 &mut levels,
198 );
199 }
200
201 BidiInfo {
202 text,
203 original_classes: base.original_classes,
204 paragraphs: base.paragraphs,
205 levels,
206 }
207 }
208
209 /// Produce the levels for this paragraph as needed for reordering, one level per *byte*
210 /// in the paragraph. The returned vector includes bytes that are not included
211 /// in the `line`, but will not adjust them.
212 ///
213 /// This runs [Rule L1], you can run
214 /// [Rule L2] by calling [`Self::reorder_visual()`].
215 /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
216 /// to avoid non-byte indices.
217 ///
218 /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
219 ///
220 /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
221 /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
222 #[cfg_attr(feature = "flame_it", flamer::flame)]
reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level>223 pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
224 assert!(line.start <= self.levels.len());
225 assert!(line.end <= self.levels.len());
226
227 let mut levels = self.levels.clone();
228 let line_classes = &self.original_classes[line.clone()];
229 let line_levels = &mut levels[line.clone()];
230 let line_str: &[u16] = &self.text[line.clone()];
231
232 reorder_levels(line_classes, line_levels, line_str, para.level);
233
234 levels
235 }
236
237 /// Produce the levels for this paragraph as needed for reordering, one level per *character*
238 /// in the paragraph. The returned vector includes characters that are not included
239 /// in the `line`, but will not adjust them.
240 ///
241 /// This runs [Rule L1], you can run
242 /// [Rule L2] by calling [`Self::reorder_visual()`].
243 /// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
244 /// to avoid non-byte indices.
245 ///
246 /// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
247 ///
248 /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
249 /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
250 #[cfg_attr(feature = "flame_it", flamer::flame)]
reordered_levels_per_char( &self, para: &ParagraphInfo, line: Range<usize>, ) -> Vec<Level>251 pub fn reordered_levels_per_char(
252 &self,
253 para: &ParagraphInfo,
254 line: Range<usize>,
255 ) -> Vec<Level> {
256 let levels = self.reordered_levels(para, line);
257 self.text.char_indices().map(|(i, _)| levels[i]).collect()
258 }
259
260 /// Re-order a line based on resolved levels and return the line in display order.
261 ///
262 /// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
263 ///
264 /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
265 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
266 #[cfg_attr(feature = "flame_it", flamer::flame)]
reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, [u16]>267 pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, [u16]> {
268 if !level::has_rtl(&self.levels[line.clone()]) {
269 return self.text[line].into();
270 }
271 let (levels, runs) = self.visual_runs(para, line.clone());
272 reorder_line(self.text, line, levels, runs)
273 }
274
275 /// Reorders pre-calculated levels of a sequence of characters.
276 ///
277 /// NOTE: This is a convenience method that does not use a `Paragraph` object. It is
278 /// intended to be used when an application has determined the levels of the objects (character sequences)
279 /// and just needs to have them reordered.
280 ///
281 /// the index map will result in `indexMap[visualIndex]==logicalIndex`.
282 ///
283 /// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
284 /// information about the actual text.
285 ///
286 /// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
287 /// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
288 /// is for a single code point.
289 ///
290 ///
291 /// # # Example
292 /// ```
293 /// use unicode_bidi::BidiInfo;
294 /// use unicode_bidi::Level;
295 ///
296 /// let l0 = Level::from(0);
297 /// let l1 = Level::from(1);
298 /// let l2 = Level::from(2);
299 ///
300 /// let levels = vec![l0, l0, l0, l0];
301 /// let index_map = BidiInfo::reorder_visual(&levels);
302 /// assert_eq!(levels.len(), index_map.len());
303 /// assert_eq!(index_map, [0, 1, 2, 3]);
304 ///
305 /// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
306 /// let index_map = BidiInfo::reorder_visual(&levels);
307 /// assert_eq!(levels.len(), index_map.len());
308 /// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]);
309 /// ```
310 #[cfg_attr(feature = "flame_it", flamer::flame)]
311 #[inline]
reorder_visual(levels: &[Level]) -> Vec<usize>312 pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
313 reorder_visual(levels)
314 }
315
316 /// Find the level runs within a line and return them in visual order.
317 ///
318 /// `line` is a range of bytes indices within `levels`.
319 ///
320 /// The first return value is a vector of levels used by the reordering algorithm,
321 /// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
322 /// the result of [Rule L2], showing the visual order that each level run (a run of text with the
323 /// same level) should be displayed. Within each run, the display order can be checked
324 /// against the Level vector.
325 ///
326 /// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
327 /// as that should be handled by the engine using this API.
328 ///
329 /// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
330 /// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
331 /// of producing a level map, since one may wish to deal with the fact that this is operating on
332 /// byte rather than character indices.
333 ///
334 /// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
335 ///
336 /// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
337 /// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
338 /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
339 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
340 #[cfg_attr(feature = "flame_it", flamer::flame)]
341 #[inline]
visual_runs( &self, para: &ParagraphInfo, line: Range<usize>, ) -> (Vec<Level>, Vec<LevelRun>)342 pub fn visual_runs(
343 &self,
344 para: &ParagraphInfo,
345 line: Range<usize>,
346 ) -> (Vec<Level>, Vec<LevelRun>) {
347 let levels = self.reordered_levels(para, line.clone());
348 visual_runs_for_line(levels, &line)
349 }
350
351 /// If processed text has any computed RTL levels
352 ///
353 /// This information is usually used to skip re-ordering of text when no RTL level is present
354 #[inline]
has_rtl(&self) -> bool355 pub fn has_rtl(&self) -> bool {
356 level::has_rtl(&self.levels)
357 }
358 }
359
/// Bidi information of text treated as a single paragraph.
///
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text. If a
/// character is multiple code units wide, then its class and level will appear multiple times in these
/// vectors.
#[derive(Debug, PartialEq)]
pub struct ParagraphBidiInfo<'text> {
    /// The text, borrowed as a slice of UTF-16 code units.
    pub text: &'text [u16],

    /// The BidiClass of the character at each code unit in the text.
    pub original_classes: Vec<BidiClass>,

    /// The directional embedding level of each code unit in the text.
    pub levels: Vec<Level>,

    /// The paragraph embedding level.
    pub paragraph_level: Level,

    /// Whether the paragraph is purely LTR.
    pub is_pure_ltr: bool,
}
382
383 impl<'text> ParagraphBidiInfo<'text> {
384 /// Determine the bidi embedding level.
385 ///
386 ///
387 /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
388 ///
389 /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
390 /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
391 ///
392 /// TODO: Support auto-RTL base direction
393 #[cfg_attr(feature = "flame_it", flamer::flame)]
394 #[cfg(feature = "hardcoded-data")]
395 #[inline]
new(text: &[u16], default_para_level: Option<Level>) -> ParagraphBidiInfo<'_>396 pub fn new(text: &[u16], default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
397 Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
398 }
399
400 /// Determine the bidi embedding level, with a custom [`BidiDataSource`]
401 /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
402 /// instead (enabled with tbe default `hardcoded-data` Cargo feature).
403 ///
404 /// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
405 /// and should be kept in sync with it.
406 #[cfg_attr(feature = "flame_it", flamer::flame)]
new_with_data_source<'a, D: BidiDataSource>( data_source: &D, text: &'a [u16], default_para_level: Option<Level>, ) -> ParagraphBidiInfo<'a>407 pub fn new_with_data_source<'a, D: BidiDataSource>(
408 data_source: &D,
409 text: &'a [u16],
410 default_para_level: Option<Level>,
411 ) -> ParagraphBidiInfo<'a> {
412 // Here we could create a ParagraphInitialInfo struct to parallel the one
413 // used by BidiInfo, but there doesn't seem any compelling reason for it.
414 let (original_classes, paragraph_level, is_pure_ltr) =
415 compute_initial_info(data_source, text, default_para_level, None);
416
417 let mut levels = Vec::<Level>::with_capacity(text.len());
418 let mut processing_classes = original_classes.clone();
419
420 let para_info = ParagraphInfo {
421 range: Range {
422 start: 0,
423 end: text.len(),
424 },
425 level: paragraph_level,
426 };
427
428 compute_bidi_info_for_para(
429 data_source,
430 ¶_info,
431 is_pure_ltr,
432 text,
433 &original_classes,
434 &mut processing_classes,
435 &mut levels,
436 );
437
438 ParagraphBidiInfo {
439 text,
440 original_classes,
441 levels,
442 paragraph_level,
443 is_pure_ltr,
444 }
445 }
446
447 /// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
448 /// in the paragraph. The returned vector includes code units that are not included
449 /// in the `line`, but will not adjust them.
450 ///
451 /// See BidiInfo::reordered_levels for details.
452 ///
453 /// (This should be kept in sync with BidiInfo::reordered_levels.)
454 #[cfg_attr(feature = "flame_it", flamer::flame)]
reordered_levels(&self, line: Range<usize>) -> Vec<Level>455 pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
456 assert!(line.start <= self.levels.len());
457 assert!(line.end <= self.levels.len());
458
459 let mut levels = self.levels.clone();
460 let line_classes = &self.original_classes[line.clone()];
461 let line_levels = &mut levels[line.clone()];
462
463 reorder_levels(
464 line_classes,
465 line_levels,
466 self.text.subrange(line),
467 self.paragraph_level,
468 );
469
470 levels
471 }
472
473 /// Produce the levels for this paragraph as needed for reordering, one level per *character*
474 /// in the paragraph. The returned vector includes characters that are not included
475 /// in the `line`, but will not adjust them.
476 ///
477 /// See BidiInfo::reordered_levels_per_char for details.
478 ///
479 /// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
480 #[cfg_attr(feature = "flame_it", flamer::flame)]
reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level>481 pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
482 let levels = self.reordered_levels(line);
483 self.text.char_indices().map(|(i, _)| levels[i]).collect()
484 }
485
486 /// Re-order a line based on resolved levels and return the line in display order.
487 ///
488 /// See BidiInfo::reorder_line for details.
489 ///
490 /// (This should be kept in sync with BidiInfo::reorder_line.)
491 #[cfg_attr(feature = "flame_it", flamer::flame)]
reorder_line(&self, line: Range<usize>) -> Cow<'text, [u16]>492 pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, [u16]> {
493 if !level::has_rtl(&self.levels[line.clone()]) {
494 return self.text[line].into();
495 }
496 let (levels, runs) = self.visual_runs(line.clone());
497 reorder_line(self.text, line, levels, runs)
498 }
499
500 /// Reorders pre-calculated levels of a sequence of characters.
501 ///
502 /// See BidiInfo::reorder_visual for details.
503 #[cfg_attr(feature = "flame_it", flamer::flame)]
504 #[inline]
reorder_visual(levels: &[Level]) -> Vec<usize>505 pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
506 reorder_visual(levels)
507 }
508
509 /// Find the level runs within a line and return them in visual order.
510 ///
511 /// `line` is a range of code-unit indices within `levels`.
512 ///
513 /// See `BidiInfo::visual_runs` for details.
514 ///
515 /// (This should be kept in sync with BidiInfo::visual_runs.)
516 #[cfg_attr(feature = "flame_it", flamer::flame)]
517 #[inline]
visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>)518 pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
519 let levels = self.reordered_levels(line.clone());
520 visual_runs_for_line(levels, &line)
521 }
522
523 /// If processed text has any computed RTL levels
524 ///
525 /// This information is usually used to skip re-ordering of text when no RTL level is present
526 #[inline]
has_rtl(&self) -> bool527 pub fn has_rtl(&self) -> bool {
528 !self.is_pure_ltr
529 }
530
531 /// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
532 #[inline]
direction(&self) -> Direction533 pub fn direction(&self) -> Direction {
534 para_direction(&self.levels)
535 }
536 }
537
538 /// Return a line of the text in display order based on resolved levels.
539 ///
540 /// `text` the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
541 /// `line` a range of byte indices within `text` corresponding to one line
542 /// `levels` array of `Level` values, with `line`'s levels reordered into visual order
543 /// `runs` array of `LevelRun`s in visual order
544 ///
545 /// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
546 /// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
547 ///
548 /// Returns: the reordered text of the line.
549 ///
550 /// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
551 ///
552 /// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
553 /// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
reorder_line<'text>( text: &'text [u16], line: Range<usize>, levels: Vec<Level>, runs: Vec<LevelRun>, ) -> Cow<'text, [u16]>554 fn reorder_line<'text>(
555 text: &'text [u16],
556 line: Range<usize>,
557 levels: Vec<Level>,
558 runs: Vec<LevelRun>,
559 ) -> Cow<'text, [u16]> {
560 // If all isolating run sequences are LTR, no reordering is needed
561 if runs.iter().all(|run| levels[run.start].is_ltr()) {
562 return text[line].into();
563 }
564
565 let mut result = Vec::<u16>::with_capacity(line.len());
566 for run in runs {
567 if levels[run.start].is_rtl() {
568 let mut buf = [0; 2];
569 for c in text[run].chars().rev() {
570 result.extend(c.encode_utf16(&mut buf).iter());
571 }
572 } else {
573 result.extend(text[run].iter());
574 }
575 }
576 result.into()
577 }
578
/// Contains a reference of `BidiInfo` and one of its `paragraphs`.
/// And it supports all operation in the `Paragraph` that needs also its
/// `BidiInfo` such as `direction`.
#[derive(Debug)]
pub struct Paragraph<'a, 'text> {
    /// The bidi information of the whole text.
    pub info: &'a BidiInfo<'text>,
    /// The paragraph to operate on.
    // NOTE(review): presumably one of `info.paragraphs`; nothing here enforces that.
    pub para: &'a ParagraphInfo,
}
587
588 impl<'a, 'text> Paragraph<'a, 'text> {
589 #[inline]
new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text>590 pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
591 Paragraph { info, para }
592 }
593
594 /// Returns if the paragraph is Left direction, right direction or mixed.
595 #[inline]
direction(&self) -> Direction596 pub fn direction(&self) -> Direction {
597 para_direction(&self.info.levels[self.para.range.clone()])
598 }
599
600 /// Returns the `Level` of a certain character in the paragraph.
601 #[inline]
level_at(&self, pos: usize) -> Level602 pub fn level_at(&self, pos: usize) -> Level {
603 let actual_position = self.para.range.start + pos;
604 self.info.levels[actual_position]
605 }
606 }
607
// Implementation of TextSource for UTF-16 text in a [u16] array.
// Note that there could be unpaired surrogates present!
610
// Convenience functions to check whether a UTF16 code unit is a surrogate.

/// Returns `true` if `code` is a high (leading) surrogate, i.e. lies in `0xD800..=0xDBFF`.
#[inline]
fn is_high_surrogate(code: u16) -> bool {
    (0xD800..=0xDBFF).contains(&code)
}
/// Returns `true` if `code` is a low (trailing) surrogate, i.e. lies in `0xDC00..=0xDFFF`.
#[inline]
fn is_low_surrogate(code: u16) -> bool {
    (0xDC00..=0xDFFF).contains(&code)
}
620
621 impl<'text> TextSource<'text> for [u16] {
622 type CharIter = Utf16CharIter<'text>;
623 type CharIndexIter = Utf16CharIndexIter<'text>;
624 type IndexLenIter = Utf16IndexLenIter<'text>;
625
626 #[inline]
len(&self) -> usize627 fn len(&self) -> usize {
628 (self as &[u16]).len()
629 }
char_at(&self, index: usize) -> Option<(char, usize)>630 fn char_at(&self, index: usize) -> Option<(char, usize)> {
631 if index >= self.len() {
632 return None;
633 }
634 // Get the indicated code unit and try simply converting it to a char;
635 // this will fail if it is half of a surrogate pair.
636 let c = self[index];
637 if let Some(ch) = char::from_u32(c.into()) {
638 return Some((ch, 1));
639 }
640 // If it's a low surrogate, and was immediately preceded by a high surrogate,
641 // then we're in the middle of a (valid) character, and should return None.
642 if is_low_surrogate(c) && index > 0 && is_high_surrogate(self[index - 1]) {
643 return None;
644 }
645 // Otherwise, try to decode, returning REPLACEMENT_CHARACTER for errors.
646 if let Some(ch) = char::decode_utf16(self[index..].iter().cloned()).next() {
647 if let Ok(ch) = ch {
648 // This must be a surrogate pair, otherwise char::from_u32() above should
649 // have succeeded!
650 debug_assert!(ch.len_utf16() == 2, "BMP should have already been handled");
651 return Some((ch, ch.len_utf16()));
652 }
653 } else {
654 debug_assert!(
655 false,
656 "Why did decode_utf16 return None when we're not at the end?"
657 );
658 return None;
659 }
660 // Failed to decode UTF-16: we must have encountered an unpaired surrogate.
661 // Return REPLACEMENT_CHARACTER (not None), to continue processing the following text
662 // and keep indexing correct.
663 Some((char::REPLACEMENT_CHARACTER, 1))
664 }
665 #[inline]
subrange(&self, range: Range<usize>) -> &Self666 fn subrange(&self, range: Range<usize>) -> &Self {
667 &(self as &[u16])[range]
668 }
669 #[inline]
chars(&'text self) -> Self::CharIter670 fn chars(&'text self) -> Self::CharIter {
671 Utf16CharIter::new(&self)
672 }
673 #[inline]
char_indices(&'text self) -> Self::CharIndexIter674 fn char_indices(&'text self) -> Self::CharIndexIter {
675 Utf16CharIndexIter::new(&self)
676 }
677 #[inline]
indices_lengths(&'text self) -> Self::IndexLenIter678 fn indices_lengths(&'text self) -> Self::IndexLenIter {
679 Utf16IndexLenIter::new(&self)
680 }
681 #[inline]
char_len(ch: char) -> usize682 fn char_len(ch: char) -> usize {
683 ch.len_utf16()
684 }
685 }
686
687 /// Iterator over UTF-16 text in a [u16] slice, returning (index, char_len) tuple.
688 #[derive(Debug)]
689 pub struct Utf16IndexLenIter<'text> {
690 text: &'text [u16],
691 cur_pos: usize,
692 }
693
694 impl<'text> Utf16IndexLenIter<'text> {
695 #[inline]
new(text: &'text [u16]) -> Self696 pub fn new(text: &'text [u16]) -> Self {
697 Utf16IndexLenIter { text, cur_pos: 0 }
698 }
699 }
700
701 impl Iterator for Utf16IndexLenIter<'_> {
702 type Item = (usize, usize);
703
704 #[inline]
next(&mut self) -> Option<Self::Item>705 fn next(&mut self) -> Option<Self::Item> {
706 if let Some((_, char_len)) = self.text.char_at(self.cur_pos) {
707 let result = (self.cur_pos, char_len);
708 self.cur_pos += char_len;
709 return Some(result);
710 }
711 None
712 }
713 }
714
715 /// Iterator over UTF-16 text in a [u16] slice, returning (index, char) tuple.
716 #[derive(Debug)]
717 pub struct Utf16CharIndexIter<'text> {
718 text: &'text [u16],
719 cur_pos: usize,
720 }
721
722 impl<'text> Utf16CharIndexIter<'text> {
new(text: &'text [u16]) -> Self723 pub fn new(text: &'text [u16]) -> Self {
724 Utf16CharIndexIter { text, cur_pos: 0 }
725 }
726 }
727
728 impl Iterator for Utf16CharIndexIter<'_> {
729 type Item = (usize, char);
730
next(&mut self) -> Option<Self::Item>731 fn next(&mut self) -> Option<Self::Item> {
732 if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
733 let result = (self.cur_pos, ch);
734 self.cur_pos += char_len;
735 return Some(result);
736 }
737 None
738 }
739 }
740
741 /// Iterator over UTF-16 text in a [u16] slice, returning Unicode chars.
742 /// (Unlike the other iterators above, this also supports reverse iteration.)
743 #[derive(Debug)]
744 pub struct Utf16CharIter<'text> {
745 text: &'text [u16],
746 cur_pos: usize,
747 end_pos: usize,
748 }
749
750 impl<'text> Utf16CharIter<'text> {
new(text: &'text [u16]) -> Self751 pub fn new(text: &'text [u16]) -> Self {
752 Utf16CharIter {
753 text,
754 cur_pos: 0,
755 end_pos: text.len(),
756 }
757 }
758 }
759
760 impl Iterator for Utf16CharIter<'_> {
761 type Item = char;
762
next(&mut self) -> Option<Self::Item>763 fn next(&mut self) -> Option<Self::Item> {
764 if let Some((ch, char_len)) = self.text.char_at(self.cur_pos) {
765 self.cur_pos += char_len;
766 return Some(ch);
767 }
768 None
769 }
770 }
771
772 impl DoubleEndedIterator for Utf16CharIter<'_> {
next_back(&mut self) -> Option<Self::Item>773 fn next_back(&mut self) -> Option<Self::Item> {
774 if self.end_pos <= self.cur_pos {
775 return None;
776 }
777 self.end_pos -= 1;
778 if let Some(ch) = char::from_u32(self.text[self.end_pos] as u32) {
779 return Some(ch);
780 }
781 if self.end_pos > self.cur_pos {
782 if let Some((ch, char_len)) = self.text.char_at(self.end_pos - 1) {
783 if char_len == 2 {
784 self.end_pos -= 1;
785 return Some(ch);
786 }
787 }
788 }
789 Some(char::REPLACEMENT_CHARACTER)
790 }
791 }
792