1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 //! 3.3.2 Explicit Levels and Directions
11 //!
12 //! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
13 
14 use alloc::vec::Vec;
15 
16 use super::char_data::{
17     is_rtl,
18     BidiClass::{self, *},
19 };
20 use super::level::Level;
21 use super::TextSource;
22 
23 /// Compute explicit embedding levels for one paragraph of text (X1-X8).
24 ///
25 /// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
26 /// for each char in `text`.
27 #[cfg_attr(feature = "flame_it", flamer::flame)]
compute<'a, T: TextSource<'a> + ?Sized>( text: &'a T, para_level: Level, original_classes: &[BidiClass], levels: &mut [Level], processing_classes: &mut [BidiClass], )28 pub fn compute<'a, T: TextSource<'a> + ?Sized>(
29     text: &'a T,
30     para_level: Level,
31     original_classes: &[BidiClass],
32     levels: &mut [Level],
33     processing_classes: &mut [BidiClass],
34 ) {
35     assert_eq!(text.len(), original_classes.len());
36 
37     // <http://www.unicode.org/reports/tr9/#X1>
38     let mut stack = DirectionalStatusStack::new();
39     stack.push(para_level, OverrideStatus::Neutral);
40 
41     let mut overflow_isolate_count = 0u32;
42     let mut overflow_embedding_count = 0u32;
43     let mut valid_isolate_count = 0u32;
44 
45     for (i, len) in text.indices_lengths() {
46         match original_classes[i] {
47             // Rules X2-X5c
48             RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
49                 let last_level = stack.last().level;
50 
51                 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
52                 levels[i] = last_level;
53 
54                 // X5a-X5c: Isolate initiators get the level of the last entry on the stack.
55                 let is_isolate = match original_classes[i] {
56                     RLI | LRI | FSI => true,
57                     _ => false,
58                 };
59                 if is_isolate {
60                     // Redundant due to "Retaining explicit formatting characters" step.
61                     // levels[i] = last_level;
62                     match stack.last().status {
63                         OverrideStatus::RTL => processing_classes[i] = R,
64                         OverrideStatus::LTR => processing_classes[i] = L,
65                         _ => {}
66                     }
67                 }
68 
69                 let new_level = if is_rtl(original_classes[i]) {
70                     last_level.new_explicit_next_rtl()
71                 } else {
72                     last_level.new_explicit_next_ltr()
73                 };
74                 if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
75                 {
76                     let new_level = new_level.unwrap();
77                     stack.push(
78                         new_level,
79                         match original_classes[i] {
80                             RLO => OverrideStatus::RTL,
81                             LRO => OverrideStatus::LTR,
82                             RLI | LRI | FSI => OverrideStatus::Isolate,
83                             _ => OverrideStatus::Neutral,
84                         },
85                     );
86                     if is_isolate {
87                         valid_isolate_count += 1;
88                     } else {
89                         // The spec doesn't explicitly mention this step, but it is necessary.
90                         // See the reference implementations for comparison.
91                         levels[i] = new_level;
92                     }
93                 } else if is_isolate {
94                     overflow_isolate_count += 1;
95                 } else if overflow_isolate_count == 0 {
96                     overflow_embedding_count += 1;
97                 }
98 
99                 if !is_isolate {
100                     // X9 +
101                     // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
102                     // (PDF handled below)
103                     processing_classes[i] = BN;
104                 }
105             }
106 
107             // <http://www.unicode.org/reports/tr9/#X6a>
108             PDI => {
109                 if overflow_isolate_count > 0 {
110                     overflow_isolate_count -= 1;
111                 } else if valid_isolate_count > 0 {
112                     overflow_embedding_count = 0;
113                     loop {
114                         // Pop everything up to and including the last Isolate status.
115                         match stack.vec.pop() {
116                             None
117                             | Some(Status {
118                                 status: OverrideStatus::Isolate,
119                                 ..
120                             }) => break,
121                             _ => continue,
122                         }
123                     }
124                     valid_isolate_count -= 1;
125                 }
126                 let last = stack.last();
127                 levels[i] = last.level;
128                 match last.status {
129                     OverrideStatus::RTL => processing_classes[i] = R,
130                     OverrideStatus::LTR => processing_classes[i] = L,
131                     _ => {}
132                 }
133             }
134 
135             // <http://www.unicode.org/reports/tr9/#X7>
136             PDF => {
137                 if overflow_isolate_count > 0 {
138                     // do nothing
139                 } else if overflow_embedding_count > 0 {
140                     overflow_embedding_count -= 1;
141                 } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 {
142                     stack.vec.pop();
143                 }
144                 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
145                 levels[i] = stack.last().level;
146                 // X9 part of retaining explicit formatting characters.
147                 processing_classes[i] = BN;
148             }
149 
150             // Nothing.
151             // BN case moved down to X6, see <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
152             B => {}
153 
154             // <http://www.unicode.org/reports/tr9/#X6>
155             _ => {
156                 let last = stack.last();
157                 levels[i] = last.level;
158                 // This condition is not in the spec, but I am pretty sure that is a spec bug.
159                 // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
160                 if original_classes[i] != BN {
161                     match last.status {
162                         OverrideStatus::RTL => processing_classes[i] = R,
163                         OverrideStatus::LTR => processing_classes[i] = L,
164                         _ => {}
165                     }
166                 }
167             }
168         }
169 
170         // Handle multi-byte characters.
171         for j in 1..len {
172             levels[i + j] = levels[i];
173             processing_classes[i + j] = processing_classes[i];
174         }
175     }
176 }
177 
/// Entries in the directional status stack:
///
/// Each entry pairs an embedding level with the directional override status that
/// `compute` pushed alongside it.
struct Status {
    /// Embedding level assigned to characters while this entry is on top of the stack.
    level: Level,
    /// Override/isolate status recorded when the entry was pushed.
    status: OverrideStatus,
}
183 
/// Directional override status stored in each stack entry.
#[derive(PartialEq)]
enum OverrideStatus {
    /// No override in effect; pushed for the paragraph level and for RLE/LRE embeddings.
    Neutral,
    /// Pushed for RLO; `compute` rewrites affected processing classes to `R`.
    RTL,
    /// Pushed for LRO; `compute` rewrites affected processing classes to `L`.
    LTR,
    /// Pushed for RLI/LRI/FSI; a PDF never pops such an entry, a matching PDI pops through it.
    Isolate,
}
191 
/// The directional status stack used by `compute`, stored as a `Vec` whose last element
/// is the top of the stack.
struct DirectionalStatusStack {
    /// Backing storage; `compute` pops from it directly when handling PDI and PDF.
    vec: Vec<Status>,
}
195 
196 impl DirectionalStatusStack {
new() -> Self197     fn new() -> Self {
198         DirectionalStatusStack {
199             vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2),
200         }
201     }
202 
push(&mut self, level: Level, status: OverrideStatus)203     fn push(&mut self, level: Level, status: OverrideStatus) {
204         self.vec.push(Status { level, status });
205     }
206 
last(&self) -> &Status207     fn last(&self) -> &Status {
208         self.vec.last().unwrap()
209     }
210 }
211