1 // Copyright 2015 The Servo Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 //! 3.3.2 Explicit Levels and Directions
11 //!
12 //! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
13
14 use alloc::vec::Vec;
15
16 use super::char_data::{
17 is_rtl,
18 BidiClass::{self, *},
19 };
20 use super::level::Level;
21 use super::TextSource;
22
23 /// Compute explicit embedding levels for one paragraph of text (X1-X8).
24 ///
25 /// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
26 /// for each char in `text`.
27 #[cfg_attr(feature = "flame_it", flamer::flame)]
compute<'a, T: TextSource<'a> + ?Sized>( text: &'a T, para_level: Level, original_classes: &[BidiClass], levels: &mut [Level], processing_classes: &mut [BidiClass], )28 pub fn compute<'a, T: TextSource<'a> + ?Sized>(
29 text: &'a T,
30 para_level: Level,
31 original_classes: &[BidiClass],
32 levels: &mut [Level],
33 processing_classes: &mut [BidiClass],
34 ) {
35 assert_eq!(text.len(), original_classes.len());
36
37 // <http://www.unicode.org/reports/tr9/#X1>
38 let mut stack = DirectionalStatusStack::new();
39 stack.push(para_level, OverrideStatus::Neutral);
40
41 let mut overflow_isolate_count = 0u32;
42 let mut overflow_embedding_count = 0u32;
43 let mut valid_isolate_count = 0u32;
44
45 for (i, len) in text.indices_lengths() {
46 match original_classes[i] {
47 // Rules X2-X5c
48 RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
49 let last_level = stack.last().level;
50
51 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
52 levels[i] = last_level;
53
54 // X5a-X5c: Isolate initiators get the level of the last entry on the stack.
55 let is_isolate = match original_classes[i] {
56 RLI | LRI | FSI => true,
57 _ => false,
58 };
59 if is_isolate {
60 // Redundant due to "Retaining explicit formatting characters" step.
61 // levels[i] = last_level;
62 match stack.last().status {
63 OverrideStatus::RTL => processing_classes[i] = R,
64 OverrideStatus::LTR => processing_classes[i] = L,
65 _ => {}
66 }
67 }
68
69 let new_level = if is_rtl(original_classes[i]) {
70 last_level.new_explicit_next_rtl()
71 } else {
72 last_level.new_explicit_next_ltr()
73 };
74 if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
75 {
76 let new_level = new_level.unwrap();
77 stack.push(
78 new_level,
79 match original_classes[i] {
80 RLO => OverrideStatus::RTL,
81 LRO => OverrideStatus::LTR,
82 RLI | LRI | FSI => OverrideStatus::Isolate,
83 _ => OverrideStatus::Neutral,
84 },
85 );
86 if is_isolate {
87 valid_isolate_count += 1;
88 } else {
89 // The spec doesn't explicitly mention this step, but it is necessary.
90 // See the reference implementations for comparison.
91 levels[i] = new_level;
92 }
93 } else if is_isolate {
94 overflow_isolate_count += 1;
95 } else if overflow_isolate_count == 0 {
96 overflow_embedding_count += 1;
97 }
98
99 if !is_isolate {
100 // X9 +
101 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
102 // (PDF handled below)
103 processing_classes[i] = BN;
104 }
105 }
106
107 // <http://www.unicode.org/reports/tr9/#X6a>
108 PDI => {
109 if overflow_isolate_count > 0 {
110 overflow_isolate_count -= 1;
111 } else if valid_isolate_count > 0 {
112 overflow_embedding_count = 0;
113 loop {
114 // Pop everything up to and including the last Isolate status.
115 match stack.vec.pop() {
116 None
117 | Some(Status {
118 status: OverrideStatus::Isolate,
119 ..
120 }) => break,
121 _ => continue,
122 }
123 }
124 valid_isolate_count -= 1;
125 }
126 let last = stack.last();
127 levels[i] = last.level;
128 match last.status {
129 OverrideStatus::RTL => processing_classes[i] = R,
130 OverrideStatus::LTR => processing_classes[i] = L,
131 _ => {}
132 }
133 }
134
135 // <http://www.unicode.org/reports/tr9/#X7>
136 PDF => {
137 if overflow_isolate_count > 0 {
138 // do nothing
139 } else if overflow_embedding_count > 0 {
140 overflow_embedding_count -= 1;
141 } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 {
142 stack.vec.pop();
143 }
144 // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
145 levels[i] = stack.last().level;
146 // X9 part of retaining explicit formatting characters.
147 processing_classes[i] = BN;
148 }
149
150 // Nothing.
151 // BN case moved down to X6, see <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
152 B => {}
153
154 // <http://www.unicode.org/reports/tr9/#X6>
155 _ => {
156 let last = stack.last();
157 levels[i] = last.level;
158 // This condition is not in the spec, but I am pretty sure that is a spec bug.
159 // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
160 if original_classes[i] != BN {
161 match last.status {
162 OverrideStatus::RTL => processing_classes[i] = R,
163 OverrideStatus::LTR => processing_classes[i] = L,
164 _ => {}
165 }
166 }
167 }
168 }
169
170 // Handle multi-byte characters.
171 for j in 1..len {
172 levels[i + j] = levels[i];
173 processing_classes[i + j] = processing_classes[i];
174 }
175 }
176 }
177
/// Entries in the directional status stack:
///
/// Each entry pairs an embedding level with the status that was pushed
/// together with it.
struct Status {
    /// Embedding level recorded for this entry.
    level: Level,
    /// Override / isolate status recorded for this entry.
    status: OverrideStatus,
}
183
/// Directional override / isolate status stored in a directional status stack entry.
///
/// The variants are plain tags, so the type is `Copy` and fully comparable;
/// `Debug` is derived so values can appear in assertions and diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum OverrideStatus {
    /// No directional override is in effect (used for the paragraph entry and
    /// for plain embeddings).
    Neutral,
    /// Right-to-left override: affected characters are treated as `R`.
    RTL,
    /// Left-to-right override: affected characters are treated as `L`.
    LTR,
    /// Entry pushed by an isolate initiator; marks how far a matching `PDI`
    /// unwinds the stack.
    Isolate,
}
191
192 struct DirectionalStatusStack {
193 vec: Vec<Status>,
194 }
195
196 impl DirectionalStatusStack {
new() -> Self197 fn new() -> Self {
198 DirectionalStatusStack {
199 vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2),
200 }
201 }
202
push(&mut self, level: Level, status: OverrideStatus)203 fn push(&mut self, level: Level, status: OverrideStatus) {
204 self.vec.push(Status { level, status });
205 }
206
last(&self) -> &Status207 fn last(&self) -> &Status {
208 self.vec.last().unwrap()
209 }
210 }
211