1 //! The [cmap](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap) table
2 
3 include!("../../generated/generated_cmap.rs");
4 
/// Result of mapping a codepoint with a variation selector.
///
/// Returned by [`Cmap14::map_variant`] and yielded as the third element of
/// the items produced by [`Cmap14Iter`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum MapVariant {
    /// The variation selector should be ignored and the default mapping
    /// of the character should be used.
    UseDefault,
    /// The variant glyph mapped by a codepoint and associated variation
    /// selector.
    Variant(GlyphId),
}
15 
16 impl<'a> Cmap<'a> {
17     /// Map a codepoint to a nominal glyph identifier
18     ///
19     /// This uses the first available subtable that provides a valid mapping.
20     ///
21     /// # Note:
22     ///
23     /// Mapping logic is currently only implemented for the most common subtable
24     /// formats.
map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId>25     pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
26         let codepoint = codepoint.into();
27         for record in self.encoding_records() {
28             if let Ok(subtable) = record.subtable(self.offset_data()) {
29                 if let Some(gid) = match subtable {
30                     CmapSubtable::Format4(format4) => format4.map_codepoint(codepoint),
31                     CmapSubtable::Format12(format12) => format12.map_codepoint(codepoint),
32                     _ => None,
33                 } {
34                     return Some(gid);
35                 }
36             }
37         }
38         None
39     }
40 }
41 
42 impl<'a> Cmap4<'a> {
43     /// Maps a codepoint to a nominal glyph identifier.
map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId>44     pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
45         let codepoint = codepoint.into();
46         if codepoint > 0xFFFF {
47             return None;
48         }
49         let codepoint = codepoint as u16;
50         let mut lo = 0;
51         let mut hi = self.seg_count_x2() as usize / 2;
52         let start_codes = self.start_code();
53         let end_codes = self.end_code();
54         while lo < hi {
55             let i = (lo + hi) / 2;
56             let start_code = start_codes.get(i)?.get();
57             if codepoint < start_code {
58                 hi = i;
59             } else if codepoint > end_codes.get(i)?.get() {
60                 lo = i + 1;
61             } else {
62                 return self.lookup_glyph_id(codepoint, i, start_code);
63             }
64         }
65         None
66     }
67 
68     /// Returns an iterator over all (codepoint, glyph identifier) pairs
69     /// in the subtable.
iter(&self) -> Cmap4Iter<'a>70     pub fn iter(&self) -> Cmap4Iter<'a> {
71         Cmap4Iter::new(self.clone())
72     }
73 
74     /// Does the final phase of glyph id lookup.
75     ///
76     /// Shared between Self::map and Cmap4Iter.
lookup_glyph_id(&self, codepoint: u16, index: usize, start_code: u16) -> Option<GlyphId>77     fn lookup_glyph_id(&self, codepoint: u16, index: usize, start_code: u16) -> Option<GlyphId> {
78         let deltas = self.id_delta();
79         let range_offsets = self.id_range_offsets();
80         let delta = deltas.get(index)?.get() as i32;
81         let range_offset = range_offsets.get(index)?.get() as usize;
82         if range_offset == 0 {
83             return Some(GlyphId::new((codepoint as i32 + delta) as u16));
84         }
85         let mut offset = range_offset / 2 + (codepoint - start_code) as usize;
86         offset = offset.saturating_sub(range_offsets.len() - index);
87         let gid = self.glyph_id_array().get(offset)?.get();
88         (gid != 0).then_some(GlyphId::new((gid as i32 + delta) as u16))
89     }
90 
91     /// Returns the [start_code, end_code] range at the given index.
code_range(&self, index: usize) -> Option<Range<u32>>92     fn code_range(&self, index: usize) -> Option<Range<u32>> {
93         // Extend to u32 to ensure we don't overflow on the end + 1 bound
94         // below.
95         let start = self.start_code().get(index)?.get() as u32;
96         let end = self.end_code().get(index)?.get() as u32;
97         // Use end + 1 here because the range in the table is inclusive
98         Some(start..end + 1)
99     }
100 }
101 
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
///
/// Created by [`Cmap4::iter`].
#[derive(Clone)]
pub struct Cmap4Iter<'a> {
    /// The format 4 subtable being iterated.
    subtable: Cmap4<'a>,
    /// Codepoints of the current segment that have not been visited yet
    /// (end-exclusive).
    cur_range: std::ops::Range<u32>,
    /// First codepoint of the current segment; passed to the glyph lookup.
    cur_start_code: u16,
    /// Index of the current segment.
    cur_range_ix: usize,
}
111 
112 impl<'a> Cmap4Iter<'a> {
new(subtable: Cmap4<'a>) -> Self113     fn new(subtable: Cmap4<'a>) -> Self {
114         let cur_range = subtable.code_range(0).unwrap_or_default();
115         let cur_start_code = cur_range.start as u16;
116         Self {
117             subtable,
118             cur_range,
119             cur_start_code,
120             cur_range_ix: 0,
121         }
122     }
123 }
124 
125 impl<'a> Iterator for Cmap4Iter<'a> {
126     type Item = (u32, GlyphId);
127 
next(&mut self) -> Option<Self::Item>128     fn next(&mut self) -> Option<Self::Item> {
129         loop {
130             if let Some(codepoint) = self.cur_range.next() {
131                 let glyph_id = self.subtable.lookup_glyph_id(
132                     codepoint as u16,
133                     self.cur_range_ix,
134                     self.cur_start_code,
135                 )?;
136                 // The table might explicitly map some codepoints to 0. Avoid
137                 // returning those here.
138                 if glyph_id == GlyphId::NOTDEF {
139                     continue;
140                 }
141                 return Some((codepoint, glyph_id));
142             } else {
143                 self.cur_range_ix += 1;
144                 self.cur_range = self.subtable.code_range(self.cur_range_ix)?;
145                 self.cur_start_code = self.cur_range.start as u16;
146             }
147         }
148     }
149 }
150 
151 impl<'a> Cmap12<'a> {
152     /// Maps a codepoint to a nominal glyph identifier.
map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId>153     pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> {
154         let codepoint = codepoint.into();
155         let groups = self.groups();
156         let mut lo = 0;
157         let mut hi = groups.len();
158         while lo < hi {
159             let i = (lo + hi) / 2;
160             let group = groups.get(i)?;
161             if codepoint < group.start_char_code() {
162                 hi = i;
163             } else if codepoint > group.end_char_code() {
164                 lo = i + 1;
165             } else {
166                 return Some(self.lookup_glyph_id(
167                     codepoint,
168                     group.start_char_code(),
169                     group.start_glyph_id(),
170                 ));
171             }
172         }
173         None
174     }
175 
176     /// Returns an iterator over all (codepoint, glyph identifier) pairs
177     /// in the subtable.
iter(&self) -> Cmap12Iter<'a>178     pub fn iter(&self) -> Cmap12Iter<'a> {
179         Cmap12Iter::new(self.clone())
180     }
181 
182     /// Does the final phase of glyph id lookup.
183     ///
184     /// Shared between Self::map and Cmap12Iter.
lookup_glyph_id( &self, codepoint: u32, start_char_code: u32, start_glyph_id: u32, ) -> GlyphId185     fn lookup_glyph_id(
186         &self,
187         codepoint: u32,
188         start_char_code: u32,
189         start_glyph_id: u32,
190     ) -> GlyphId {
191         GlyphId::new(start_glyph_id.wrapping_add(codepoint.wrapping_sub(start_char_code)) as u16)
192     }
193 
194     /// Returns the codepoint range and start glyph id for the group
195     /// at the given index.
group(&self, index: usize) -> Option<(Range<u32>, u32)>196     fn group(&self, index: usize) -> Option<(Range<u32>, u32)> {
197         let group = self.groups().get(index)?;
198         Some((
199             // Use + 1 here because the group range is inclusive
200             group.start_char_code()..group.end_char_code() + 1,
201             group.start_glyph_id(),
202         ))
203     }
204 }
205 
/// Iterator over all (codepoint, glyph identifier) pairs in
/// the subtable.
///
/// Created by [`Cmap12::iter`].
#[derive(Clone)]
pub struct Cmap12Iter<'a> {
    /// The format 12 subtable being iterated.
    subtable: Cmap12<'a>,
    /// Codepoints of the current group that have not been visited yet
    /// (end-exclusive).
    cur_range: Range<u32>,
    /// First codepoint of the current group; passed to the glyph lookup.
    cur_start_code: u32,
    /// Start glyph id of the current group; passed to the glyph lookup.
    cur_start_glyph_id: u32,
    /// Index of the current group.
    cur_range_ix: usize,
}
216 
217 impl<'a> Cmap12Iter<'a> {
new(subtable: Cmap12<'a>) -> Self218     fn new(subtable: Cmap12<'a>) -> Self {
219         let (cur_range, cur_start_glyph_id) = subtable.group(0).unwrap_or_default();
220         let cur_start_code = cur_range.start;
221         Self {
222             subtable,
223             cur_range,
224             cur_start_code,
225             cur_start_glyph_id,
226             cur_range_ix: 0,
227         }
228     }
229 }
230 
231 impl<'a> Iterator for Cmap12Iter<'a> {
232     type Item = (u32, GlyphId);
233 
next(&mut self) -> Option<Self::Item>234     fn next(&mut self) -> Option<Self::Item> {
235         loop {
236             if let Some(codepoint) = self.cur_range.next() {
237                 let glyph_id = self.subtable.lookup_glyph_id(
238                     codepoint,
239                     self.cur_start_code,
240                     self.cur_start_glyph_id,
241                 );
242                 // The table might explicitly map some codepoints to 0. Avoid
243                 // returning those here.
244                 if glyph_id == GlyphId::NOTDEF {
245                     continue;
246                 }
247                 return Some((codepoint, glyph_id));
248             } else {
249                 self.cur_range_ix += 1;
250                 (self.cur_range, self.cur_start_glyph_id) =
251                     self.subtable.group(self.cur_range_ix)?;
252                 self.cur_start_code = self.cur_range.start;
253             }
254         }
255     }
256 }
257 
258 impl<'a> Cmap14<'a> {
259     /// Maps a codepoint and variation selector to a nominal glyph identifier.
map_variant( &self, codepoint: impl Into<u32>, selector: impl Into<u32>, ) -> Option<MapVariant>260     pub fn map_variant(
261         &self,
262         codepoint: impl Into<u32>,
263         selector: impl Into<u32>,
264     ) -> Option<MapVariant> {
265         let codepoint = codepoint.into();
266         let selector = selector.into();
267         let selector_records = self.var_selector();
268         // Variation selector records are sorted in order of var_selector. Binary search to find
269         // the appropriate record.
270         let selector_record = selector_records
271             .binary_search_by(|rec| {
272                 let rec_selector: u32 = rec.var_selector().into();
273                 rec_selector.cmp(&selector)
274             })
275             .ok()
276             .and_then(|idx| selector_records.get(idx))?;
277         // If a default UVS table is present in this selector record, binary search on the ranges
278         // (start_unicode_value, start_unicode_value + additional_count) to find the requested codepoint.
279         // If found, ignore the selector and return a value indicating that the default cmap mapping
280         // should be used.
281         if let Some(Ok(default_uvs)) = selector_record.default_uvs(self.offset_data()) {
282             use core::cmp::Ordering;
283             let found_default_uvs = default_uvs
284                 .ranges()
285                 .binary_search_by(|range| {
286                     let start = range.start_unicode_value().into();
287                     if codepoint < start {
288                         Ordering::Greater
289                     } else if codepoint > (start + range.additional_count() as u32) {
290                         Ordering::Less
291                     } else {
292                         Ordering::Equal
293                     }
294                 })
295                 .is_ok();
296             if found_default_uvs {
297                 return Some(MapVariant::UseDefault);
298             }
299         }
300         // Binary search the non-default UVS table if present. This maps codepoint+selector to a variant glyph.
301         let non_default_uvs = selector_record.non_default_uvs(self.offset_data())?.ok()?;
302         let mapping = non_default_uvs.uvs_mapping();
303         let ix = mapping
304             .binary_search_by(|map| {
305                 let map_codepoint: u32 = map.unicode_value().into();
306                 map_codepoint.cmp(&codepoint)
307             })
308             .ok()?;
309         Some(MapVariant::Variant(GlyphId::new(
310             mapping.get(ix)?.glyph_id(),
311         )))
312     }
313 
314     /// Returns an iterator over all (codepoint, selector, mapping variant)
315     /// triples in the subtable.
iter(&self) -> Cmap14Iter<'a>316     pub fn iter(&self) -> Cmap14Iter<'a> {
317         Cmap14Iter::new(self.clone())
318     }
319 
selector( &self, index: usize, ) -> ( Option<VariationSelector>, Option<DefaultUvs<'a>>, Option<NonDefaultUvs<'a>>, )320     fn selector(
321         &self,
322         index: usize,
323     ) -> (
324         Option<VariationSelector>,
325         Option<DefaultUvs<'a>>,
326         Option<NonDefaultUvs<'a>>,
327     ) {
328         let selector = self.var_selector().get(index).cloned();
329         let default_uvs = selector.as_ref().and_then(|selector| {
330             selector
331                 .default_uvs(self.offset_data())
332                 .transpose()
333                 .ok()
334                 .flatten()
335         });
336         let non_default_uvs = selector.as_ref().and_then(|selector| {
337             selector
338                 .non_default_uvs(self.offset_data())
339                 .transpose()
340                 .ok()
341                 .flatten()
342         });
343         (selector, default_uvs, non_default_uvs)
344     }
345 }
346 
/// Iterator over all (codepoint, selector, mapping variant) triples
/// in the subtable.
///
/// Created by [`Cmap14::iter`].
#[derive(Clone)]
pub struct Cmap14Iter<'a> {
    /// The format 14 subtable being iterated.
    subtable: Cmap14<'a>,
    /// The current variation selector record; `None` once the records are
    /// exhausted.
    selector_record: Option<VariationSelector>,
    /// Remaining codepoints from the current record's default UVS table,
    /// if one was loaded.
    default_uvs: Option<DefaultUvsIter<'a>>,
    /// Remaining mappings from the current record's non-default UVS table,
    /// if one was loaded.
    non_default_uvs: Option<NonDefaultUvsIter<'a>>,
    /// Index of the current variation selector record.
    cur_selector_ix: usize,
}
357 
358 impl<'a> Cmap14Iter<'a> {
new(subtable: Cmap14<'a>) -> Self359     fn new(subtable: Cmap14<'a>) -> Self {
360         let (selector_record, default_uvs, non_default_uvs) = subtable.selector(0);
361         Self {
362             subtable,
363             selector_record,
364             default_uvs: default_uvs.map(DefaultUvsIter::new),
365             non_default_uvs: non_default_uvs.map(NonDefaultUvsIter::new),
366             cur_selector_ix: 0,
367         }
368     }
369 }
370 
371 impl<'a> Iterator for Cmap14Iter<'a> {
372     type Item = (u32, u32, MapVariant);
373 
next(&mut self) -> Option<Self::Item>374     fn next(&mut self) -> Option<Self::Item> {
375         loop {
376             let selector_record = self.selector_record.as_ref()?;
377             let selector: u32 = selector_record.var_selector().into();
378             if let Some(default_uvs) = self.default_uvs.as_mut() {
379                 if let Some(codepoint) = default_uvs.next() {
380                     return Some((codepoint, selector, MapVariant::UseDefault));
381                 }
382             }
383             if let Some(non_default_uvs) = self.non_default_uvs.as_mut() {
384                 if let Some((codepoint, variant)) = non_default_uvs.next() {
385                     return Some((codepoint, selector, MapVariant::Variant(variant)));
386                 }
387             }
388             self.cur_selector_ix += 1;
389             let (selector_record, default_uvs, non_default_uvs) =
390                 self.subtable.selector(self.cur_selector_ix);
391             self.selector_record = selector_record;
392             self.default_uvs = default_uvs.map(DefaultUvsIter::new);
393             self.non_default_uvs = non_default_uvs.map(NonDefaultUvsIter::new);
394         }
395     }
396 }
397 
/// Iterator over every codepoint covered by the ranges of a default UVS
/// table.
#[derive(Clone)]
struct DefaultUvsIter<'a> {
    /// Ranges that have not been expanded yet.
    ranges: std::slice::Iter<'a, UnicodeRange>,
    /// Codepoints of the current range that have not been yielded yet
    /// (end-exclusive).
    cur_range: Range<u32>,
}
403 
404 impl<'a> DefaultUvsIter<'a> {
new(ranges: DefaultUvs<'a>) -> Self405     fn new(ranges: DefaultUvs<'a>) -> Self {
406         let mut ranges = ranges.ranges().iter();
407         let cur_range = if let Some(range) = ranges.next() {
408             let start: u32 = range.start_unicode_value().into();
409             let end = start + range.additional_count() as u32 + 1;
410             start..end
411         } else {
412             0..0
413         };
414         Self { ranges, cur_range }
415     }
416 }
417 
418 impl<'a> Iterator for DefaultUvsIter<'a> {
419     type Item = u32;
420 
next(&mut self) -> Option<Self::Item>421     fn next(&mut self) -> Option<Self::Item> {
422         loop {
423             if let Some(codepoint) = self.cur_range.next() {
424                 return Some(codepoint);
425             }
426             let range = self.ranges.next()?;
427             let start: u32 = range.start_unicode_value().into();
428             let end = start + range.additional_count() as u32 + 1;
429             self.cur_range = start..end;
430         }
431     }
432 }
433 
/// Iterator over the (codepoint, glyph identifier) pairs of a non-default
/// UVS table.
#[derive(Clone)]
struct NonDefaultUvsIter<'a> {
    /// Mappings that have not been yielded yet.
    iter: std::slice::Iter<'a, UvsMapping>,
}
438 
439 impl<'a> NonDefaultUvsIter<'a> {
new(uvs: NonDefaultUvs<'a>) -> Self440     fn new(uvs: NonDefaultUvs<'a>) -> Self {
441         Self {
442             iter: uvs.uvs_mapping().iter(),
443         }
444     }
445 }
446 
447 impl<'a> Iterator for NonDefaultUvsIter<'a> {
448     type Item = (u32, GlyphId);
449 
next(&mut self) -> Option<Self::Item>450     fn next(&mut self) -> Option<Self::Item> {
451         let mapping = self.iter.next()?;
452         let codepoint: u32 = mapping.unicode_value().into();
453         let glyph_id = GlyphId::new(mapping.glyph_id());
454         Some((codepoint, glyph_id))
455     }
456 }
457 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{FontRef, GlyphId, TableProvider};

    /// Shorthand for the expected result of a successful nominal mapping.
    fn gid(raw: u16) -> Option<GlyphId> {
        Some(GlyphId::new(raw))
    }

    /// Nominal lookups through the top-level table for two test fonts.
    #[test]
    fn map_codepoints() {
        let vazirmatn = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap();
        let table = vazirmatn.cmap().unwrap();
        assert_eq!(table.map_codepoint('A'), gid(1));
        assert_eq!(table.map_codepoint('À'), gid(2));
        assert_eq!(table.map_codepoint('`'), gid(3));
        assert_eq!(table.map_codepoint('B'), None);

        let simple_glyf = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap();
        let table = simple_glyf.cmap().unwrap();
        assert_eq!(table.map_codepoint(' '), gid(1));
        assert_eq!(table.map_codepoint(0xE_u32), gid(2));
        assert_eq!(table.map_codepoint('B'), None);
    }

    /// Variation selector lookups through a format 14 subtable.
    #[test]
    fn map_variants() {
        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
        let table = font.cmap().unwrap();
        let subtable = find_cmap14(&table).unwrap();
        let selector = '\u{e0100}';

        assert_eq!(subtable.map_variant('a', selector), None);

        let use_default = Some(MapVariant::UseDefault);
        assert_eq!(subtable.map_variant('\u{4e00}', selector), use_default);
        assert_eq!(subtable.map_variant('\u{4e06}', selector), use_default);

        assert_eq!(
            subtable.map_variant('\u{4e08}', selector),
            Some(MapVariant::Variant(GlyphId::new(25)))
        );
        assert_eq!(
            subtable.map_variant('\u{4e09}', selector),
            Some(MapVariant::Variant(GlyphId::new(26)))
        );
    }

    /// Checks that every pair yielded by the format 4 iterator agrees with
    /// `map_codepoint` and that exactly `expected_pairs` pairs are yielded.
    fn assert_cmap4_iter_matches_lookup(font_data: &[u8], expected_pairs: usize) {
        let font = FontRef::new(font_data).unwrap();
        let subtable = find_cmap4(&font.cmap().unwrap()).unwrap();
        let mut seen = 0;
        for (codepoint, glyph_id) in subtable.iter() {
            assert_eq!(subtable.map_codepoint(codepoint), Some(glyph_id));
            seen += 1;
        }
        assert_eq!(seen, expected_pairs);
    }

    #[test]
    fn cmap4_iter() {
        assert_cmap4_iter_matches_lookup(font_test_data::VAZIRMATN_VAR, 3);
        assert_cmap4_iter_matches_lookup(font_test_data::SIMPLE_GLYF, 2);
    }

    /// Every pair yielded by the format 12 iterator agrees with
    /// `map_codepoint`.
    #[test]
    fn cmap12_iter() {
        let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap();
        let subtable = find_cmap12(&font.cmap().unwrap()).unwrap();
        let mut seen = 0;
        for (codepoint, glyph_id) in subtable.iter() {
            assert_eq!(subtable.map_codepoint(codepoint), Some(glyph_id));
            seen += 1;
        }
        assert_eq!(seen, 10);
    }

    /// Every triple yielded by the format 14 iterator agrees with
    /// `map_variant`.
    #[test]
    fn cmap14_iter() {
        let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap();
        let subtable = find_cmap14(&font.cmap().unwrap()).unwrap();
        let mut seen = 0;
        for (codepoint, selector, mapping) in subtable.iter() {
            assert_eq!(subtable.map_variant(codepoint, selector), Some(mapping));
            seen += 1;
        }
        assert_eq!(seen, 7);
    }

    /// Returns the first format 4 subtable that resolves successfully.
    fn find_cmap4<'a>(cmap: &Cmap<'a>) -> Option<Cmap4<'a>> {
        cmap.encoding_records()
            .iter()
            .find_map(|record| match record.subtable(cmap.offset_data()) {
                Ok(CmapSubtable::Format4(subtable)) => Some(subtable),
                _ => None,
            })
    }

    /// Returns the first format 12 subtable that resolves successfully.
    fn find_cmap12<'a>(cmap: &Cmap<'a>) -> Option<Cmap12<'a>> {
        cmap.encoding_records()
            .iter()
            .find_map(|record| match record.subtable(cmap.offset_data()) {
                Ok(CmapSubtable::Format12(subtable)) => Some(subtable),
                _ => None,
            })
    }

    /// Returns the first format 14 subtable that resolves successfully.
    fn find_cmap14<'a>(cmap: &Cmap<'a>) -> Option<Cmap14<'a>> {
        cmap.encoding_records()
            .iter()
            .find_map(|record| match record.subtable(cmap.offset_data()) {
                Ok(CmapSubtable::Format14(subtable)) => Some(subtable),
                _ => None,
            })
    }
}
573