1 //! The [cmap](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap) table 2 3 include!("../../generated/generated_cmap.rs"); 4 5 /// Result of mapping a codepoint with a variation selector. 6 #[derive(Copy, Clone, PartialEq, Eq, Debug)] 7 pub enum MapVariant { 8 /// The variation selector should be ignored and the default mapping 9 /// of the character should be used. 10 UseDefault, 11 /// The variant glyph mapped by a codepoint and associated variation 12 /// selector. 13 Variant(GlyphId), 14 } 15 16 impl<'a> Cmap<'a> { 17 /// Map a codepoint to a nominal glyph identifier 18 /// 19 /// This uses the first available subtable that provides a valid mapping. 20 /// 21 /// # Note: 22 /// 23 /// Mapping logic is currently only implemented for the most common subtable 24 /// formats. map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId>25 pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> { 26 let codepoint = codepoint.into(); 27 for record in self.encoding_records() { 28 if let Ok(subtable) = record.subtable(self.offset_data()) { 29 if let Some(gid) = match subtable { 30 CmapSubtable::Format4(format4) => format4.map_codepoint(codepoint), 31 CmapSubtable::Format12(format12) => format12.map_codepoint(codepoint), 32 _ => None, 33 } { 34 return Some(gid); 35 } 36 } 37 } 38 None 39 } 40 } 41 42 impl<'a> Cmap4<'a> { 43 /// Maps a codepoint to a nominal glyph identifier. map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId>44 pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> { 45 let codepoint = codepoint.into(); 46 if codepoint > 0xFFFF { 47 return None; 48 } 49 let codepoint = codepoint as u16; 50 let mut lo = 0; 51 let mut hi = self.seg_count_x2() as usize / 2; 52 let start_codes = self.start_code(); 53 let end_codes = self.end_code(); 54 while lo < hi { 55 let i = (lo + hi) / 2; 56 let start_code = start_codes.get(i)?.get(); 57 if codepoint < start_code { 58 hi = i; 59 } else if codepoint > end_codes.get(i)?.get() { 60 lo = i + 1; 61 } else { 62 return self.lookup_glyph_id(codepoint, i, start_code); 63 } 64 } 65 None 66 } 67 68 /// Returns an iterator over all (codepoint, glyph identifier) pairs 69 /// in the subtable. iter(&self) -> Cmap4Iter<'a>70 pub fn iter(&self) -> Cmap4Iter<'a> { 71 Cmap4Iter::new(self.clone()) 72 } 73 74 /// Does the final phase of glyph id lookup. 75 /// 76 /// Shared between Self::map and Cmap4Iter. lookup_glyph_id(&self, codepoint: u16, index: usize, start_code: u16) -> Option<GlyphId>77 fn lookup_glyph_id(&self, codepoint: u16, index: usize, start_code: u16) -> Option<GlyphId> { 78 let deltas = self.id_delta(); 79 let range_offsets = self.id_range_offsets(); 80 let delta = deltas.get(index)?.get() as i32; 81 let range_offset = range_offsets.get(index)?.get() as usize; 82 if range_offset == 0 { 83 return Some(GlyphId::new((codepoint as i32 + delta) as u16)); 84 } 85 let mut offset = range_offset / 2 + (codepoint - start_code) as usize; 86 offset = offset.saturating_sub(range_offsets.len() - index); 87 let gid = self.glyph_id_array().get(offset)?.get(); 88 (gid != 0).then_some(GlyphId::new((gid as i32 + delta) as u16)) 89 } 90 91 /// Returns the [start_code, end_code] range at the given index. code_range(&self, index: usize) -> Option<Range<u32>>92 fn code_range(&self, index: usize) -> Option<Range<u32>> { 93 // Extend to u32 to ensure we don't overflow on the end + 1 bound 94 // below. 95 let start = self.start_code().get(index)?.get() as u32; 96 let end = self.end_code().get(index)?.get() as u32; 97 // Use end + 1 here because the range in the table is inclusive 98 Some(start..end + 1) 99 } 100 } 101 102 /// Iterator over all (codepoint, glyph identifier) pairs in 103 /// the subtable. 104 #[derive(Clone)] 105 pub struct Cmap4Iter<'a> { 106 subtable: Cmap4<'a>, 107 cur_range: std::ops::Range<u32>, 108 cur_start_code: u16, 109 cur_range_ix: usize, 110 } 111 112 impl<'a> Cmap4Iter<'a> { new(subtable: Cmap4<'a>) -> Self113 fn new(subtable: Cmap4<'a>) -> Self { 114 let cur_range = subtable.code_range(0).unwrap_or_default(); 115 let cur_start_code = cur_range.start as u16; 116 Self { 117 subtable, 118 cur_range, 119 cur_start_code, 120 cur_range_ix: 0, 121 } 122 } 123 } 124 125 impl<'a> Iterator for Cmap4Iter<'a> { 126 type Item = (u32, GlyphId); 127 next(&mut self) -> Option<Self::Item>128 fn next(&mut self) -> Option<Self::Item> { 129 loop { 130 if let Some(codepoint) = self.cur_range.next() { 131 let glyph_id = self.subtable.lookup_glyph_id( 132 codepoint as u16, 133 self.cur_range_ix, 134 self.cur_start_code, 135 )?; 136 // The table might explicitly map some codepoints to 0. Avoid 137 // returning those here. 138 if glyph_id == GlyphId::NOTDEF { 139 continue; 140 } 141 return Some((codepoint, glyph_id)); 142 } else { 143 self.cur_range_ix += 1; 144 self.cur_range = self.subtable.code_range(self.cur_range_ix)?; 145 self.cur_start_code = self.cur_range.start as u16; 146 } 147 } 148 } 149 } 150 151 impl<'a> Cmap12<'a> { 152 /// Maps a codepoint to a nominal glyph identifier. map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId>153 pub fn map_codepoint(&self, codepoint: impl Into<u32>) -> Option<GlyphId> { 154 let codepoint = codepoint.into(); 155 let groups = self.groups(); 156 let mut lo = 0; 157 let mut hi = groups.len(); 158 while lo < hi { 159 let i = (lo + hi) / 2; 160 let group = groups.get(i)?; 161 if codepoint < group.start_char_code() { 162 hi = i; 163 } else if codepoint > group.end_char_code() { 164 lo = i + 1; 165 } else { 166 return Some(self.lookup_glyph_id( 167 codepoint, 168 group.start_char_code(), 169 group.start_glyph_id(), 170 )); 171 } 172 } 173 None 174 } 175 176 /// Returns an iterator over all (codepoint, glyph identifier) pairs 177 /// in the subtable. iter(&self) -> Cmap12Iter<'a>178 pub fn iter(&self) -> Cmap12Iter<'a> { 179 Cmap12Iter::new(self.clone()) 180 } 181 182 /// Does the final phase of glyph id lookup. 183 /// 184 /// Shared between Self::map and Cmap12Iter. lookup_glyph_id( &self, codepoint: u32, start_char_code: u32, start_glyph_id: u32, ) -> GlyphId185 fn lookup_glyph_id( 186 &self, 187 codepoint: u32, 188 start_char_code: u32, 189 start_glyph_id: u32, 190 ) -> GlyphId { 191 GlyphId::new(start_glyph_id.wrapping_add(codepoint.wrapping_sub(start_char_code)) as u16) 192 } 193 194 /// Returns the codepoint range and start glyph id for the group 195 /// at the given index. group(&self, index: usize) -> Option<(Range<u32>, u32)>196 fn group(&self, index: usize) -> Option<(Range<u32>, u32)> { 197 let group = self.groups().get(index)?; 198 Some(( 199 // Use + 1 here because the group range is inclusive 200 group.start_char_code()..group.end_char_code() + 1, 201 group.start_glyph_id(), 202 )) 203 } 204 } 205 206 /// Iterator over all (codepoint, glyph identifier) pairs in 207 /// the subtable. 208 #[derive(Clone)] 209 pub struct Cmap12Iter<'a> { 210 subtable: Cmap12<'a>, 211 cur_range: Range<u32>, 212 cur_start_code: u32, 213 cur_start_glyph_id: u32, 214 cur_range_ix: usize, 215 } 216 217 impl<'a> Cmap12Iter<'a> { new(subtable: Cmap12<'a>) -> Self218 fn new(subtable: Cmap12<'a>) -> Self { 219 let (cur_range, cur_start_glyph_id) = subtable.group(0).unwrap_or_default(); 220 let cur_start_code = cur_range.start; 221 Self { 222 subtable, 223 cur_range, 224 cur_start_code, 225 cur_start_glyph_id, 226 cur_range_ix: 0, 227 } 228 } 229 } 230 231 impl<'a> Iterator for Cmap12Iter<'a> { 232 type Item = (u32, GlyphId); 233 next(&mut self) -> Option<Self::Item>234 fn next(&mut self) -> Option<Self::Item> { 235 loop { 236 if let Some(codepoint) = self.cur_range.next() { 237 let glyph_id = self.subtable.lookup_glyph_id( 238 codepoint, 239 self.cur_start_code, 240 self.cur_start_glyph_id, 241 ); 242 // The table might explicitly map some codepoints to 0. Avoid 243 // returning those here. 244 if glyph_id == GlyphId::NOTDEF { 245 continue; 246 } 247 return Some((codepoint, glyph_id)); 248 } else { 249 self.cur_range_ix += 1; 250 (self.cur_range, self.cur_start_glyph_id) = 251 self.subtable.group(self.cur_range_ix)?; 252 self.cur_start_code = self.cur_range.start; 253 } 254 } 255 } 256 } 257 258 impl<'a> Cmap14<'a> { 259 /// Maps a codepoint and variation selector to a nominal glyph identifier. map_variant( &self, codepoint: impl Into<u32>, selector: impl Into<u32>, ) -> Option<MapVariant>260 pub fn map_variant( 261 &self, 262 codepoint: impl Into<u32>, 263 selector: impl Into<u32>, 264 ) -> Option<MapVariant> { 265 let codepoint = codepoint.into(); 266 let selector = selector.into(); 267 let selector_records = self.var_selector(); 268 // Variation selector records are sorted in order of var_selector. Binary search to find 269 // the appropriate record. 270 let selector_record = selector_records 271 .binary_search_by(|rec| { 272 let rec_selector: u32 = rec.var_selector().into(); 273 rec_selector.cmp(&selector) 274 }) 275 .ok() 276 .and_then(|idx| selector_records.get(idx))?; 277 // If a default UVS table is present in this selector record, binary search on the ranges 278 // (start_unicode_value, start_unicode_value + additional_count) to find the requested codepoint. 279 // If found, ignore the selector and return a value indicating that the default cmap mapping 280 // should be used. 281 if let Some(Ok(default_uvs)) = selector_record.default_uvs(self.offset_data()) { 282 use core::cmp::Ordering; 283 let found_default_uvs = default_uvs 284 .ranges() 285 .binary_search_by(|range| { 286 let start = range.start_unicode_value().into(); 287 if codepoint < start { 288 Ordering::Greater 289 } else if codepoint > (start + range.additional_count() as u32) { 290 Ordering::Less 291 } else { 292 Ordering::Equal 293 } 294 }) 295 .is_ok(); 296 if found_default_uvs { 297 return Some(MapVariant::UseDefault); 298 } 299 } 300 // Binary search the non-default UVS table if present. This maps codepoint+selector to a variant glyph. 301 let non_default_uvs = selector_record.non_default_uvs(self.offset_data())?.ok()?; 302 let mapping = non_default_uvs.uvs_mapping(); 303 let ix = mapping 304 .binary_search_by(|map| { 305 let map_codepoint: u32 = map.unicode_value().into(); 306 map_codepoint.cmp(&codepoint) 307 }) 308 .ok()?; 309 Some(MapVariant::Variant(GlyphId::new( 310 mapping.get(ix)?.glyph_id(), 311 ))) 312 } 313 314 /// Returns an iterator over all (codepoint, selector, mapping variant) 315 /// triples in the subtable. iter(&self) -> Cmap14Iter<'a>316 pub fn iter(&self) -> Cmap14Iter<'a> { 317 Cmap14Iter::new(self.clone()) 318 } 319 selector( &self, index: usize, ) -> ( Option<VariationSelector>, Option<DefaultUvs<'a>>, Option<NonDefaultUvs<'a>>, )320 fn selector( 321 &self, 322 index: usize, 323 ) -> ( 324 Option<VariationSelector>, 325 Option<DefaultUvs<'a>>, 326 Option<NonDefaultUvs<'a>>, 327 ) { 328 let selector = self.var_selector().get(index).cloned(); 329 let default_uvs = selector.as_ref().and_then(|selector| { 330 selector 331 .default_uvs(self.offset_data()) 332 .transpose() 333 .ok() 334 .flatten() 335 }); 336 let non_default_uvs = selector.as_ref().and_then(|selector| { 337 selector 338 .non_default_uvs(self.offset_data()) 339 .transpose() 340 .ok() 341 .flatten() 342 }); 343 (selector, default_uvs, non_default_uvs) 344 } 345 } 346 347 /// Iterator over all (codepoint, selector, mapping variant) triples 348 /// in the subtable. 349 #[derive(Clone)] 350 pub struct Cmap14Iter<'a> { 351 subtable: Cmap14<'a>, 352 selector_record: Option<VariationSelector>, 353 default_uvs: Option<DefaultUvsIter<'a>>, 354 non_default_uvs: Option<NonDefaultUvsIter<'a>>, 355 cur_selector_ix: usize, 356 } 357 358 impl<'a> Cmap14Iter<'a> { new(subtable: Cmap14<'a>) -> Self359 fn new(subtable: Cmap14<'a>) -> Self { 360 let (selector_record, default_uvs, non_default_uvs) = subtable.selector(0); 361 Self { 362 subtable, 363 selector_record, 364 default_uvs: default_uvs.map(DefaultUvsIter::new), 365 non_default_uvs: non_default_uvs.map(NonDefaultUvsIter::new), 366 cur_selector_ix: 0, 367 } 368 } 369 } 370 371 impl<'a> Iterator for Cmap14Iter<'a> { 372 type Item = (u32, u32, MapVariant); 373 next(&mut self) -> Option<Self::Item>374 fn next(&mut self) -> Option<Self::Item> { 375 loop { 376 let selector_record = self.selector_record.as_ref()?; 377 let selector: u32 = selector_record.var_selector().into(); 378 if let Some(default_uvs) = self.default_uvs.as_mut() { 379 if let Some(codepoint) = default_uvs.next() { 380 return Some((codepoint, selector, MapVariant::UseDefault)); 381 } 382 } 383 if let Some(non_default_uvs) = self.non_default_uvs.as_mut() { 384 if let Some((codepoint, variant)) = non_default_uvs.next() { 385 return Some((codepoint, selector, MapVariant::Variant(variant))); 386 } 387 } 388 self.cur_selector_ix += 1; 389 let (selector_record, default_uvs, non_default_uvs) = 390 self.subtable.selector(self.cur_selector_ix); 391 self.selector_record = selector_record; 392 self.default_uvs = default_uvs.map(DefaultUvsIter::new); 393 self.non_default_uvs = non_default_uvs.map(NonDefaultUvsIter::new); 394 } 395 } 396 } 397 398 #[derive(Clone)] 399 struct DefaultUvsIter<'a> { 400 ranges: std::slice::Iter<'a, UnicodeRange>, 401 cur_range: Range<u32>, 402 } 403 404 impl<'a> DefaultUvsIter<'a> { new(ranges: DefaultUvs<'a>) -> Self405 fn new(ranges: DefaultUvs<'a>) -> Self { 406 let mut ranges = ranges.ranges().iter(); 407 let cur_range = if let Some(range) = ranges.next() { 408 let start: u32 = range.start_unicode_value().into(); 409 let end = start + range.additional_count() as u32 + 1; 410 start..end 411 } else { 412 0..0 413 }; 414 Self { ranges, cur_range } 415 } 416 } 417 418 impl<'a> Iterator for DefaultUvsIter<'a> { 419 type Item = u32; 420 next(&mut self) -> Option<Self::Item>421 fn next(&mut self) -> Option<Self::Item> { 422 loop { 423 if let Some(codepoint) = self.cur_range.next() { 424 return Some(codepoint); 425 } 426 let range = self.ranges.next()?; 427 let start: u32 = range.start_unicode_value().into(); 428 let end = start + range.additional_count() as u32 + 1; 429 self.cur_range = start..end; 430 } 431 } 432 } 433 434 #[derive(Clone)] 435 struct NonDefaultUvsIter<'a> { 436 iter: std::slice::Iter<'a, UvsMapping>, 437 } 438 439 impl<'a> NonDefaultUvsIter<'a> { new(uvs: NonDefaultUvs<'a>) -> Self440 fn new(uvs: NonDefaultUvs<'a>) -> Self { 441 Self { 442 iter: uvs.uvs_mapping().iter(), 443 } 444 } 445 } 446 447 impl<'a> Iterator for NonDefaultUvsIter<'a> { 448 type Item = (u32, GlyphId); 449 next(&mut self) -> Option<Self::Item>450 fn next(&mut self) -> Option<Self::Item> { 451 let mapping = self.iter.next()?; 452 let codepoint: u32 = mapping.unicode_value().into(); 453 let glyph_id = GlyphId::new(mapping.glyph_id()); 454 Some((codepoint, glyph_id)) 455 } 456 } 457 458 #[cfg(test)] 459 mod tests { 460 use super::*; 461 use crate::{FontRef, GlyphId, TableProvider}; 462 463 #[test] map_codepoints()464 fn map_codepoints() { 465 let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap(); 466 let cmap = font.cmap().unwrap(); 467 assert_eq!(cmap.map_codepoint('A'), Some(GlyphId::new(1))); 468 assert_eq!(cmap.map_codepoint('À'), Some(GlyphId::new(2))); 469 assert_eq!(cmap.map_codepoint('`'), Some(GlyphId::new(3))); 470 assert_eq!(cmap.map_codepoint('B'), None); 471 472 let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap(); 473 let cmap = font.cmap().unwrap(); 474 assert_eq!(cmap.map_codepoint(' '), Some(GlyphId::new(1))); 475 assert_eq!(cmap.map_codepoint(0xE_u32), Some(GlyphId::new(2))); 476 assert_eq!(cmap.map_codepoint('B'), None); 477 } 478 479 #[test] map_variants()480 fn map_variants() { 481 use super::MapVariant::*; 482 let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap(); 483 let cmap = font.cmap().unwrap(); 484 let cmap14 = find_cmap14(&cmap).unwrap(); 485 let selector = '\u{e0100}'; 486 assert_eq!(cmap14.map_variant('a', selector), None); 487 assert_eq!(cmap14.map_variant('\u{4e00}', selector), Some(UseDefault)); 488 assert_eq!(cmap14.map_variant('\u{4e06}', selector), Some(UseDefault)); 489 assert_eq!( 490 cmap14.map_variant('\u{4e08}', selector), 491 Some(Variant(GlyphId::new(25))) 492 ); 493 assert_eq!( 494 cmap14.map_variant('\u{4e09}', selector), 495 Some(Variant(GlyphId::new(26))) 496 ); 497 } 498 499 #[test] cmap4_iter()500 fn cmap4_iter() { 501 let font = FontRef::new(font_test_data::VAZIRMATN_VAR).unwrap(); 502 let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap(); 503 let mut count = 0; 504 for (codepoint, glyph_id) in cmap4.iter() { 505 assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id)); 506 count += 1; 507 } 508 assert_eq!(count, 3); 509 let font = FontRef::new(font_test_data::SIMPLE_GLYF).unwrap(); 510 let cmap4 = find_cmap4(&font.cmap().unwrap()).unwrap(); 511 let mut count = 0; 512 for (codepoint, glyph_id) in cmap4.iter() { 513 assert_eq!(cmap4.map_codepoint(codepoint), Some(glyph_id)); 514 count += 1; 515 } 516 assert_eq!(count, 2); 517 } 518 519 #[test] cmap12_iter()520 fn cmap12_iter() { 521 let font = FontRef::new(font_test_data::CMAP12_FONT1).unwrap(); 522 let cmap12 = find_cmap12(&font.cmap().unwrap()).unwrap(); 523 let mut count = 0; 524 for (codepoint, glyph_id) in cmap12.iter() { 525 assert_eq!(cmap12.map_codepoint(codepoint), Some(glyph_id)); 526 count += 1; 527 } 528 assert_eq!(count, 10); 529 } 530 531 #[test] cmap14_iter()532 fn cmap14_iter() { 533 let font = FontRef::new(font_test_data::CMAP14_FONT1).unwrap(); 534 let cmap14 = find_cmap14(&font.cmap().unwrap()).unwrap(); 535 let mut count = 0; 536 for (codepoint, selector, mapping) in cmap14.iter() { 537 assert_eq!(cmap14.map_variant(codepoint, selector), Some(mapping)); 538 count += 1; 539 } 540 assert_eq!(count, 7); 541 } 542 find_cmap4<'a>(cmap: &Cmap<'a>) -> Option<Cmap4<'a>>543 fn find_cmap4<'a>(cmap: &Cmap<'a>) -> Option<Cmap4<'a>> { 544 cmap.encoding_records() 545 .iter() 546 .filter_map(|record| record.subtable(cmap.offset_data()).ok()) 547 .find_map(|subtable| match subtable { 548 CmapSubtable::Format4(cmap4) => Some(cmap4), 549 _ => None, 550 }) 551 } 552 find_cmap12<'a>(cmap: &Cmap<'a>) -> Option<Cmap12<'a>>553 fn find_cmap12<'a>(cmap: &Cmap<'a>) -> Option<Cmap12<'a>> { 554 cmap.encoding_records() 555 .iter() 556 .filter_map(|record| record.subtable(cmap.offset_data()).ok()) 557 .find_map(|subtable| match subtable { 558 CmapSubtable::Format12(cmap12) => Some(cmap12), 559 _ => None, 560 }) 561 } 562 find_cmap14<'a>(cmap: &Cmap<'a>) -> Option<Cmap14<'a>>563 fn find_cmap14<'a>(cmap: &Cmap<'a>) -> Option<Cmap14<'a>> { 564 cmap.encoding_records() 565 .iter() 566 .filter_map(|record| record.subtable(cmap.offset_data()).ok()) 567 .find_map(|subtable| match subtable { 568 CmapSubtable::Format14(cmap14) => Some(cmap14), 569 _ => None, 570 }) 571 } 572 } 573