1from fontTools.misc.textTools import bytesjoin, safeEval, readHex 2from fontTools.misc.encodingTools import getEncoding 3from fontTools.ttLib import getSearchRange 4from fontTools.unicode import Unicode 5from . import DefaultTable 6import sys 7import struct 8import array 9import logging 10 11 12log = logging.getLogger(__name__) 13 14 15def _make_map(font, chars, gids): 16 assert len(chars) == len(gids) 17 glyphNames = font.getGlyphNameMany(gids) 18 cmap = {} 19 for char, gid, name in zip(chars, gids, glyphNames): 20 if gid == 0: 21 continue 22 cmap[char] = name 23 return cmap 24 25 26class table__c_m_a_p(DefaultTable.DefaultTable): 27 """Character to Glyph Index Mapping Table 28 29 This class represents the `cmap <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>`_ 30 table, which maps between input characters (in Unicode or other system encodings) 31 and glyphs within the font. The ``cmap`` table contains one or more subtables 32 which determine the mapping of of characters to glyphs across different platforms 33 and encoding systems. 34 35 ``table__c_m_a_p`` objects expose an accessor ``.tables`` which provides access 36 to the subtables, although it is normally easier to retrieve individual subtables 37 through the utility methods described below. To add new subtables to a font, 38 first determine the subtable format (if in doubt use format 4 for glyphs within 39 the BMP, format 12 for glyphs outside the BMP, and format 14 for Unicode Variation 40 Sequences) construct subtable objects with ``CmapSubtable.newSubtable(format)``, 41 and append them to the ``.tables`` list. 42 43 Within a subtable, the mapping of characters to glyphs is provided by the ``.cmap`` 44 attribute. 45 46 Example:: 47 48 cmap4_0_3 = CmapSubtable.newSubtable(4) 49 cmap4_0_3.platformID = 0 50 cmap4_0_3.platEncID = 3 51 cmap4_0_3.language = 0 52 cmap4_0_3.cmap = { 0xC1: "Aacute" } 53 54 cmap = newTable("cmap") 55 cmap.tableVersion = 0 56 cmap.tables = [cmap4_0_3] 57 """ 58 59 def getcmap(self, platformID, platEncID): 60 """Returns the first subtable which matches the given platform and encoding. 61 62 Args: 63 platformID (int): The platform ID. Use 0 for Unicode, 1 for Macintosh 64 (deprecated for new fonts), 2 for ISO (deprecated) and 3 for Windows. 65 encodingID (int): Encoding ID. Interpretation depends on the platform ID. 66 See the OpenType specification for details. 67 68 Returns: 69 An object which is a subclass of :py:class:`CmapSubtable` if a matching 70 subtable is found within the font, or ``None`` otherwise. 71 """ 72 73 for subtable in self.tables: 74 if subtable.platformID == platformID and subtable.platEncID == platEncID: 75 return subtable 76 return None # not found 77 78 def getBestCmap( 79 self, 80 cmapPreferences=( 81 (3, 10), 82 (0, 6), 83 (0, 4), 84 (3, 1), 85 (0, 3), 86 (0, 2), 87 (0, 1), 88 (0, 0), 89 ), 90 ): 91 """Returns the 'best' Unicode cmap dictionary available in the font 92 or ``None``, if no Unicode cmap subtable is available. 93 94 By default it will search for the following (platformID, platEncID) 95 pairs in order:: 96 97 (3, 10), # Windows Unicode full repertoire 98 (0, 6), # Unicode full repertoire (format 13 subtable) 99 (0, 4), # Unicode 2.0 full repertoire 100 (3, 1), # Windows Unicode BMP 101 (0, 3), # Unicode 2.0 BMP 102 (0, 2), # Unicode ISO/IEC 10646 103 (0, 1), # Unicode 1.1 104 (0, 0) # Unicode 1.0 105 106 This particular order matches what HarfBuzz uses to choose what 107 subtable to use by default. This order prefers the largest-repertoire 108 subtable, and among those, prefers the Windows-platform over the 109 Unicode-platform as the former has wider support. 110 111 This order can be customized via the ``cmapPreferences`` argument. 112 """ 113 for platformID, platEncID in cmapPreferences: 114 cmapSubtable = self.getcmap(platformID, platEncID) 115 if cmapSubtable is not None: 116 return cmapSubtable.cmap 117 return None # None of the requested cmap subtables were found 118 119 def buildReversed(self): 120 """Builds a reverse mapping dictionary 121 122 Iterates over all Unicode cmap tables and returns a dictionary mapping 123 glyphs to sets of codepoints, such as:: 124 125 { 126 'one': {0x31} 127 'A': {0x41,0x391} 128 } 129 130 The values are sets of Unicode codepoints because 131 some fonts map different codepoints to the same glyph. 132 For example, ``U+0041 LATIN CAPITAL LETTER A`` and ``U+0391 133 GREEK CAPITAL LETTER ALPHA`` are sometimes the same glyph. 134 """ 135 result = {} 136 for subtable in self.tables: 137 if subtable.isUnicode(): 138 for codepoint, name in subtable.cmap.items(): 139 result.setdefault(name, set()).add(codepoint) 140 return result 141 142 def decompile(self, data, ttFont): 143 tableVersion, numSubTables = struct.unpack(">HH", data[:4]) 144 self.tableVersion = int(tableVersion) 145 self.tables = tables = [] 146 seenOffsets = {} 147 for i in range(numSubTables): 148 platformID, platEncID, offset = struct.unpack( 149 ">HHl", data[4 + i * 8 : 4 + (i + 1) * 8] 150 ) 151 platformID, platEncID = int(platformID), int(platEncID) 152 format, length = struct.unpack(">HH", data[offset : offset + 4]) 153 if format in [8, 10, 12, 13]: 154 format, reserved, length = struct.unpack( 155 ">HHL", data[offset : offset + 8] 156 ) 157 elif format in [14]: 158 format, length = struct.unpack(">HL", data[offset : offset + 6]) 159 160 if not length: 161 log.error( 162 "cmap subtable is reported as having zero length: platformID %s, " 163 "platEncID %s, format %s offset %s. Skipping table.", 164 platformID, 165 platEncID, 166 format, 167 offset, 168 ) 169 continue 170 table = CmapSubtable.newSubtable(format) 171 table.platformID = platformID 172 table.platEncID = platEncID 173 # Note that by default we decompile only the subtable header info; 174 # any other data gets decompiled only when an attribute of the 175 # subtable is referenced. 176 table.decompileHeader(data[offset : offset + int(length)], ttFont) 177 if offset in seenOffsets: 178 table.data = None # Mark as decompiled 179 table.cmap = tables[seenOffsets[offset]].cmap 180 else: 181 seenOffsets[offset] = i 182 tables.append(table) 183 if ttFont.lazy is False: # Be lazy for None and True 184 self.ensureDecompiled() 185 186 def ensureDecompiled(self, recurse=False): 187 # The recurse argument is unused, but part of the signature of 188 # ensureDecompiled across the library. 189 for st in self.tables: 190 st.ensureDecompiled() 191 192 def compile(self, ttFont): 193 self.tables.sort() # sort according to the spec; see CmapSubtable.__lt__() 194 numSubTables = len(self.tables) 195 totalOffset = 4 + 8 * numSubTables 196 data = struct.pack(">HH", self.tableVersion, numSubTables) 197 tableData = b"" 198 seen = ( 199 {} 200 ) # Some tables are the same object reference. Don't compile them twice. 201 done = ( 202 {} 203 ) # Some tables are different objects, but compile to the same data chunk 204 for table in self.tables: 205 offset = seen.get(id(table.cmap)) 206 if offset is None: 207 chunk = table.compile(ttFont) 208 offset = done.get(chunk) 209 if offset is None: 210 offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len( 211 tableData 212 ) 213 tableData = tableData + chunk 214 data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) 215 return data + tableData 216 217 def toXML(self, writer, ttFont): 218 writer.simpletag("tableVersion", version=self.tableVersion) 219 writer.newline() 220 for table in self.tables: 221 table.toXML(writer, ttFont) 222 223 def fromXML(self, name, attrs, content, ttFont): 224 if name == "tableVersion": 225 self.tableVersion = safeEval(attrs["version"]) 226 return 227 if name[:12] != "cmap_format_": 228 return 229 if not hasattr(self, "tables"): 230 self.tables = [] 231 format = safeEval(name[12:]) 232 table = CmapSubtable.newSubtable(format) 233 table.platformID = safeEval(attrs["platformID"]) 234 table.platEncID = safeEval(attrs["platEncID"]) 235 table.fromXML(name, attrs, content, ttFont) 236 self.tables.append(table) 237 238 239class CmapSubtable(object): 240 """Base class for all cmap subtable formats. 241 242 Subclasses which handle the individual subtable formats are named 243 ``cmap_format_0``, ``cmap_format_2`` etc. Use :py:meth:`getSubtableClass` 244 to retrieve the concrete subclass, or :py:meth:`newSubtable` to get a 245 new subtable object for a given format. 246 247 The object exposes a ``.cmap`` attribute, which contains a dictionary mapping 248 character codepoints to glyph names. 249 """ 250 251 @staticmethod 252 def getSubtableClass(format): 253 """Return the subtable class for a format.""" 254 return cmap_classes.get(format, cmap_format_unknown) 255 256 @staticmethod 257 def newSubtable(format): 258 """Return a new instance of a subtable for the given format 259 .""" 260 subtableClass = CmapSubtable.getSubtableClass(format) 261 return subtableClass(format) 262 263 def __init__(self, format): 264 self.format = format 265 self.data = None 266 self.ttFont = None 267 self.platformID = None #: The platform ID of this subtable 268 self.platEncID = None #: The encoding ID of this subtable (interpretation depends on ``platformID``) 269 self.language = ( 270 None #: The language ID of this subtable (Macintosh platform only) 271 ) 272 273 def ensureDecompiled(self, recurse=False): 274 # The recurse argument is unused, but part of the signature of 275 # ensureDecompiled across the library. 276 if self.data is None: 277 return 278 self.decompile(None, None) # use saved data. 279 self.data = None # Once this table has been decompiled, make sure we don't 280 # just return the original data. Also avoids recursion when 281 # called with an attribute that the cmap subtable doesn't have. 282 283 def __getattr__(self, attr): 284 # allow lazy decompilation of subtables. 285 if attr[:2] == "__": # don't handle requests for member functions like '__lt__' 286 raise AttributeError(attr) 287 if self.data is None: 288 raise AttributeError(attr) 289 self.ensureDecompiled() 290 return getattr(self, attr) 291 292 def decompileHeader(self, data, ttFont): 293 format, length, language = struct.unpack(">HHH", data[:6]) 294 assert ( 295 len(data) == length 296 ), "corrupt cmap table format %d (data length: %d, header length: %d)" % ( 297 format, 298 len(data), 299 length, 300 ) 301 self.format = int(format) 302 self.length = int(length) 303 self.language = int(language) 304 self.data = data[6:] 305 self.ttFont = ttFont 306 307 def toXML(self, writer, ttFont): 308 writer.begintag( 309 self.__class__.__name__, 310 [ 311 ("platformID", self.platformID), 312 ("platEncID", self.platEncID), 313 ("language", self.language), 314 ], 315 ) 316 writer.newline() 317 codes = sorted(self.cmap.items()) 318 self._writeCodes(codes, writer) 319 writer.endtag(self.__class__.__name__) 320 writer.newline() 321 322 def getEncoding(self, default=None): 323 """Returns the Python encoding name for this cmap subtable based on its platformID, 324 platEncID, and language. If encoding for these values is not known, by default 325 ``None`` is returned. That can be overridden by passing a value to the ``default`` 326 argument. 327 328 Note that if you want to choose a "preferred" cmap subtable, most of the time 329 ``self.isUnicode()`` is what you want as that one only returns true for the modern, 330 commonly used, Unicode-compatible triplets, not the legacy ones. 331 """ 332 return getEncoding(self.platformID, self.platEncID, self.language, default) 333 334 def isUnicode(self): 335 """Returns true if the characters are interpreted as Unicode codepoints.""" 336 return self.platformID == 0 or ( 337 self.platformID == 3 and self.platEncID in [0, 1, 10] 338 ) 339 340 def isSymbol(self): 341 """Returns true if the subtable is for the Symbol encoding (3,0)""" 342 return self.platformID == 3 and self.platEncID == 0 343 344 def _writeCodes(self, codes, writer): 345 isUnicode = self.isUnicode() 346 for code, name in codes: 347 writer.simpletag("map", code=hex(code), name=name) 348 if isUnicode: 349 writer.comment(Unicode[code]) 350 writer.newline() 351 352 def __lt__(self, other): 353 if not isinstance(other, CmapSubtable): 354 return NotImplemented 355 356 # implemented so that list.sort() sorts according to the spec. 357 selfTuple = ( 358 getattr(self, "platformID", None), 359 getattr(self, "platEncID", None), 360 getattr(self, "language", None), 361 self.__dict__, 362 ) 363 otherTuple = ( 364 getattr(other, "platformID", None), 365 getattr(other, "platEncID", None), 366 getattr(other, "language", None), 367 other.__dict__, 368 ) 369 return selfTuple < otherTuple 370 371 372class cmap_format_0(CmapSubtable): 373 def decompile(self, data, ttFont): 374 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 375 # If not, someone is calling the subtable decompile() directly, and must provide both args. 376 if data is not None and ttFont is not None: 377 self.decompileHeader(data, ttFont) 378 else: 379 assert ( 380 data is None and ttFont is None 381 ), "Need both data and ttFont arguments" 382 data = ( 383 self.data 384 ) # decompileHeader assigns the data after the header to self.data 385 assert 262 == self.length, "Format 0 cmap subtable not 262 bytes" 386 gids = array.array("B") 387 gids.frombytes(self.data) 388 charCodes = list(range(len(gids))) 389 self.cmap = _make_map(self.ttFont, charCodes, gids) 390 391 def compile(self, ttFont): 392 if self.data: 393 return struct.pack(">HHH", 0, 262, self.language) + self.data 394 395 cmap = self.cmap 396 assert set(cmap.keys()).issubset(range(256)) 397 getGlyphID = ttFont.getGlyphID 398 valueList = [getGlyphID(cmap[i]) if i in cmap else 0 for i in range(256)] 399 400 gids = array.array("B", valueList) 401 data = struct.pack(">HHH", 0, 262, self.language) + gids.tobytes() 402 assert len(data) == 262 403 return data 404 405 def fromXML(self, name, attrs, content, ttFont): 406 self.language = safeEval(attrs["language"]) 407 if not hasattr(self, "cmap"): 408 self.cmap = {} 409 cmap = self.cmap 410 for element in content: 411 if not isinstance(element, tuple): 412 continue 413 name, attrs, content = element 414 if name != "map": 415 continue 416 cmap[safeEval(attrs["code"])] = attrs["name"] 417 418 419subHeaderFormat = ">HHhH" 420 421 422class SubHeader(object): 423 def __init__(self): 424 self.firstCode = None 425 self.entryCount = None 426 self.idDelta = None 427 self.idRangeOffset = None 428 self.glyphIndexArray = [] 429 430 431class cmap_format_2(CmapSubtable): 432 def setIDDelta(self, subHeader): 433 subHeader.idDelta = 0 434 # find the minGI which is not zero. 435 minGI = subHeader.glyphIndexArray[0] 436 for gid in subHeader.glyphIndexArray: 437 if (gid != 0) and (gid < minGI): 438 minGI = gid 439 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 440 # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K. 441 # We would like to pick an idDelta such that the first glyphArray GID is 1, 442 # so that we are more likely to be able to combine glypharray GID subranges. 443 # This means that we have a problem when minGI is > 32K 444 # Since the final gi is reconstructed from the glyphArray GID by: 445 # (short)finalGID = (gid + idDelta) % 0x10000), 446 # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the 447 # negative number to an unsigned short. 448 449 if minGI > 1: 450 if minGI > 0x7FFF: 451 subHeader.idDelta = -(0x10000 - minGI) - 1 452 else: 453 subHeader.idDelta = minGI - 1 454 idDelta = subHeader.idDelta 455 for i in range(subHeader.entryCount): 456 gid = subHeader.glyphIndexArray[i] 457 if gid > 0: 458 subHeader.glyphIndexArray[i] = gid - idDelta 459 460 def decompile(self, data, ttFont): 461 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 462 # If not, someone is calling the subtable decompile() directly, and must provide both args. 463 if data is not None and ttFont is not None: 464 self.decompileHeader(data, ttFont) 465 else: 466 assert ( 467 data is None and ttFont is None 468 ), "Need both data and ttFont arguments" 469 470 data = ( 471 self.data 472 ) # decompileHeader assigns the data after the header to self.data 473 subHeaderKeys = [] 474 maxSubHeaderindex = 0 475 # get the key array, and determine the number of subHeaders. 476 allKeys = array.array("H") 477 allKeys.frombytes(data[:512]) 478 data = data[512:] 479 if sys.byteorder != "big": 480 allKeys.byteswap() 481 subHeaderKeys = [key // 8 for key in allKeys] 482 maxSubHeaderindex = max(subHeaderKeys) 483 484 # Load subHeaders 485 subHeaderList = [] 486 pos = 0 487 for i in range(maxSubHeaderindex + 1): 488 subHeader = SubHeader() 489 ( 490 subHeader.firstCode, 491 subHeader.entryCount, 492 subHeader.idDelta, 493 subHeader.idRangeOffset, 494 ) = struct.unpack(subHeaderFormat, data[pos : pos + 8]) 495 pos += 8 496 giDataPos = pos + subHeader.idRangeOffset - 2 497 giList = array.array("H") 498 giList.frombytes(data[giDataPos : giDataPos + subHeader.entryCount * 2]) 499 if sys.byteorder != "big": 500 giList.byteswap() 501 subHeader.glyphIndexArray = giList 502 subHeaderList.append(subHeader) 503 # How this gets processed. 504 # Charcodes may be one or two bytes. 505 # The first byte of a charcode is mapped through the subHeaderKeys, to select 506 # a subHeader. For any subheader but 0, the next byte is then mapped through the 507 # selected subheader. If subheader Index 0 is selected, then the byte itself is 508 # mapped through the subheader, and there is no second byte. 509 # Then assume that the subsequent byte is the first byte of the next charcode,and repeat. 510 # 511 # Each subheader references a range in the glyphIndexArray whose length is entryCount. 512 # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray 513 # referenced by another subheader. 514 # The only subheader that will be referenced by more than one first-byte value is the subheader 515 # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef: 516 # {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx} 517 # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex. 518 # A subheader specifies a subrange within (0...256) by the 519 # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero 520 # (e.g. glyph not in font). 521 # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar). 522 # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 523 # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the 524 # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex. 525 # Example for Logocut-Medium 526 # first byte of charcode = 129; selects subheader 1. 527 # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252} 528 # second byte of charCode = 66 529 # the index offset = 66-64 = 2. 530 # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is: 531 # [glyphIndexArray index], [subrange array index] = glyphIndex 532 # [256], [0]=1 from charcode [129, 64] 533 # [257], [1]=2 from charcode [129, 65] 534 # [258], [2]=3 from charcode [129, 66] 535 # [259], [3]=4 from charcode [129, 67] 536 # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero, 537 # add it to the glyphID to get the final glyphIndex 538 # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew! 539 540 self.data = b"" 541 cmap = {} 542 notdefGI = 0 543 for firstByte in range(256): 544 subHeadindex = subHeaderKeys[firstByte] 545 subHeader = subHeaderList[subHeadindex] 546 if subHeadindex == 0: 547 if (firstByte < subHeader.firstCode) or ( 548 firstByte >= subHeader.firstCode + subHeader.entryCount 549 ): 550 continue # gi is notdef. 551 else: 552 charCode = firstByte 553 offsetIndex = firstByte - subHeader.firstCode 554 gi = subHeader.glyphIndexArray[offsetIndex] 555 if gi != 0: 556 gi = (gi + subHeader.idDelta) % 0x10000 557 else: 558 continue # gi is notdef. 559 cmap[charCode] = gi 560 else: 561 if subHeader.entryCount: 562 charCodeOffset = firstByte * 256 + subHeader.firstCode 563 for offsetIndex in range(subHeader.entryCount): 564 charCode = charCodeOffset + offsetIndex 565 gi = subHeader.glyphIndexArray[offsetIndex] 566 if gi != 0: 567 gi = (gi + subHeader.idDelta) % 0x10000 568 else: 569 continue 570 cmap[charCode] = gi 571 # If not subHeader.entryCount, then all char codes with this first byte are 572 # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the 573 # same as mapping it to .notdef. 574 575 gids = list(cmap.values()) 576 charCodes = list(cmap.keys()) 577 self.cmap = _make_map(self.ttFont, charCodes, gids) 578 579 def compile(self, ttFont): 580 if self.data: 581 return ( 582 struct.pack(">HHH", self.format, self.length, self.language) + self.data 583 ) 584 kEmptyTwoCharCodeRange = -1 585 notdefGI = 0 586 587 items = sorted(self.cmap.items()) 588 charCodes = [item[0] for item in items] 589 names = [item[1] for item in items] 590 nameMap = ttFont.getReverseGlyphMap() 591 try: 592 gids = [nameMap[name] for name in names] 593 except KeyError: 594 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 595 try: 596 gids = [nameMap[name] for name in names] 597 except KeyError: 598 # allow virtual GIDs in format 2 tables 599 gids = [] 600 for name in names: 601 try: 602 gid = nameMap[name] 603 except KeyError: 604 try: 605 if name[:3] == "gid": 606 gid = int(name[3:]) 607 else: 608 gid = ttFont.getGlyphID(name) 609 except: 610 raise KeyError(name) 611 612 gids.append(gid) 613 614 # Process the (char code to gid) item list in char code order. 615 # By definition, all one byte char codes map to subheader 0. 616 # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, 617 # which defines all char codes in its range to map to notdef) unless proven otherwise. 618 # Note that since the char code items are processed in char code order, all the char codes with the 619 # same first byte are in sequential order. 620 621 subHeaderKeys = [ 622 kEmptyTwoCharCodeRange for x in range(256) 623 ] # list of indices into subHeaderList. 624 subHeaderList = [] 625 626 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up 627 # with a cmap where all the one byte char codes map to notdef, 628 # with the result that the subhead 0 would not get created just by processing the item list. 629 charCode = charCodes[0] 630 if charCode > 255: 631 subHeader = SubHeader() 632 subHeader.firstCode = 0 633 subHeader.entryCount = 0 634 subHeader.idDelta = 0 635 subHeader.idRangeOffset = 0 636 subHeaderList.append(subHeader) 637 638 lastFirstByte = -1 639 items = zip(charCodes, gids) 640 for charCode, gid in items: 641 if gid == 0: 642 continue 643 firstbyte = charCode >> 8 644 secondByte = charCode & 0x00FF 645 646 if ( 647 firstbyte != lastFirstByte 648 ): # Need to update the current subhead, and start a new one. 649 if lastFirstByte > -1: 650 # fix GI's and iDelta of current subheader. 651 self.setIDDelta(subHeader) 652 653 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero 654 # for the indices matching the char codes. 655 if lastFirstByte == 0: 656 for index in range(subHeader.entryCount): 657 charCode = subHeader.firstCode + index 658 subHeaderKeys[charCode] = 0 659 660 assert subHeader.entryCount == len( 661 subHeader.glyphIndexArray 662 ), "Error - subhead entry count does not match len of glyphID subrange." 663 # init new subheader 664 subHeader = SubHeader() 665 subHeader.firstCode = secondByte 666 subHeader.entryCount = 1 667 subHeader.glyphIndexArray.append(gid) 668 subHeaderList.append(subHeader) 669 subHeaderKeys[firstbyte] = len(subHeaderList) - 1 670 lastFirstByte = firstbyte 671 else: 672 # need to fill in with notdefs all the code points between the last charCode and the current charCode. 673 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) 674 for i in range(codeDiff): 675 subHeader.glyphIndexArray.append(notdefGI) 676 subHeader.glyphIndexArray.append(gid) 677 subHeader.entryCount = subHeader.entryCount + codeDiff + 1 678 679 # fix GI's and iDelta of last subheader that we we added to the subheader array. 680 self.setIDDelta(subHeader) 681 682 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. 683 subHeader = SubHeader() 684 subHeader.firstCode = 0 685 subHeader.entryCount = 0 686 subHeader.idDelta = 0 687 subHeader.idRangeOffset = 2 688 subHeaderList.append(subHeader) 689 emptySubheadIndex = len(subHeaderList) - 1 690 for index in range(256): 691 if subHeaderKeys[index] == kEmptyTwoCharCodeRange: 692 subHeaderKeys[index] = emptySubheadIndex 693 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the 694 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, 695 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 696 # charcode 0 and GID 0. 697 698 idRangeOffset = ( 699 len(subHeaderList) - 1 700 ) * 8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. 701 subheadRangeLen = ( 702 len(subHeaderList) - 1 703 ) # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. 704 for index in range(subheadRangeLen): 705 subHeader = subHeaderList[index] 706 subHeader.idRangeOffset = 0 707 for j in range(index): 708 prevSubhead = subHeaderList[j] 709 if ( 710 prevSubhead.glyphIndexArray == subHeader.glyphIndexArray 711 ): # use the glyphIndexArray subarray 712 subHeader.idRangeOffset = ( 713 prevSubhead.idRangeOffset - (index - j) * 8 714 ) 715 subHeader.glyphIndexArray = [] 716 break 717 if subHeader.idRangeOffset == 0: # didn't find one. 718 subHeader.idRangeOffset = idRangeOffset 719 idRangeOffset = ( 720 idRangeOffset - 8 721 ) + subHeader.entryCount * 2 # one less subheader, one more subArray. 722 else: 723 idRangeOffset = idRangeOffset - 8 # one less subheader 724 725 # Now we can write out the data! 726 length = ( 727 6 + 512 + 8 * len(subHeaderList) 728 ) # header, 256 subHeaderKeys, and subheader array. 729 for subhead in subHeaderList[:-1]: 730 length = ( 731 length + len(subhead.glyphIndexArray) * 2 732 ) # We can't use subhead.entryCount, as some of the subhead may share subArrays. 733 dataList = [struct.pack(">HHH", 2, length, self.language)] 734 for index in subHeaderKeys: 735 dataList.append(struct.pack(">H", index * 8)) 736 for subhead in subHeaderList: 737 dataList.append( 738 struct.pack( 739 subHeaderFormat, 740 subhead.firstCode, 741 subhead.entryCount, 742 subhead.idDelta, 743 subhead.idRangeOffset, 744 ) 745 ) 746 for subhead in subHeaderList[:-1]: 747 for gi in subhead.glyphIndexArray: 748 dataList.append(struct.pack(">H", gi)) 749 data = bytesjoin(dataList) 750 assert len(data) == length, ( 751 "Error: cmap format 2 is not same length as calculated! actual: " 752 + str(len(data)) 753 + " calc : " 754 + str(length) 755 ) 756 return data 757 758 def fromXML(self, name, attrs, content, ttFont): 759 self.language = safeEval(attrs["language"]) 760 if not hasattr(self, "cmap"): 761 self.cmap = {} 762 cmap = self.cmap 763 764 for element in content: 765 if not isinstance(element, tuple): 766 continue 767 name, attrs, content = element 768 if name != "map": 769 continue 770 cmap[safeEval(attrs["code"])] = attrs["name"] 771 772 773cmap_format_4_format = ">7H" 774 775# uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF. 776# uint16 reservedPad # This value should be zero 777# uint16 startCode[segCount] # Starting character code for each segment 778# uint16 idDelta[segCount] # Delta for all character codes in segment 779# uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 780# uint16 glyphIndexArray[variable] # Glyph index array 781 782 783def splitRange(startCode, endCode, cmap): 784 # Try to split a range of character codes into subranges with consecutive 785 # glyph IDs in such a way that the cmap4 subtable can be stored "most" 786 # efficiently. I can't prove I've got the optimal solution, but it seems 787 # to do well with the fonts I tested: none became bigger, many became smaller. 788 if startCode == endCode: 789 return [], [endCode] 790 791 lastID = cmap[startCode] 792 lastCode = startCode 793 inOrder = None 794 orderedBegin = None 795 subRanges = [] 796 797 # Gather subranges in which the glyph IDs are consecutive. 798 for code in range(startCode + 1, endCode + 1): 799 glyphID = cmap[code] 800 801 if glyphID - 1 == lastID: 802 if inOrder is None or not inOrder: 803 inOrder = 1 804 orderedBegin = lastCode 805 else: 806 if inOrder: 807 inOrder = 0 808 subRanges.append((orderedBegin, lastCode)) 809 orderedBegin = None 810 811 lastID = glyphID 812 lastCode = code 813 814 if inOrder: 815 subRanges.append((orderedBegin, lastCode)) 816 assert lastCode == endCode 817 818 # Now filter out those new subranges that would only make the data bigger. 819 # A new segment cost 8 bytes, not using a new segment costs 2 bytes per 820 # character. 821 newRanges = [] 822 for b, e in subRanges: 823 if b == startCode and e == endCode: 824 break # the whole range, we're fine 825 if b == startCode or e == endCode: 826 threshold = 4 # split costs one more segment 827 else: 828 threshold = 8 # split costs two more segments 829 if (e - b + 1) > threshold: 830 newRanges.append((b, e)) 831 subRanges = newRanges 832 833 if not subRanges: 834 return [], [endCode] 835 836 if subRanges[0][0] != startCode: 837 subRanges.insert(0, (startCode, subRanges[0][0] - 1)) 838 if subRanges[-1][1] != endCode: 839 subRanges.append((subRanges[-1][1] + 1, endCode)) 840 841 # Fill the "holes" in the segments list -- those are the segments in which 842 # the glyph IDs are _not_ consecutive. 843 i = 1 844 while i < len(subRanges): 845 if subRanges[i - 1][1] + 1 != subRanges[i][0]: 846 subRanges.insert(i, (subRanges[i - 1][1] + 1, subRanges[i][0] - 1)) 847 i = i + 1 848 i = i + 1 849 850 # Transform the ranges into startCode/endCode lists. 851 start = [] 852 end = [] 853 for b, e in subRanges: 854 start.append(b) 855 end.append(e) 856 start.pop(0) 857 858 assert len(start) + 1 == len(end) 859 return start, end 860 861 862class cmap_format_4(CmapSubtable): 863 def decompile(self, data, ttFont): 864 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 865 # If not, someone is calling the subtable decompile() directly, and must provide both args. 866 if data is not None and ttFont is not None: 867 self.decompileHeader(data, ttFont) 868 else: 869 assert ( 870 data is None and ttFont is None 871 ), "Need both data and ttFont arguments" 872 873 data = ( 874 self.data 875 ) # decompileHeader assigns the data after the header to self.data 876 (segCountX2, searchRange, entrySelector, rangeShift) = struct.unpack( 877 ">4H", data[:8] 878 ) 879 data = data[8:] 880 segCount = segCountX2 // 2 881 882 allCodes = array.array("H") 883 allCodes.frombytes(data) 884 self.data = data = None 885 886 if sys.byteorder != "big": 887 allCodes.byteswap() 888 889 # divide the data 890 endCode = allCodes[:segCount] 891 allCodes = allCodes[segCount + 1 :] # the +1 is skipping the reservedPad field 892 startCode = allCodes[:segCount] 893 allCodes = allCodes[segCount:] 894 idDelta = allCodes[:segCount] 895 allCodes = allCodes[segCount:] 896 idRangeOffset = allCodes[:segCount] 897 glyphIndexArray = allCodes[segCount:] 898 lenGIArray = len(glyphIndexArray) 899 900 # build 2-byte character mapping 901 charCodes = [] 902 gids = [] 903 for i in range(len(startCode) - 1): # don't do 0xffff! 904 start = startCode[i] 905 delta = idDelta[i] 906 rangeOffset = idRangeOffset[i] 907 partial = rangeOffset // 2 - start + i - len(idRangeOffset) 908 909 rangeCharCodes = list(range(startCode[i], endCode[i] + 1)) 910 charCodes.extend(rangeCharCodes) 911 if rangeOffset == 0: 912 gids.extend( 913 [(charCode + delta) & 0xFFFF for charCode in rangeCharCodes] 914 ) 915 else: 916 for charCode in rangeCharCodes: 917 index = charCode + partial 918 assert index < lenGIArray, ( 919 "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" 920 % (i, index, lenGIArray) 921 ) 922 if glyphIndexArray[index] != 0: # if not missing glyph 923 glyphID = glyphIndexArray[index] + delta 924 else: 925 glyphID = 0 # missing glyph 926 gids.append(glyphID & 0xFFFF) 927 928 self.cmap = _make_map(self.ttFont, charCodes, gids) 929 930 def compile(self, ttFont): 931 if self.data: 932 return ( 933 struct.pack(">HHH", self.format, self.length, self.language) + self.data 934 ) 935 936 charCodes = list(self.cmap.keys()) 937 if not charCodes: 938 startCode = [0xFFFF] 939 endCode = [0xFFFF] 940 else: 941 charCodes.sort() 942 names = [self.cmap[code] for code in charCodes] 943 nameMap = ttFont.getReverseGlyphMap() 944 try: 945 gids = [nameMap[name] for name in names] 946 except KeyError: 947 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 948 try: 949 gids = [nameMap[name] for name in names] 950 except KeyError: 951 # allow virtual GIDs in format 4 tables 952 gids = [] 953 for name in names: 954 try: 955 gid = nameMap[name] 956 except KeyError: 957 try: 958 if name[:3] == "gid": 959 gid = int(name[3:]) 960 else: 961 gid = ttFont.getGlyphID(name) 962 except: 963 raise KeyError(name) 964 965 gids.append(gid) 966 cmap = {} # code:glyphID mapping 967 for code, gid in zip(charCodes, gids): 968 cmap[code] = gid 969 970 # Build startCode and endCode lists. 971 # Split the char codes in ranges of consecutive char codes, then split 972 # each range in more ranges of consecutive/not consecutive glyph IDs. 973 # See splitRange(). 974 lastCode = charCodes[0] 975 endCode = [] 976 startCode = [lastCode] 977 for charCode in charCodes[ 978 1: 979 ]: # skip the first code, it's the first start code 980 if charCode == lastCode + 1: 981 lastCode = charCode 982 continue 983 start, end = splitRange(startCode[-1], lastCode, cmap) 984 startCode.extend(start) 985 endCode.extend(end) 986 startCode.append(charCode) 987 lastCode = charCode 988 start, end = splitRange(startCode[-1], lastCode, cmap) 989 startCode.extend(start) 990 endCode.extend(end) 991 startCode.append(0xFFFF) 992 endCode.append(0xFFFF) 993 994 # build up rest of cruft 995 idDelta = [] 996 idRangeOffset = [] 997 glyphIndexArray = [] 998 for i in range(len(endCode) - 1): # skip the closing codes (0xffff) 999 indices = [] 1000 for charCode in range(startCode[i], endCode[i] + 1): 1001 indices.append(cmap[charCode]) 1002 if indices == list(range(indices[0], indices[0] + len(indices))): 1003 idDelta.append((indices[0] - startCode[i]) % 0x10000) 1004 idRangeOffset.append(0) 1005 else: 1006 idDelta.append(0) 1007 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) 1008 glyphIndexArray.extend(indices) 1009 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef 1010 idRangeOffset.append(0) 1011 1012 # Insane. 1013 segCount = len(endCode) 1014 segCountX2 = segCount * 2 1015 searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2) 1016 1017 charCodeArray = array.array("H", endCode + [0] + startCode) 1018 idDeltaArray = array.array("H", idDelta) 1019 restArray = array.array("H", idRangeOffset + glyphIndexArray) 1020 if sys.byteorder != "big": 1021 charCodeArray.byteswap() 1022 if sys.byteorder != "big": 1023 idDeltaArray.byteswap() 1024 if sys.byteorder != "big": 1025 restArray.byteswap() 1026 data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes() 1027 1028 length = struct.calcsize(cmap_format_4_format) + len(data) 1029 header = struct.pack( 1030 cmap_format_4_format, 1031 self.format, 1032 length, 1033 self.language, 1034 segCountX2, 1035 searchRange, 1036 entrySelector, 1037 rangeShift, 1038 ) 1039 return header + data 1040 1041 def fromXML(self, name, attrs, content, ttFont): 1042 self.language = safeEval(attrs["language"]) 1043 if not hasattr(self, "cmap"): 1044 self.cmap = {} 1045 cmap = self.cmap 1046 1047 for element in content: 1048 if not isinstance(element, tuple): 1049 continue 1050 nameMap, attrsMap, dummyContent = element 1051 if nameMap != "map": 1052 assert 0, "Unrecognized keyword in cmap subtable" 1053 cmap[safeEval(attrsMap["code"])] = attrsMap["name"] 1054 1055 1056class cmap_format_6(CmapSubtable): 1057 def decompile(self, data, ttFont): 1058 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1059 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1060 if data is not None and ttFont is not None: 1061 self.decompileHeader(data, ttFont) 1062 else: 1063 assert ( 1064 data is None and ttFont is None 1065 ), "Need both data and ttFont arguments" 1066 1067 data = ( 1068 self.data 1069 ) # decompileHeader assigns the data after the header to self.data 1070 firstCode, entryCount = struct.unpack(">HH", data[:4]) 1071 firstCode = int(firstCode) 1072 data = data[4:] 1073 # assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!! 1074 gids = array.array("H") 1075 gids.frombytes(data[: 2 * int(entryCount)]) 1076 if sys.byteorder != "big": 1077 gids.byteswap() 1078 self.data = data = None 1079 1080 charCodes = list(range(firstCode, firstCode + len(gids))) 1081 self.cmap = _make_map(self.ttFont, charCodes, gids) 1082 1083 def compile(self, ttFont): 1084 if self.data: 1085 return ( 1086 struct.pack(">HHH", self.format, self.length, self.language) + self.data 1087 ) 1088 cmap = self.cmap 1089 codes = sorted(cmap.keys()) 1090 if codes: # yes, there are empty cmap tables. 1091 codes = list(range(codes[0], codes[-1] + 1)) 1092 firstCode = codes[0] 1093 valueList = [ 1094 ttFont.getGlyphID(cmap[code]) if code in cmap else 0 for code in codes 1095 ] 1096 gids = array.array("H", valueList) 1097 if sys.byteorder != "big": 1098 gids.byteswap() 1099 data = gids.tobytes() 1100 else: 1101 data = b"" 1102 firstCode = 0 1103 header = struct.pack( 1104 ">HHHHH", 6, len(data) + 10, self.language, firstCode, len(codes) 1105 ) 1106 return header + data 1107 1108 def fromXML(self, name, attrs, content, ttFont): 1109 self.language = safeEval(attrs["language"]) 1110 if not hasattr(self, "cmap"): 1111 self.cmap = {} 1112 cmap = self.cmap 1113 1114 for element in content: 1115 if not isinstance(element, tuple): 1116 continue 1117 name, attrs, content = element 1118 if name != "map": 1119 continue 1120 cmap[safeEval(attrs["code"])] = attrs["name"] 1121 1122 1123class cmap_format_12_or_13(CmapSubtable): 1124 def __init__(self, format): 1125 self.format = format 1126 self.reserved = 0 1127 self.data = None 1128 self.ttFont = None 1129 1130 def decompileHeader(self, data, ttFont): 1131 format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16]) 1132 assert ( 1133 len(data) == (16 + nGroups * 12) == (length) 1134 ), "corrupt cmap table format %d (data length: %d, header length: %d)" % ( 1135 self.format, 1136 len(data), 1137 length, 1138 ) 1139 self.format = format 1140 self.reserved = reserved 1141 self.length = length 1142 self.language = language 1143 self.nGroups = nGroups 1144 self.data = data[16:] 1145 self.ttFont = ttFont 1146 1147 def decompile(self, data, ttFont): 1148 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1149 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1150 if data is not None and ttFont is not None: 1151 self.decompileHeader(data, ttFont) 1152 else: 1153 assert ( 1154 data is None and ttFont is None 1155 ), "Need both data and ttFont arguments" 1156 1157 data = ( 1158 self.data 1159 ) # decompileHeader assigns the data after the header to self.data 1160 charCodes = [] 1161 gids = [] 1162 pos = 0 1163 for i in range(self.nGroups): 1164 startCharCode, endCharCode, glyphID = struct.unpack( 1165 ">LLL", data[pos : pos + 12] 1166 ) 1167 pos += 12 1168 lenGroup = 1 + endCharCode - startCharCode 1169 charCodes.extend(list(range(startCharCode, endCharCode + 1))) 1170 gids.extend(self._computeGIDs(glyphID, lenGroup)) 1171 self.data = data = None 1172 self.cmap = _make_map(self.ttFont, charCodes, gids) 1173 1174 def compile(self, ttFont): 1175 if self.data: 1176 return ( 1177 struct.pack( 1178 ">HHLLL", 1179 self.format, 1180 self.reserved, 1181 self.length, 1182 self.language, 1183 self.nGroups, 1184 ) 1185 + self.data 1186 ) 1187 charCodes = list(self.cmap.keys()) 1188 names = list(self.cmap.values()) 1189 nameMap = ttFont.getReverseGlyphMap() 1190 try: 1191 gids = [nameMap[name] for name in names] 1192 except KeyError: 1193 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 1194 try: 1195 gids = [nameMap[name] for name in names] 1196 except KeyError: 1197 # allow virtual GIDs in format 12 tables 1198 gids = [] 1199 for name in names: 1200 try: 1201 gid = nameMap[name] 1202 except KeyError: 1203 try: 1204 if name[:3] == "gid": 1205 gid = int(name[3:]) 1206 else: 1207 gid = ttFont.getGlyphID(name) 1208 except: 1209 raise KeyError(name) 1210 1211 gids.append(gid) 1212 1213 cmap = {} # code:glyphID mapping 1214 for code, gid in zip(charCodes, gids): 1215 cmap[code] = gid 1216 1217 charCodes.sort() 1218 index = 0 1219 startCharCode = charCodes[0] 1220 startGlyphID = cmap[startCharCode] 1221 lastGlyphID = startGlyphID - self._format_step 1222 lastCharCode = startCharCode - 1 1223 nGroups = 0 1224 dataList = [] 1225 maxIndex = len(charCodes) 1226 for index in range(maxIndex): 1227 charCode = charCodes[index] 1228 glyphID = cmap[charCode] 1229 if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode): 1230 dataList.append( 1231 struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID) 1232 ) 1233 startCharCode = charCode 1234 startGlyphID = glyphID 1235 nGroups = nGroups + 1 1236 lastGlyphID = glyphID 1237 lastCharCode = charCode 1238 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 1239 nGroups = nGroups + 1 1240 data = bytesjoin(dataList) 1241 lengthSubtable = len(data) + 16 1242 assert len(data) == (nGroups * 12) == (lengthSubtable - 16) 1243 return ( 1244 struct.pack( 1245 ">HHLLL", 1246 self.format, 1247 self.reserved, 1248 lengthSubtable, 1249 self.language, 1250 nGroups, 1251 ) 1252 + data 1253 ) 1254 1255 def toXML(self, writer, ttFont): 1256 writer.begintag( 1257 self.__class__.__name__, 1258 [ 1259 ("platformID", self.platformID), 1260 ("platEncID", self.platEncID), 1261 ("format", self.format), 1262 ("reserved", self.reserved), 1263 ("length", self.length), 1264 ("language", self.language), 1265 ("nGroups", self.nGroups), 1266 ], 1267 ) 1268 writer.newline() 1269 codes = sorted(self.cmap.items()) 1270 self._writeCodes(codes, writer) 1271 writer.endtag(self.__class__.__name__) 1272 writer.newline() 1273 1274 def fromXML(self, name, attrs, content, ttFont): 1275 self.format = safeEval(attrs["format"]) 1276 self.reserved = safeEval(attrs["reserved"]) 1277 self.length = safeEval(attrs["length"]) 1278 self.language = safeEval(attrs["language"]) 1279 self.nGroups = safeEval(attrs["nGroups"]) 1280 if not hasattr(self, "cmap"): 1281 self.cmap = {} 1282 cmap = self.cmap 1283 1284 for element in content: 1285 if not isinstance(element, tuple): 1286 continue 1287 name, attrs, content = element 1288 if name != "map": 1289 continue 1290 cmap[safeEval(attrs["code"])] = attrs["name"] 1291 1292 1293class cmap_format_12(cmap_format_12_or_13): 1294 _format_step = 1 1295 1296 def __init__(self, format=12): 1297 cmap_format_12_or_13.__init__(self, format) 1298 1299 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1300 return list(range(startingGlyph, startingGlyph + numberOfGlyphs)) 1301 1302 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1303 return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode) 1304 1305 1306class cmap_format_13(cmap_format_12_or_13): 1307 _format_step = 0 1308 1309 def __init__(self, format=13): 1310 cmap_format_12_or_13.__init__(self, format) 1311 1312 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1313 return [startingGlyph] * numberOfGlyphs 1314 1315 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1316 return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode) 1317 1318 1319def cvtToUVS(threeByteString): 1320 data = b"\0" + threeByteString 1321 (val,) = struct.unpack(">L", data) 1322 return val 1323 1324 1325def cvtFromUVS(val): 1326 assert 0 <= val < 0x1000000 1327 fourByteString = struct.pack(">L", val) 1328 return fourByteString[1:] 1329 1330 1331class cmap_format_14(CmapSubtable): 1332 def decompileHeader(self, data, ttFont): 1333 format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10]) 1334 self.data = data[10:] 1335 self.length = length 1336 self.numVarSelectorRecords = numVarSelectorRecords 1337 self.ttFont = ttFont 1338 self.language = 0xFF # has no language. 1339 1340 def decompile(self, data, ttFont): 1341 if data is not None and ttFont is not None: 1342 self.decompileHeader(data, ttFont) 1343 else: 1344 assert ( 1345 data is None and ttFont is None 1346 ), "Need both data and ttFont arguments" 1347 data = self.data 1348 1349 self.cmap = ( 1350 {} 1351 ) # so that clients that expect this to exist in a cmap table won't fail. 1352 uvsDict = {} 1353 recOffset = 0 1354 for n in range(self.numVarSelectorRecords): 1355 uvs, defOVSOffset, nonDefUVSOffset = struct.unpack( 1356 ">3sLL", data[recOffset : recOffset + 11] 1357 ) 1358 recOffset += 11 1359 varUVS = cvtToUVS(uvs) 1360 if defOVSOffset: 1361 startOffset = defOVSOffset - 10 1362 (numValues,) = struct.unpack(">L", data[startOffset : startOffset + 4]) 1363 startOffset += 4 1364 for r in range(numValues): 1365 uv, addtlCnt = struct.unpack( 1366 ">3sB", data[startOffset : startOffset + 4] 1367 ) 1368 startOffset += 4 1369 firstBaseUV = cvtToUVS(uv) 1370 cnt = addtlCnt + 1 1371 baseUVList = list(range(firstBaseUV, firstBaseUV + cnt)) 1372 glyphList = [None] * cnt 1373 localUVList = zip(baseUVList, glyphList) 1374 try: 1375 uvsDict[varUVS].extend(localUVList) 1376 except KeyError: 1377 uvsDict[varUVS] = list(localUVList) 1378 1379 if nonDefUVSOffset: 1380 startOffset = nonDefUVSOffset - 10 1381 (numRecs,) = struct.unpack(">L", data[startOffset : startOffset + 4]) 1382 startOffset += 4 1383 localUVList = [] 1384 for r in range(numRecs): 1385 uv, gid = struct.unpack(">3sH", data[startOffset : startOffset + 5]) 1386 startOffset += 5 1387 uv = cvtToUVS(uv) 1388 glyphName = self.ttFont.getGlyphName(gid) 1389 localUVList.append((uv, glyphName)) 1390 try: 1391 uvsDict[varUVS].extend(localUVList) 1392 except KeyError: 1393 uvsDict[varUVS] = localUVList 1394 1395 self.uvsDict = uvsDict 1396 1397 def toXML(self, writer, ttFont): 1398 writer.begintag( 1399 self.__class__.__name__, 1400 [ 1401 ("platformID", self.platformID), 1402 ("platEncID", self.platEncID), 1403 ], 1404 ) 1405 writer.newline() 1406 uvsDict = self.uvsDict 1407 uvsList = sorted(uvsDict.keys()) 1408 for uvs in uvsList: 1409 uvList = uvsDict[uvs] 1410 uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1])) 1411 for uv, gname in uvList: 1412 attrs = [("uv", hex(uv)), ("uvs", hex(uvs))] 1413 if gname is not None: 1414 attrs.append(("name", gname)) 1415 writer.simpletag("map", attrs) 1416 writer.newline() 1417 writer.endtag(self.__class__.__name__) 1418 writer.newline() 1419 1420 def fromXML(self, name, attrs, content, ttFont): 1421 self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail 1422 if not hasattr(self, "cmap"): 1423 self.cmap = ( 1424 {} 1425 ) # so that clients that expect this to exist in a cmap table won't fail. 1426 if not hasattr(self, "uvsDict"): 1427 self.uvsDict = {} 1428 uvsDict = self.uvsDict 1429 1430 # For backwards compatibility reasons we accept "None" as an indicator 1431 # for "default mapping", unless the font actually has a glyph named 1432 # "None". 1433 _hasGlyphNamedNone = None 1434 1435 for element in content: 1436 if not isinstance(element, tuple): 1437 continue 1438 name, attrs, content = element 1439 if name != "map": 1440 continue 1441 uvs = safeEval(attrs["uvs"]) 1442 uv = safeEval(attrs["uv"]) 1443 gname = attrs.get("name") 1444 if gname == "None": 1445 if _hasGlyphNamedNone is None: 1446 _hasGlyphNamedNone = "None" in ttFont.getGlyphOrder() 1447 if not _hasGlyphNamedNone: 1448 gname = None 1449 try: 1450 uvsDict[uvs].append((uv, gname)) 1451 except KeyError: 1452 uvsDict[uvs] = [(uv, gname)] 1453 1454 def compile(self, ttFont): 1455 if self.data: 1456 return ( 1457 struct.pack( 1458 ">HLL", self.format, self.length, self.numVarSelectorRecords 1459 ) 1460 + self.data 1461 ) 1462 1463 uvsDict = self.uvsDict 1464 uvsList = sorted(uvsDict.keys()) 1465 self.numVarSelectorRecords = len(uvsList) 1466 offset = ( 1467 10 + self.numVarSelectorRecords * 11 1468 ) # current value is end of VarSelectorRecords block. 1469 data = [] 1470 varSelectorRecords = [] 1471 for uvs in uvsList: 1472 entryList = uvsDict[uvs] 1473 1474 defList = [entry for entry in entryList if entry[1] is None] 1475 if defList: 1476 defList = [entry[0] for entry in defList] 1477 defOVSOffset = offset 1478 defList.sort() 1479 1480 lastUV = defList[0] 1481 cnt = -1 1482 defRecs = [] 1483 for defEntry in defList: 1484 cnt += 1 1485 if (lastUV + cnt) != defEntry: 1486 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt - 1) 1487 lastUV = defEntry 1488 defRecs.append(rec) 1489 cnt = 0 1490 1491 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt) 1492 defRecs.append(rec) 1493 1494 numDefRecs = len(defRecs) 1495 data.append(struct.pack(">L", numDefRecs)) 1496 data.extend(defRecs) 1497 offset += 4 + numDefRecs * 4 1498 else: 1499 defOVSOffset = 0 1500 1501 ndefList = [entry for entry in entryList if entry[1] is not None] 1502 if ndefList: 1503 nonDefUVSOffset = offset 1504 ndefList.sort() 1505 numNonDefRecs = len(ndefList) 1506 data.append(struct.pack(">L", numNonDefRecs)) 1507 offset += 4 + numNonDefRecs * 5 1508 1509 for uv, gname in ndefList: 1510 gid = ttFont.getGlyphID(gname) 1511 ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid) 1512 data.append(ndrec) 1513 else: 1514 nonDefUVSOffset = 0 1515 1516 vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset) 1517 varSelectorRecords.append(vrec) 1518 1519 data = bytesjoin(varSelectorRecords) + bytesjoin(data) 1520 self.length = 10 + len(data) 1521 headerdata = struct.pack( 1522 ">HLL", self.format, self.length, self.numVarSelectorRecords 1523 ) 1524 1525 return headerdata + data 1526 1527 1528class cmap_format_unknown(CmapSubtable): 1529 def toXML(self, writer, ttFont): 1530 cmapName = self.__class__.__name__[:12] + str(self.format) 1531 writer.begintag( 1532 cmapName, 1533 [ 1534 ("platformID", self.platformID), 1535 ("platEncID", self.platEncID), 1536 ], 1537 ) 1538 writer.newline() 1539 writer.dumphex(self.data) 1540 writer.endtag(cmapName) 1541 writer.newline() 1542 1543 def fromXML(self, name, attrs, content, ttFont): 1544 self.data = readHex(content) 1545 self.cmap = {} 1546 1547 def decompileHeader(self, data, ttFont): 1548 self.language = 0 # dummy value 1549 self.data = data 1550 1551 def decompile(self, data, ttFont): 1552 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1553 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1554 if data is not None and ttFont is not None: 1555 self.decompileHeader(data, ttFont) 1556 else: 1557 assert ( 1558 data is None and ttFont is None 1559 ), "Need both data and ttFont arguments" 1560 1561 def compile(self, ttFont): 1562 if self.data: 1563 return self.data 1564 else: 1565 return None 1566 1567 1568cmap_classes = { 1569 0: cmap_format_0, 1570 2: cmap_format_2, 1571 4: cmap_format_4, 1572 6: cmap_format_6, 1573 12: cmap_format_12, 1574 13: cmap_format_13, 1575 14: cmap_format_14, 1576} 1577