from fontTools.misc import sstruct
from fontTools.misc.roundTools import otRound
from fontTools.misc.textTools import safeEval, num2binary, binary2num
from fontTools.ttLib.tables import DefaultTable
import bisect
import logging


log = logging.getLogger(__name__)

# panose classification

panoseFormat = """
    bFamilyType:        B
    bSerifStyle:        B
    bWeight:            B
    bProportion:        B
    bContrast:          B
    bStrokeVariation:   B
    bArmStyle:          B
    bLetterForm:        B
    bMidline:           B
    bXHeight:           B
"""


class Panose(object):
    """Container for the ten one-byte fields listed in ``panoseFormat``."""

    def __init__(self, **kwargs):
        """Set every panose field from ``kwargs``, defaulting each to 0.

        Raises:
            TypeError: if ``kwargs`` contains a name that is not a panose field.
        """
        _, names, _ = sstruct.getformat(panoseFormat)
        for name in names:
            setattr(self, name, kwargs.pop(name, 0))
        # Anything still left in kwargs was not a known field name.
        for k in kwargs:
            raise TypeError(f"Panose() got an unexpected keyword argument {k!r}")

    def toXML(self, writer, ttFont):
        """Write one simple tag per panose field, each followed by a newline."""
        _, names, _ = sstruct.getformat(panoseFormat)
        for name in names:
            writer.simpletag(name, value=getattr(self, name))
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set attribute ``name`` from the evaluated ``value`` XML attribute."""
        setattr(self, name, safeEval(attrs["value"]))


# 'sfnt' OS/2 and Windows Metrics table - 'OS/2'

OS2_format_0 = """
    >   # big endian
    version:                H       # version
    xAvgCharWidth:          h       # average character width
    usWeightClass:          H       # degree of thickness of strokes
    usWidthClass:           H       # aspect ratio
    fsType:                 H       # type flags
    ySubscriptXSize:        h       # subscript horizontal font size
    ySubscriptYSize:        h       # subscript vertical font size
    ySubscriptXOffset:      h       # subscript x offset
    ySubscriptYOffset:      h       # subscript y offset
    ySuperscriptXSize:      h       # superscript horizontal font size
    ySuperscriptYSize:      h       # superscript vertical font size
    ySuperscriptXOffset:    h       # superscript x offset
    ySuperscriptYOffset:    h       # superscript y offset
    yStrikeoutSize:         h       # strikeout size
    yStrikeoutPosition:     h       # strikeout position
    sFamilyClass:           h       # font family class and subclass
    panose:                 10s     # panose classification number
    ulUnicodeRange1:        L       # character range
    ulUnicodeRange2:        L       # character range
    ulUnicodeRange3:        L       # character range
    ulUnicodeRange4:        L       # character range
    achVendID:              4s      # font vendor identification
    fsSelection:            H       # font selection flags
    usFirstCharIndex:       H       # first unicode character index
    usLastCharIndex:        H       # last unicode character index
    sTypoAscender:          h       # typographic ascender
    sTypoDescender:         h       # typographic descender
    sTypoLineGap:           h       # typographic line gap
    usWinAscent:            H       # Windows ascender
    usWinDescent:           H       # Windows descender
"""

OS2_format_1_addition = """
    ulCodePageRange1:   L
    ulCodePageRange2:   L
"""

OS2_format_2_addition = (
    OS2_format_1_addition
    + """
    sxHeight:           h
    sCapHeight:         h
    usDefaultChar:      H
    usBreakChar:        H
    usMaxContext:       H
"""
)

OS2_format_5_addition = (
    OS2_format_2_addition
    + """
    usLowerOpticalPointSize:    H
    usUpperOpticalPointSize:    H
"""
)

bigendian = "    >   # big endian\n"

# The full formats are used when packing; the "*_addition" formats are used
# on their own when unpacking the tail that follows the version 0 fields, so
# they need their own byte-order line.
OS2_format_1 = OS2_format_0 + OS2_format_1_addition
OS2_format_2 = OS2_format_0 + OS2_format_2_addition
OS2_format_5 = OS2_format_0 + OS2_format_5_addition
OS2_format_1_addition = bigendian + OS2_format_1_addition
OS2_format_2_addition = bigendian + OS2_format_2_addition
OS2_format_5_addition = bigendian + OS2_format_5_addition


class table_O_S_2f_2(DefaultTable.DefaultTable):
    """the OS/2 table"""

    dependencies = ["head"]

    def decompile(self, data, ttFont):
        """Unpack binary ``data`` into attributes of this table.

        The version 0 fields are read first, then the version-specific tail.
        For version 5 the two optical point size fields are divided by 20
        after unpacking.  ``self.panose`` ends up as a ``Panose`` instance.

        Raises:
            ttLib.TTLibError: if ``self.version`` is not one of 0-5.
        """
        _, data = sstruct.unpack2(OS2_format_0, data, self)

        if self.version == 1:
            _, data = sstruct.unpack2(OS2_format_1_addition, data, self)
        elif self.version in (2, 3, 4):
            _, data = sstruct.unpack2(OS2_format_2_addition, data, self)
        elif self.version == 5:
            _, data = sstruct.unpack2(OS2_format_5_addition, data, self)
            self.usLowerOpticalPointSize /= 20
            self.usUpperOpticalPointSize /= 20
        elif self.version != 0:
            from fontTools import ttLib

            raise ttLib.TTLibError(
                "unknown format for OS/2 table: version %s" % self.version
            )
        if len(data):
            log.warning("too much 'OS/2' table data")

        self.panose = sstruct.unpack(panoseFormat, self.panose, Panose())

    def compile(self, ttFont):
        """Pack this table into binary data and return it.

        ``usFirstCharIndex``/``usLastCharIndex`` are refreshed from the cmap
        first, and inconsistencies between ``fsSelection`` and the 'head'
        table's ``macStyle`` are logged as warnings (they do not abort).

        Raises:
            ttLib.TTLibError: if ``self.version`` is not one of 0-5.
        """
        self.updateFirstAndLastCharIndex(ttFont)
        panose = self.panose
        head = ttFont["head"]
        if (self.fsSelection & 1) and not (head.macStyle & 1 << 1):
            log.warning(
                "fsSelection bit 0 (italic) and "
                "head table macStyle bit 1 (italic) should match"
            )
        if (self.fsSelection & 1 << 5) and not (head.macStyle & 1):
            log.warning(
                "fsSelection bit 5 (bold) and "
                "head table macStyle bit 0 (bold) should match"
            )
        if (self.fsSelection & 1 << 6) and (self.fsSelection & 1 + (1 << 5)):
            log.warning(
                "fsSelection bit 6 (regular) is set, "
                "bits 0 (italic) and 5 (bold) must be clear"
            )
        if self.version < 4 and self.fsSelection & 0b1110000000:
            log.warning(
                "fsSelection bits 7, 8 and 9 are only defined in "
                "OS/2 table version 4 and up: version %s",
                self.version,
            )
        # sstruct packs the 'panose' field as raw bytes, so the Panose object
        # is swapped for its packed form while packing.  The finally clause
        # guarantees the object is put back even when packing fails or the
        # version is unknown.
        self.panose = sstruct.pack(panoseFormat, self.panose)
        try:
            if self.version == 0:
                data = sstruct.pack(OS2_format_0, self)
            elif self.version == 1:
                data = sstruct.pack(OS2_format_1, self)
            elif self.version in (2, 3, 4):
                data = sstruct.pack(OS2_format_2, self)
            elif self.version == 5:
                # Pack from a copy so the stored optical sizes are not
                # overwritten by their scaled (x20) counterparts.
                d = self.__dict__.copy()
                d["usLowerOpticalPointSize"] = round(
                    self.usLowerOpticalPointSize * 20
                )
                d["usUpperOpticalPointSize"] = round(
                    self.usUpperOpticalPointSize * 20
                )
                data = sstruct.pack(OS2_format_5, d)
            else:
                from fontTools import ttLib

                raise ttLib.TTLibError(
                    "unknown format for OS/2 table: version %s" % self.version
                )
        finally:
            self.panose = panose
        return data

    def toXML(self, writer, ttFont):
        """Write every field of the format matching ``self.version`` as XML.

        Bit-field values are written in binary notation, ``panose`` as a
        nested element, and all other fields as plain values.
        """
        writer.comment(
            "The fields 'usFirstCharIndex' and 'usLastCharIndex'\n"
            "will be recalculated by the compiler"
        )
        writer.newline()
        if self.version == 1:
            fmt = OS2_format_1
        elif self.version in (2, 3, 4):
            fmt = OS2_format_2
        elif self.version == 5:
            fmt = OS2_format_5
        else:
            fmt = OS2_format_0
        _, names, _ = sstruct.getformat(fmt)
        for name in names:
            value = getattr(self, name)
            if name == "panose":
                writer.begintag("panose")
                writer.newline()
                value.toXML(writer, ttFont)
                writer.endtag("panose")
            elif name in (
                "ulUnicodeRange1",
                "ulUnicodeRange2",
                "ulUnicodeRange3",
                "ulUnicodeRange4",
                "ulCodePageRange1",
                "ulCodePageRange2",
            ):
                writer.simpletag(name, value=num2binary(value))
            elif name in ("fsType", "fsSelection"):
                writer.simpletag(name, value=num2binary(value, 16))
            elif name == "achVendID":
                # repr() minus its first and last character (the quotes).
                writer.simpletag(name, value=repr(value)[1:-1])
            else:
                writer.simpletag(name, value=value)
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set the attribute ``name`` from one parsed XML element.

        Mirrors ``toXML``: ``panose`` children are forwarded to a fresh
        ``Panose`` object, bit fields are parsed with ``binary2num``, and
        everything else is evaluated with ``safeEval``.
        """
        if name == "panose":
            self.panose = panose = Panose()
            for element in content:
                # Only tuples are child elements; other items are skipped.
                if isinstance(element, tuple):
                    childName, childAttrs, childContent = element
                    panose.fromXML(childName, childAttrs, childContent, ttFont)
        elif name in (
            "ulUnicodeRange1",
            "ulUnicodeRange2",
            "ulUnicodeRange3",
            "ulUnicodeRange4",
            "ulCodePageRange1",
            "ulCodePageRange2",
            "fsType",
            "fsSelection",
        ):
            setattr(self, name, binary2num(attrs["value"]))
        elif name == "achVendID":
            setattr(self, name, safeEval("'''" + attrs["value"] + "'''"))
        else:
            setattr(self, name, safeEval(attrs["value"]))

    def updateFirstAndLastCharIndex(self, ttFont):
        """Set usFirstCharIndex/usLastCharIndex from the Unicode cmap subtables.

        Does nothing when the font has no 'cmap' table or when the Unicode
        subtables map no codepoints.
        """
        if "cmap" not in ttFont:
            return
        codes = set()
        for table in getattr(ttFont["cmap"], "tables", []):
            if table.isUnicode():
                codes.update(table.cmap.keys())
        if codes:
            minCode = min(codes)
            maxCode = max(codes)
            # USHORT cannot hold codepoints greater than 0xFFFF
            self.usFirstCharIndex = min(0xFFFF, minCode)
            self.usLastCharIndex = min(0xFFFF, maxCode)

    # misspelled attributes kept for legacy reasons

    @property
    def usMaxContex(self):
        return self.usMaxContext

    @usMaxContex.setter
    def usMaxContex(self, value):
        self.usMaxContext = value

    @property
    def fsFirstCharIndex(self):
        return self.usFirstCharIndex

    @fsFirstCharIndex.setter
    def fsFirstCharIndex(self, value):
        self.usFirstCharIndex = value

    @property
    def fsLastCharIndex(self):
        return self.usLastCharIndex

    @fsLastCharIndex.setter
    def fsLastCharIndex(self, value):
        self.usLastCharIndex = value

    def getUnicodeRanges(self):
        """Return the set of 'ulUnicodeRange*' bits currently enabled."""
        bits = set()
        ul1, ul2 = self.ulUnicodeRange1, self.ulUnicodeRange2
        ul3, ul4 = self.ulUnicodeRange3, self.ulUnicodeRange4
        for i in range(32):
            if ul1 & (1 << i):
                bits.add(i)
            if ul2 & (1 << i):
                bits.add(i + 32)
            if ul3 & (1 << i):
                bits.add(i + 64)
            if ul4 & (1 << i):
                bits.add(i + 96)
        return bits

    def setUnicodeRanges(self, bits):
        """Set the 'ulUnicodeRange*' fields to the specified 'bits'.

        Raises:
            ValueError: if a bit is outside the range 0..122.
        """
        ul1, ul2, ul3, ul4 = 0, 0, 0, 0
        for bit in bits:
            if 0 <= bit < 32:
                ul1 |= 1 << bit
            elif 32 <= bit < 64:
                ul2 |= 1 << (bit - 32)
            elif 64 <= bit < 96:
                ul3 |= 1 << (bit - 64)
            elif 96 <= bit < 123:
                ul4 |= 1 << (bit - 96)
            else:
                raise ValueError("expected 0 <= int <= 122, found: %r" % bit)
        self.ulUnicodeRange1, self.ulUnicodeRange2 = ul1, ul2
        self.ulUnicodeRange3, self.ulUnicodeRange4 = ul3, ul4

    def recalcUnicodeRanges(self, ttFont, pruneOnly=False):
        """Intersect the codepoints in the font's Unicode cmap subtables with
        the Unicode block ranges defined in the OpenType specification (v1.7),
        and set the respective 'ulUnicodeRange*' bits if there is at least ONE
        intersection.
        If 'pruneOnly' is True, only clear unused bits with NO intersection.

        Returns the set of bits that was stored.
        """
        unicodes = set()
        for table in ttFont["cmap"].tables:
            if table.isUnicode():
                unicodes.update(table.cmap.keys())
        if pruneOnly:
            empty = intersectUnicodeRanges(unicodes, inverse=True)
            bits = self.getUnicodeRanges() - empty
        else:
            bits = intersectUnicodeRanges(unicodes)
        self.setUnicodeRanges(bits)
        return bits

    def getCodePageRanges(self):
        """Return the set of 'ulCodePageRange*' bits currently enabled.

        The set is empty for version 0 tables, which have no such fields.
        """
        bits = set()
        if self.version < 1:
            return bits
        ul1, ul2 = self.ulCodePageRange1, self.ulCodePageRange2
        for i in range(32):
            if ul1 & (1 << i):
                bits.add(i)
            if ul2 & (1 << i):
                bits.add(i + 32)
        return bits

    def setCodePageRanges(self, bits):
        """Set the 'ulCodePageRange*' fields to the specified 'bits'.

        A version 0 table is upgraded to version 1, the first version whose
        format includes the code page range fields.

        Raises:
            ValueError: if a bit is outside the range 0..63.
        """
        ul1, ul2 = 0, 0
        for bit in bits:
            if 0 <= bit < 32:
                ul1 |= 1 << bit
            elif 32 <= bit < 64:
                ul2 |= 1 << (bit - 32)
            else:
                # '!r' is the repr conversion; ':r' would be an (invalid)
                # format spec and would mask this message with another error.
                raise ValueError(f"expected 0 <= int <= 63, found: {bit!r}")
        if self.version < 1:
            self.version = 1
        self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2

    def recalcCodePageRanges(self, ttFont, pruneOnly=False):
        """Recompute the 'ulCodePageRange*' bits from the Unicode cmap subtables.

        If 'pruneOnly' is True, only bits that are already enabled are kept.
        Returns the set of bits that was stored.
        """
        unicodes = set()
        for table in ttFont["cmap"].tables:
            if table.isUnicode():
                unicodes.update(table.cmap.keys())
        bits = calcCodePageRanges(unicodes)
        if pruneOnly:
            bits &= self.getCodePageRanges()
        # when no codepage ranges can be enabled, fall back to enabling bit 0
        # (Latin 1) so that the font works in MS Word:
        # https://github.com/googlei18n/fontmake/issues/468
        if not bits:
            bits = {0}
        self.setCodePageRanges(bits)
        return bits

    def recalcAvgCharWidth(self, ttFont):
        """Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.

        Only widths greater than zero are averaged.  Set it to 0 in the
        unlikely event that the 'hmtx' table is not found.

        Returns the value stored in ``xAvgCharWidth``.
        """
        avg_width = 0
        hmtx = ttFont.get("hmtx")
        if hmtx is not None:
            widths = [width for width, _ in hmtx.metrics.values() if width > 0]
            if widths:
                avg_width = otRound(sum(widths) / len(widths))
        self.xAvgCharWidth = avg_width
        return avg_width


# Unicode ranges data from the OpenType OS/2 table specification v1.7
#
# The position of each entry in the tuple is its 'ulUnicodeRange' bit number;
# an entry lists every (block name, (first, last)) pair sharing that bit.

OS2_UNICODE_RANGES = (
    (("Basic Latin", (0x0000, 0x007F)),),
    (("Latin-1 Supplement", (0x0080, 0x00FF)),),
    (("Latin Extended-A", (0x0100, 0x017F)),),
    (("Latin Extended-B", (0x0180, 0x024F)),),
    (
        ("IPA Extensions", (0x0250, 0x02AF)),
        ("Phonetic Extensions", (0x1D00, 0x1D7F)),
        ("Phonetic Extensions Supplement", (0x1D80, 0x1DBF)),
    ),
    (
        ("Spacing Modifier Letters", (0x02B0, 0x02FF)),
        ("Modifier Tone Letters", (0xA700, 0xA71F)),
    ),
    (
        ("Combining Diacritical Marks", (0x0300, 0x036F)),
        ("Combining Diacritical Marks Supplement", (0x1DC0, 0x1DFF)),
    ),
    (("Greek and Coptic", (0x0370, 0x03FF)),),
    (("Coptic", (0x2C80, 0x2CFF)),),
    (
        ("Cyrillic", (0x0400, 0x04FF)),
        ("Cyrillic Supplement", (0x0500, 0x052F)),
        ("Cyrillic Extended-A", (0x2DE0, 0x2DFF)),
        ("Cyrillic Extended-B", (0xA640, 0xA69F)),
    ),
    (("Armenian", (0x0530, 0x058F)),),
    (("Hebrew", (0x0590, 0x05FF)),),
    (("Vai", (0xA500, 0xA63F)),),
    (("Arabic", (0x0600, 0x06FF)), ("Arabic Supplement", (0x0750, 0x077F))),
    (("NKo", (0x07C0, 0x07FF)),),
    (("Devanagari", (0x0900, 0x097F)),),
    (("Bengali", (0x0980, 0x09FF)),),
    (("Gurmukhi", (0x0A00, 0x0A7F)),),
    (("Gujarati", (0x0A80, 0x0AFF)),),
    (("Oriya", (0x0B00, 0x0B7F)),),
    (("Tamil", (0x0B80, 0x0BFF)),),
    (("Telugu", (0x0C00, 0x0C7F)),),
    (("Kannada", (0x0C80, 0x0CFF)),),
    (("Malayalam", (0x0D00, 0x0D7F)),),
    (("Thai", (0x0E00, 0x0E7F)),),
    (("Lao", (0x0E80, 0x0EFF)),),
    (("Georgian", (0x10A0, 0x10FF)), ("Georgian Supplement", (0x2D00, 0x2D2F))),
    (("Balinese", (0x1B00, 0x1B7F)),),
    (("Hangul Jamo", (0x1100, 0x11FF)),),
    (
        ("Latin Extended Additional", (0x1E00, 0x1EFF)),
        ("Latin Extended-C", (0x2C60, 0x2C7F)),
        ("Latin Extended-D", (0xA720, 0xA7FF)),
    ),
    (("Greek Extended", (0x1F00, 0x1FFF)),),
    (
        ("General Punctuation", (0x2000, 0x206F)),
        ("Supplemental Punctuation", (0x2E00, 0x2E7F)),
    ),
    (("Superscripts And Subscripts", (0x2070, 0x209F)),),
    (("Currency Symbols", (0x20A0, 0x20CF)),),
    (("Combining Diacritical Marks For Symbols", (0x20D0, 0x20FF)),),
    (("Letterlike Symbols", (0x2100, 0x214F)),),
    (("Number Forms", (0x2150, 0x218F)),),
    (
        ("Arrows", (0x2190, 0x21FF)),
        ("Supplemental Arrows-A", (0x27F0, 0x27FF)),
        ("Supplemental Arrows-B", (0x2900, 0x297F)),
        ("Miscellaneous Symbols and Arrows", (0x2B00, 0x2BFF)),
    ),
    (
        ("Mathematical Operators", (0x2200, 0x22FF)),
        ("Supplemental Mathematical Operators", (0x2A00, 0x2AFF)),
        ("Miscellaneous Mathematical Symbols-A", (0x27C0, 0x27EF)),
        ("Miscellaneous Mathematical Symbols-B", (0x2980, 0x29FF)),
    ),
    (("Miscellaneous Technical", (0x2300, 0x23FF)),),
    (("Control Pictures", (0x2400, 0x243F)),),
    (("Optical Character Recognition", (0x2440, 0x245F)),),
    (("Enclosed Alphanumerics", (0x2460, 0x24FF)),),
    (("Box Drawing", (0x2500, 0x257F)),),
    (("Block Elements", (0x2580, 0x259F)),),
    (("Geometric Shapes", (0x25A0, 0x25FF)),),
    (("Miscellaneous Symbols", (0x2600, 0x26FF)),),
    (("Dingbats", (0x2700, 0x27BF)),),
    (("CJK Symbols And Punctuation", (0x3000, 0x303F)),),
    (("Hiragana", (0x3040, 0x309F)),),
    (
        ("Katakana", (0x30A0, 0x30FF)),
        ("Katakana Phonetic Extensions", (0x31F0, 0x31FF)),
    ),
    (("Bopomofo", (0x3100, 0x312F)), ("Bopomofo Extended", (0x31A0, 0x31BF))),
    (("Hangul Compatibility Jamo", (0x3130, 0x318F)),),
    (("Phags-pa", (0xA840, 0xA87F)),),
    (("Enclosed CJK Letters And Months", (0x3200, 0x32FF)),),
    (("CJK Compatibility", (0x3300, 0x33FF)),),
    (("Hangul Syllables", (0xAC00, 0xD7AF)),),
    (("Non-Plane 0 *", (0xD800, 0xDFFF)),),
    (("Phoenician", (0x10900, 0x1091F)),),
    (
        ("CJK Unified Ideographs", (0x4E00, 0x9FFF)),
        ("CJK Radicals Supplement", (0x2E80, 0x2EFF)),
        ("Kangxi Radicals", (0x2F00, 0x2FDF)),
        ("Ideographic Description Characters", (0x2FF0, 0x2FFF)),
        ("CJK Unified Ideographs Extension A", (0x3400, 0x4DBF)),
        ("CJK Unified Ideographs Extension B", (0x20000, 0x2A6DF)),
        ("Kanbun", (0x3190, 0x319F)),
    ),
    (("Private Use Area (plane 0)", (0xE000, 0xF8FF)),),
    (
        ("CJK Strokes", (0x31C0, 0x31EF)),
        ("CJK Compatibility Ideographs", (0xF900, 0xFAFF)),
        ("CJK Compatibility Ideographs Supplement", (0x2F800, 0x2FA1F)),
    ),
    (("Alphabetic Presentation Forms", (0xFB00, 0xFB4F)),),
    (("Arabic Presentation Forms-A", (0xFB50, 0xFDFF)),),
    (("Combining Half Marks", (0xFE20, 0xFE2F)),),
    (
        ("Vertical Forms", (0xFE10, 0xFE1F)),
        ("CJK Compatibility Forms", (0xFE30, 0xFE4F)),
    ),
    (("Small Form Variants", (0xFE50, 0xFE6F)),),
    (("Arabic Presentation Forms-B", (0xFE70, 0xFEFF)),),
    (("Halfwidth And Fullwidth Forms", (0xFF00, 0xFFEF)),),
    (("Specials", (0xFFF0, 0xFFFF)),),
    (("Tibetan", (0x0F00, 0x0FFF)),),
    (("Syriac", (0x0700, 0x074F)),),
    (("Thaana", (0x0780, 0x07BF)),),
    (("Sinhala", (0x0D80, 0x0DFF)),),
    (("Myanmar", (0x1000, 0x109F)),),
    (
        ("Ethiopic", (0x1200, 0x137F)),
        ("Ethiopic Supplement", (0x1380, 0x139F)),
        ("Ethiopic Extended", (0x2D80, 0x2DDF)),
    ),
    (("Cherokee", (0x13A0, 0x13FF)),),
    (("Unified Canadian Aboriginal Syllabics", (0x1400, 0x167F)),),
    (("Ogham", (0x1680, 0x169F)),),
    (("Runic", (0x16A0, 0x16FF)),),
    (("Khmer", (0x1780, 0x17FF)), ("Khmer Symbols", (0x19E0, 0x19FF))),
    (("Mongolian", (0x1800, 0x18AF)),),
    (("Braille Patterns", (0x2800, 0x28FF)),),
    (("Yi Syllables", (0xA000, 0xA48F)), ("Yi Radicals", (0xA490, 0xA4CF))),
    (
        ("Tagalog", (0x1700, 0x171F)),
        ("Hanunoo", (0x1720, 0x173F)),
        ("Buhid", (0x1740, 0x175F)),
        ("Tagbanwa", (0x1760, 0x177F)),
    ),
    (("Old Italic", (0x10300, 0x1032F)),),
    (("Gothic", (0x10330, 0x1034F)),),
    (("Deseret", (0x10400, 0x1044F)),),
    (
        ("Byzantine Musical Symbols", (0x1D000, 0x1D0FF)),
        ("Musical Symbols", (0x1D100, 0x1D1FF)),
        ("Ancient Greek Musical Notation", (0x1D200, 0x1D24F)),
    ),
    (("Mathematical Alphanumeric Symbols", (0x1D400, 0x1D7FF)),),
    (
        ("Private Use (plane 15)", (0xF0000, 0xFFFFD)),
        ("Private Use (plane 16)", (0x100000, 0x10FFFD)),
    ),
    (
        ("Variation Selectors", (0xFE00, 0xFE0F)),
        ("Variation Selectors Supplement", (0xE0100, 0xE01EF)),
    ),
    (("Tags", (0xE0000, 0xE007F)),),
    (("Limbu", (0x1900, 0x194F)),),
    (("Tai Le", (0x1950, 0x197F)),),
    (("New Tai Lue", (0x1980, 0x19DF)),),
    (("Buginese", (0x1A00, 0x1A1F)),),
    (("Glagolitic", (0x2C00, 0x2C5F)),),
    (("Tifinagh", (0x2D30, 0x2D7F)),),
    (("Yijing Hexagram Symbols", (0x4DC0, 0x4DFF)),),
    (("Syloti Nagri", (0xA800, 0xA82F)),),
    (
        ("Linear B Syllabary", (0x10000, 0x1007F)),
        ("Linear B Ideograms", (0x10080, 0x100FF)),
        ("Aegean Numbers", (0x10100, 0x1013F)),
    ),
    (("Ancient Greek Numbers", (0x10140, 0x1018F)),),
    (("Ugaritic", (0x10380, 0x1039F)),),
    (("Old Persian", (0x103A0, 0x103DF)),),
    (("Shavian", (0x10450, 0x1047F)),),
    (("Osmanya", (0x10480, 0x104AF)),),
    (("Cypriot Syllabary", (0x10800, 0x1083F)),),
    (("Kharoshthi", (0x10A00, 0x10A5F)),),
    (("Tai Xuan Jing Symbols", (0x1D300, 0x1D35F)),),
    (
        ("Cuneiform", (0x12000, 0x123FF)),
        ("Cuneiform Numbers and Punctuation", (0x12400, 0x1247F)),
    ),
    (("Counting Rod Numerals", (0x1D360, 0x1D37F)),),
    (("Sundanese", (0x1B80, 0x1BBF)),),
    (("Lepcha", (0x1C00, 0x1C4F)),),
    (("Ol Chiki", (0x1C50, 0x1C7F)),),
    (("Saurashtra", (0xA880, 0xA8DF)),),
    (("Kayah Li", (0xA900, 0xA92F)),),
    (("Rejang", (0xA930, 0xA95F)),),
    (("Cham", (0xAA00, 0xAA5F)),),
    (("Ancient Symbols", (0x10190, 0x101CF)),),
    (("Phaistos Disc", (0x101D0, 0x101FF)),),
    (
        ("Carian", (0x102A0, 0x102DF)),
        ("Lycian", (0x10280, 0x1029F)),
        ("Lydian", (0x10920, 0x1093F)),
    ),
    (("Domino Tiles", (0x1F030, 0x1F09F)), ("Mahjong Tiles", (0x1F000, 0x1F02F))),
)


# Module-level cache filled on first use by _getUnicodeRanges().
# _unicodeValues carries a leading None so that the index returned by
# bisect.bisect() on _unicodeStarts addresses the range whose start is the
# greatest one not exceeding the looked-up codepoint.
_unicodeStarts = []
_unicodeValues = [None]


def _getUnicodeRanges():
    """Return the cached (starts, values) lookup lists, building them once.

    ``starts`` is the sorted list of range start codepoints; ``values`` holds
    the matching ``(stop, bit)`` pairs, shifted by one position.
    """
    # build the ranges of codepoints for each unicode range bit, and cache result
    if not _unicodeStarts:
        unicodeRanges = [
            (start, (stop, bit))
            for bit, blocks in enumerate(OS2_UNICODE_RANGES)
            for _, (start, stop) in blocks
        ]
        for start, (stop, bit) in sorted(unicodeRanges):
            _unicodeStarts.append(start)
            _unicodeValues.append((stop, bit))
    return _unicodeStarts, _unicodeValues


def intersectUnicodeRanges(unicodes, inverse=False):
    """Intersect a sequence of (int) Unicode codepoints with the Unicode block
    ranges defined in the OpenType specification v1.7, and return the set of
    'ulUnicodeRanges' bits for which there is at least ONE intersection.
    If 'inverse' is True, return the bits for which there is NO intersection.

    >>> intersectUnicodeRanges([0x0410]) == {9}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000]) == {9, 57, 122}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000], inverse=True) == (
    ...     set(range(len(OS2_UNICODE_RANGES))) - {9, 57, 122})
    True
    """
    unicodes = set(unicodes)
    unicodestarts, unicodevalues = _getUnicodeRanges()
    bits = set()
    for code in unicodes:
        stop, bit = unicodevalues[bisect.bisect(unicodestarts, code)]
        # The codepoint may fall in a gap after the end of the nearest range.
        if code <= stop:
            bits.add(bit)
    # The spec says that bit 57 ("Non Plane 0") implies that there's
    # at least one codepoint beyond the BMP; so I also include all
    # the non-BMP codepoints here
    if any(0x10000 <= code < 0x110000 for code in unicodes):
        bits.add(57)
    return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits


def calcCodePageRanges(unicodes):
    """Given a set of Unicode codepoints (integers), calculate the
    corresponding OS/2 CodePage range bits.
    This is a direct translation of FontForge implementation:
    https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158

    Each code page is detected through the presence of one marker character,
    in several cases combined with ASCII coverage and/or line-drawing
    characters.  Returns the set of enabled bit numbers.
    """
    bits = set()
    hasAscii = set(range(0x20, 0x7E)).issubset(unicodes)
    hasLineart = ord("┤") in unicodes

    for uni in unicodes:
        if uni == ord("Þ") and hasAscii:
            bits.add(0)  # Latin 1
        elif uni == ord("Ľ") and hasAscii:
            bits.add(1)  # Latin 2: Eastern Europe
            if hasLineart:
                bits.add(58)  # Latin 2
        elif uni == ord("Б"):
            bits.add(2)  # Cyrillic
            if ord("Ѕ") in unicodes and hasLineart:
                bits.add(57)  # IBM Cyrillic
            if ord("╜") in unicodes and hasLineart:
                bits.add(49)  # MS-DOS Russian
        elif uni == ord("Ά"):
            bits.add(3)  # Greek
            if hasLineart and ord("½") in unicodes:
                bits.add(48)  # IBM Greek
            if hasLineart and ord("√") in unicodes:
                bits.add(60)  # Greek, former 437 G
        elif uni == ord("İ") and hasAscii:
            bits.add(4)  # Turkish
            if hasLineart:
                bits.add(56)  # IBM turkish
        elif uni == ord("א"):
            bits.add(5)  # Hebrew
            if hasLineart and ord("√") in unicodes:
                bits.add(53)  # Hebrew
        elif uni == ord("ر"):
            bits.add(6)  # Arabic
            if ord("√") in unicodes:
                bits.add(51)  # Arabic
            if hasLineart:
                bits.add(61)  # Arabic; ASMO 708
        elif uni == ord("ŗ") and hasAscii:
            bits.add(7)  # Windows Baltic
            if hasLineart:
                bits.add(59)  # MS-DOS Baltic
        elif uni == ord("₫") and hasAscii:
            bits.add(8)  # Vietnamese
        elif uni == ord("ๅ"):
            bits.add(16)  # Thai
        elif uni == ord("エ"):
            bits.add(17)  # JIS/Japan
        elif uni == ord("ㄅ"):
            bits.add(18)  # Chinese: Simplified
        elif uni == ord("ㄱ"):
            bits.add(19)  # Korean wansung
        elif uni == ord("央"):
            bits.add(20)  # Chinese: Traditional
        elif uni == ord("곴"):
            bits.add(21)  # Korean Johab
        elif uni == ord("♥") and hasAscii:
            bits.add(30)  # OEM Character Set
        # TODO: Symbol bit has a special meaning (check the spec), we need
        # to confirm if this is wanted by default.
        # elif chr(0xF000) <= char <= chr(0xF0FF):
        #    codepageRanges.add(31)          # Symbol Character Set
        elif uni == ord("þ") and hasAscii and hasLineart:
            bits.add(54)  # MS-DOS Icelandic
        elif uni == ord("╚") and hasAscii:
            bits.add(62)  # WE/Latin 1
            bits.add(63)  # US
        elif hasAscii and hasLineart and ord("√") in unicodes:
            if uni == ord("Å"):
                bits.add(50)  # MS-DOS Nordic
            elif uni == ord("é"):
                bits.add(52)  # MS-DOS Canadian French
            elif uni == ord("õ"):
                bits.add(55)  # MS-DOS Portuguese

    if hasAscii and ord("‰") in unicodes and ord("∑") in unicodes:
        bits.add(29)  # Macintosh Character Set (US Roman)

    return bits


if __name__ == "__main__":
    import doctest, sys

    sys.exit(doctest.testmod().failed)