xref: /aosp_15_r20/external/fonttools/Lib/fontTools/ttLib/tables/O_S_2f_2.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1from fontTools.misc import sstruct
2from fontTools.misc.roundTools import otRound
3from fontTools.misc.textTools import safeEval, num2binary, binary2num
4from fontTools.ttLib.tables import DefaultTable
5import bisect
6import logging
7
8
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# panose classification
#
# sstruct format listing the ten PANOSE fields in storage order, each declared
# with the 'B' type code. It is used below both to enumerate the field names
# (sstruct.getformat) and to pack/unpack the 10-byte 'panose' member of the
# OS/2 table.

panoseFormat = """
	bFamilyType:        B
	bSerifStyle:        B
	bWeight:            B
	bProportion:        B
	bContrast:          B
	bStrokeVariation:   B
	bArmStyle:          B
	bLetterForm:        B
	bMidline:           B
	bXHeight:           B
"""
25
26
class Panose(object):
    """Holder for the PANOSE classification fields named in ``panoseFormat``.

    Every field listed in the format becomes an instance attribute.
    """

    def __init__(self, **kwargs):
        """Initialise each PANOSE field from ``kwargs``, defaulting to 0.

        Raises:
            TypeError: if a keyword does not name a PANOSE field.
        """
        _fmt, fieldNames, _fixes = sstruct.getformat(panoseFormat)
        for fieldName in fieldNames:
            setattr(self, fieldName, kwargs.pop(fieldName, 0))
        # whatever is still left in kwargs was not a known field name
        if kwargs:
            k = next(iter(kwargs))
            raise TypeError(f"Panose() got an unexpected keyword argument {k!r}")

    def toXML(self, writer, ttFont):
        """Emit one simple tag per PANOSE field, in format order."""
        _fmt, fieldNames, _fixes = sstruct.getformat(panoseFormat)
        for fieldName in fieldNames:
            fieldValue = getattr(self, fieldName)
            writer.simpletag(fieldName, value=fieldValue)
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set attribute ``name`` from the evaluated ``value`` XML attribute."""
        parsed = safeEval(attrs["value"])
        setattr(self, name, parsed)
43
44
# 'sfnt' OS/2 and Windows Metrics table - 'OS/2'
#
# The sstruct formats below describe the binary layout of each table version.
# OS2_format_0 is the complete version 0 layout and starts with the '>'
# byte-order marker; later versions append extra fields to it.

OS2_format_0 = """
	>   # big endian
	version:                H       # version
	xAvgCharWidth:          h       # average character width
	usWeightClass:          H       # degree of thickness of strokes
	usWidthClass:           H       # aspect ratio
	fsType:                 H       # type flags
	ySubscriptXSize:        h       # subscript horizontal font size
	ySubscriptYSize:        h       # subscript vertical font size
	ySubscriptXOffset:      h       # subscript x offset
	ySubscriptYOffset:      h       # subscript y offset
	ySuperscriptXSize:      h       # superscript horizontal font size
	ySuperscriptYSize:      h       # superscript vertical font size
	ySuperscriptXOffset:    h       # superscript x offset
	ySuperscriptYOffset:    h       # superscript y offset
	yStrikeoutSize:         h       # strikeout size
	yStrikeoutPosition:     h       # strikeout position
	sFamilyClass:           h       # font family class and subclass
	panose:                 10s     # panose classification number
	ulUnicodeRange1:        L       # character range
	ulUnicodeRange2:        L       # character range
	ulUnicodeRange3:        L       # character range
	ulUnicodeRange4:        L       # character range
	achVendID:              4s      # font vendor identification
	fsSelection:            H       # font selection flags
	usFirstCharIndex:       H       # first unicode character index
	usLastCharIndex:        H       # last unicode character index
	sTypoAscender:          h       # typographic ascender
	sTypoDescender:         h       # typographic descender
	sTypoLineGap:           h       # typographic line gap
	usWinAscent:            H       # Windows ascender
	usWinDescent:           H       # Windows descender
"""

# Fields that version 1 adds after the version 0 fields. At this point the
# string has no byte-order line; one is prepended further down.
OS2_format_1_addition = """
	ulCodePageRange1:   L
	ulCodePageRange2:   L
"""

# Fields added after the version 0 fields for the layout shared by versions
# 2, 3 and 4: the version 1 additions followed by five more fields.
OS2_format_2_addition = (
    OS2_format_1_addition
    + """
	sxHeight:           h
	sCapHeight:         h
	usDefaultChar:      H
	usBreakChar:        H
	usMaxContext:       H
"""
)

# Fields added after the version 0 fields by version 5: the version 2
# additions followed by the two optical point size fields.
OS2_format_5_addition = (
    OS2_format_2_addition
    + """
	usLowerOpticalPointSize:    H
	usUpperOpticalPointSize:    H
"""
)

# Byte-order line prepended to the stand-alone "addition" formats below.
bigendian = "	>	# big endian\n"

# Order matters here: the full-table formats are assembled first, while the
# *_addition strings still lack a byte-order line (OS2_format_0 already
# supplies it). Only afterwards are the *_addition names rebound with the
# 'bigendian' prefix, so they can be unpacked on their own after the
# version 0 fields.
OS2_format_1 = OS2_format_0 + OS2_format_1_addition
OS2_format_2 = OS2_format_0 + OS2_format_2_addition
OS2_format_5 = OS2_format_0 + OS2_format_5_addition
OS2_format_1_addition = bigendian + OS2_format_1_addition
OS2_format_2_addition = bigendian + OS2_format_2_addition
OS2_format_5_addition = bigendian + OS2_format_5_addition
113
114
class table_O_S_2f_2(DefaultTable.DefaultTable):
    """The 'OS/2' (OS/2 and Windows Metrics) table.

    Table versions 0 through 5 are handled; versions 2, 3 and 4 share one
    binary layout. After decompiling (or reading from XML) ``panose`` holds a
    :class:`Panose` object. For version 5 tables the two optical point size
    attributes are kept divided by 20 relative to the integers stored in the
    binary table.
    """

    dependencies = ["head"]

    def decompile(self, data, ttFont):
        """Unpack the binary table ``data`` into attributes of ``self``.

        Bytes left over after the last field of the declared version only
        produce a warning.

        Raises:
            ttLib.TTLibError: if the table version is not one of 0-5.
        """
        _, data = sstruct.unpack2(OS2_format_0, data, self)

        if self.version == 1:
            _, data = sstruct.unpack2(OS2_format_1_addition, data, self)
        elif self.version in (2, 3, 4):
            _, data = sstruct.unpack2(OS2_format_2_addition, data, self)
        elif self.version == 5:
            _, data = sstruct.unpack2(OS2_format_5_addition, data, self)
            # the binary fields hold integers 20 times the value exposed here
            self.usLowerOpticalPointSize /= 20
            self.usUpperOpticalPointSize /= 20
        elif self.version != 0:
            from fontTools import ttLib

            raise ttLib.TTLibError(
                "unknown format for OS/2 table: version %s" % self.version
            )
        if len(data):
            log.warning("too much 'OS/2' table data")

        # replace the raw 10-byte string with a structured Panose object
        self.panose = sstruct.unpack(panoseFormat, self.panose, Panose())

    def compile(self, ttFont):
        """Return the binary representation of this table.

        ``usFirstCharIndex`` and ``usLastCharIndex`` are refreshed from the
        font's 'cmap' first, and warnings are logged when ``fsSelection`` is
        inconsistent with ``head.macStyle``, with itself, or with the table
        version. While packing, ``self.panose`` is temporarily replaced by its
        packed bytes; the original object is always put back afterwards, even
        when packing fails.

        Raises:
            ttLib.TTLibError: if the table version is not one of 0-5.
        """
        self.updateFirstAndLastCharIndex(ttFont)
        panose = self.panose
        head = ttFont["head"]
        if (self.fsSelection & 1) and not (head.macStyle & 1 << 1):
            log.warning(
                "fsSelection bit 0 (italic) and "
                "head table macStyle bit 1 (italic) should match"
            )
        if (self.fsSelection & 1 << 5) and not (head.macStyle & 1):
            log.warning(
                "fsSelection bit 5 (bold) and "
                "head table macStyle bit 0 (bold) should match"
            )
        # 1 + (1 << 5) is the combined mask for bits 0 (italic) and 5 (bold)
        if (self.fsSelection & 1 << 6) and (self.fsSelection & 1 + (1 << 5)):
            log.warning(
                "fsSelection bit 6 (regular) is set, "
                "bits 0 (italic) and 5 (bold) must be clear"
            )
        if self.version < 4 and self.fsSelection & 0b1110000000:
            log.warning(
                "fsSelection bits 7, 8 and 9 are only defined in "
                "OS/2 table version 4 and up: version %s",
                self.version,
            )
        # sstruct packs the 'panose' member as a 10-byte string, so swap the
        # Panose object for its packed form for the duration of the packing.
        self.panose = sstruct.pack(panoseFormat, self.panose)
        try:
            if self.version == 0:
                data = sstruct.pack(OS2_format_0, self)
            elif self.version == 1:
                data = sstruct.pack(OS2_format_1, self)
            elif self.version in (2, 3, 4):
                data = sstruct.pack(OS2_format_2, self)
            elif self.version == 5:
                # pack from a copy so the scaled integers do not overwrite
                # the values stored on the instance
                d = self.__dict__.copy()
                d["usLowerOpticalPointSize"] = round(
                    self.usLowerOpticalPointSize * 20
                )
                d["usUpperOpticalPointSize"] = round(
                    self.usUpperOpticalPointSize * 20
                )
                data = sstruct.pack(OS2_format_5, d)
            else:
                from fontTools import ttLib

                raise ttLib.TTLibError(
                    "unknown format for OS/2 table: version %s" % self.version
                )
        finally:
            # never leave the packed bytes behind, not even on failure
            self.panose = panose
        return data

    def toXML(self, writer, ttFont):
        """Write every field of the table's version as XML via ``writer``.

        Bit-field members are written as binary strings, ``panose`` as a
        nested element and ``achVendID`` as its repr without the first and
        last character.
        """
        writer.comment(
            "The fields 'usFirstCharIndex' and 'usLastCharIndex'\n"
            "will be recalculated by the compiler"
        )
        writer.newline()
        if self.version == 1:
            tableFormat = OS2_format_1
        elif self.version in (2, 3, 4):
            tableFormat = OS2_format_2
        elif self.version == 5:
            tableFormat = OS2_format_5
        else:
            tableFormat = OS2_format_0
        _, names, _ = sstruct.getformat(tableFormat)
        for name in names:
            value = getattr(self, name)
            if name == "panose":
                writer.begintag("panose")
                writer.newline()
                value.toXML(writer, ttFont)
                writer.endtag("panose")
            elif name in (
                "ulUnicodeRange1",
                "ulUnicodeRange2",
                "ulUnicodeRange3",
                "ulUnicodeRange4",
                "ulCodePageRange1",
                "ulCodePageRange2",
            ):
                writer.simpletag(name, value=num2binary(value))
            elif name in ("fsType", "fsSelection"):
                writer.simpletag(name, value=num2binary(value, 16))
            elif name == "achVendID":
                writer.simpletag(name, value=repr(value)[1:-1])
            else:
                writer.simpletag(name, value=value)
            writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Set the attribute ``name`` from one parsed XML element.

        This is the inverse of :meth:`toXML`: bit fields are read from binary
        strings, ``panose`` from its child elements, and all other values are
        evaluated from the ``value`` attribute.
        """
        if name == "panose":
            self.panose = panose = Panose()
            for element in content:
                # only tuples are child elements; anything else is skipped
                if isinstance(element, tuple):
                    childName, childAttrs, childContent = element
                    panose.fromXML(childName, childAttrs, childContent, ttFont)
        elif name in (
            "ulUnicodeRange1",
            "ulUnicodeRange2",
            "ulUnicodeRange3",
            "ulUnicodeRange4",
            "ulCodePageRange1",
            "ulCodePageRange2",
            "fsType",
            "fsSelection",
        ):
            setattr(self, name, binary2num(attrs["value"]))
        elif name == "achVendID":
            setattr(self, name, safeEval("'''" + attrs["value"] + "'''"))
        else:
            setattr(self, name, safeEval(attrs["value"]))

    def updateFirstAndLastCharIndex(self, ttFont):
        """Set usFirstCharIndex/usLastCharIndex from the Unicode cmap subtables.

        Nothing is changed when the font has no 'cmap' table or when its
        Unicode subtables map no codepoints.
        """
        if "cmap" not in ttFont:
            return
        codes = set()
        for table in getattr(ttFont["cmap"], "tables", []):
            if table.isUnicode():
                codes.update(table.cmap.keys())
        if codes:
            minCode = min(codes)
            maxCode = max(codes)
            # USHORT cannot hold codepoints greater than 0xFFFF
            self.usFirstCharIndex = min(0xFFFF, minCode)
            self.usLastCharIndex = min(0xFFFF, maxCode)

    # misspelled attributes kept for legacy reasons

    @property
    def usMaxContex(self):
        return self.usMaxContext

    @usMaxContex.setter
    def usMaxContex(self, value):
        self.usMaxContext = value

    @property
    def fsFirstCharIndex(self):
        return self.usFirstCharIndex

    @fsFirstCharIndex.setter
    def fsFirstCharIndex(self, value):
        self.usFirstCharIndex = value

    @property
    def fsLastCharIndex(self):
        return self.usLastCharIndex

    @fsLastCharIndex.setter
    def fsLastCharIndex(self, value):
        self.usLastCharIndex = value

    def getUnicodeRanges(self):
        """Return the set of 'ulUnicodeRange*' bits currently enabled.

        Bits are numbered 0-127 across the four 32-bit fields, with
        ulUnicodeRange1 holding bits 0-31.
        """
        bits = set()
        ul1, ul2 = self.ulUnicodeRange1, self.ulUnicodeRange2
        ul3, ul4 = self.ulUnicodeRange3, self.ulUnicodeRange4
        for i in range(32):
            if ul1 & (1 << i):
                bits.add(i)
            if ul2 & (1 << i):
                bits.add(i + 32)
            if ul3 & (1 << i):
                bits.add(i + 64)
            if ul4 & (1 << i):
                bits.add(i + 96)
        return bits

    def setUnicodeRanges(self, bits):
        """Set the 'ulUnicodeRange*' fields to the specified 'bits'.

        Raises:
            ValueError: if a bit is outside 0..122. The fields are left
                unchanged in that case.
        """
        ul1, ul2, ul3, ul4 = 0, 0, 0, 0
        for bit in bits:
            if 0 <= bit < 32:
                ul1 |= 1 << bit
            elif 32 <= bit < 64:
                ul2 |= 1 << (bit - 32)
            elif 64 <= bit < 96:
                ul3 |= 1 << (bit - 64)
            elif 96 <= bit < 123:
                ul4 |= 1 << (bit - 96)
            else:
                raise ValueError("expected 0 <= int <= 122, found: %r" % bit)
        self.ulUnicodeRange1, self.ulUnicodeRange2 = ul1, ul2
        self.ulUnicodeRange3, self.ulUnicodeRange4 = ul3, ul4

    def recalcUnicodeRanges(self, ttFont, pruneOnly=False):
        """Intersect the codepoints in the font's Unicode cmap subtables with
        the Unicode block ranges defined in the OpenType specification (v1.7),
        and set the respective 'ulUnicodeRange*' bits if there is at least ONE
        intersection.
        If 'pruneOnly' is True, only clear unused bits with NO intersection.

        Returns the set of bits that were stored.
        """
        unicodes = set()
        for table in ttFont["cmap"].tables:
            if table.isUnicode():
                unicodes.update(table.cmap.keys())
        if pruneOnly:
            empty = intersectUnicodeRanges(unicodes, inverse=True)
            bits = self.getUnicodeRanges() - empty
        else:
            bits = intersectUnicodeRanges(unicodes)
        self.setUnicodeRanges(bits)
        return bits

    def getCodePageRanges(self):
        """Return the set of 'ulCodePageRange*' bits currently enabled.

        The set is empty for tables with a version below 1.
        """
        bits = set()
        if self.version < 1:
            return bits
        ul1, ul2 = self.ulCodePageRange1, self.ulCodePageRange2
        for i in range(32):
            if ul1 & (1 << i):
                bits.add(i)
            if ul2 & (1 << i):
                bits.add(i + 32)
        return bits

    def setCodePageRanges(self, bits):
        """Set the 'ulCodePageRange*' fields to the specified 'bits'.

        A table with a version below 1 is upgraded to version 1.

        Raises:
            ValueError: if a bit is outside 0..63. The table is left
                unchanged in that case.
        """
        ul1, ul2 = 0, 0
        for bit in bits:
            if 0 <= bit < 32:
                ul1 |= 1 << bit
            elif 32 <= bit < 64:
                ul2 |= 1 << (bit - 32)
            else:
                # '!r' is the repr conversion; a ':r' format spec is invalid
                # for ints and would hide this message behind a format error
                raise ValueError(f"expected 0 <= int <= 63, found: {bit!r}")
        if self.version < 1:
            self.version = 1
        self.ulCodePageRange1, self.ulCodePageRange2 = ul1, ul2

    def recalcCodePageRanges(self, ttFont, pruneOnly=False):
        """Recalculate the 'ulCodePageRange*' bits from the font's Unicode
        cmap subtables, store them and return them as a set.

        If 'pruneOnly' is True, only bits that are already enabled are kept.
        """
        unicodes = set()
        for table in ttFont["cmap"].tables:
            if table.isUnicode():
                unicodes.update(table.cmap.keys())
        bits = calcCodePageRanges(unicodes)
        if pruneOnly:
            bits &= self.getCodePageRanges()
        # when no codepage ranges can be enabled, fall back to enabling bit 0
        # (Latin 1) so that the font works in MS Word:
        # https://github.com/googlei18n/fontmake/issues/468
        if not bits:
            bits = {0}
        self.setCodePageRanges(bits)
        return bits

    def recalcAvgCharWidth(self, ttFont):
        """Recalculate xAvgCharWidth using metrics from ttFont's 'hmtx' table.

        Only widths greater than zero are averaged. The value is set to 0 in
        the unlikely event that the 'hmtx' table is not found, or when no
        glyph has a positive width. Returns the value that was stored.
        """
        avg_width = 0
        hmtx = ttFont.get("hmtx")
        if hmtx is not None:
            widths = [width for width, _ in hmtx.metrics.values() if width > 0]
            if widths:
                avg_width = otRound(sum(widths) / len(widths))
        self.xAvgCharWidth = avg_width
        return avg_width
397        return avg_width
398
399
# Unicode ranges data from the OpenType OS/2 table specification v1.7
#
# The position of each entry in the outer tuple is its 'ulUnicodeRange' bit
# number. Each entry is a tuple of (block name, (first, last)) pairs listing
# the Unicode blocks assigned to that bit; 'first' and 'last' are both
# inclusive codepoints.

OS2_UNICODE_RANGES = (
    (("Basic Latin", (0x0000, 0x007F)),),
    (("Latin-1 Supplement", (0x0080, 0x00FF)),),
    (("Latin Extended-A", (0x0100, 0x017F)),),
    (("Latin Extended-B", (0x0180, 0x024F)),),
    (
        ("IPA Extensions", (0x0250, 0x02AF)),
        ("Phonetic Extensions", (0x1D00, 0x1D7F)),
        ("Phonetic Extensions Supplement", (0x1D80, 0x1DBF)),
    ),
    (
        ("Spacing Modifier Letters", (0x02B0, 0x02FF)),
        ("Modifier Tone Letters", (0xA700, 0xA71F)),
    ),
    (
        ("Combining Diacritical Marks", (0x0300, 0x036F)),
        ("Combining Diacritical Marks Supplement", (0x1DC0, 0x1DFF)),
    ),
    (("Greek and Coptic", (0x0370, 0x03FF)),),
    (("Coptic", (0x2C80, 0x2CFF)),),
    (
        ("Cyrillic", (0x0400, 0x04FF)),
        ("Cyrillic Supplement", (0x0500, 0x052F)),
        ("Cyrillic Extended-A", (0x2DE0, 0x2DFF)),
        ("Cyrillic Extended-B", (0xA640, 0xA69F)),
    ),
    (("Armenian", (0x0530, 0x058F)),),
    (("Hebrew", (0x0590, 0x05FF)),),
    (("Vai", (0xA500, 0xA63F)),),
    (("Arabic", (0x0600, 0x06FF)), ("Arabic Supplement", (0x0750, 0x077F))),
    (("NKo", (0x07C0, 0x07FF)),),
    (("Devanagari", (0x0900, 0x097F)),),
    (("Bengali", (0x0980, 0x09FF)),),
    (("Gurmukhi", (0x0A00, 0x0A7F)),),
    (("Gujarati", (0x0A80, 0x0AFF)),),
    (("Oriya", (0x0B00, 0x0B7F)),),
    (("Tamil", (0x0B80, 0x0BFF)),),
    (("Telugu", (0x0C00, 0x0C7F)),),
    (("Kannada", (0x0C80, 0x0CFF)),),
    (("Malayalam", (0x0D00, 0x0D7F)),),
    (("Thai", (0x0E00, 0x0E7F)),),
    (("Lao", (0x0E80, 0x0EFF)),),
    (("Georgian", (0x10A0, 0x10FF)), ("Georgian Supplement", (0x2D00, 0x2D2F))),
    (("Balinese", (0x1B00, 0x1B7F)),),
    (("Hangul Jamo", (0x1100, 0x11FF)),),
    (
        ("Latin Extended Additional", (0x1E00, 0x1EFF)),
        ("Latin Extended-C", (0x2C60, 0x2C7F)),
        ("Latin Extended-D", (0xA720, 0xA7FF)),
    ),
    (("Greek Extended", (0x1F00, 0x1FFF)),),
    (
        ("General Punctuation", (0x2000, 0x206F)),
        ("Supplemental Punctuation", (0x2E00, 0x2E7F)),
    ),
    (("Superscripts And Subscripts", (0x2070, 0x209F)),),
    (("Currency Symbols", (0x20A0, 0x20CF)),),
    (("Combining Diacritical Marks For Symbols", (0x20D0, 0x20FF)),),
    (("Letterlike Symbols", (0x2100, 0x214F)),),
    (("Number Forms", (0x2150, 0x218F)),),
    (
        ("Arrows", (0x2190, 0x21FF)),
        ("Supplemental Arrows-A", (0x27F0, 0x27FF)),
        ("Supplemental Arrows-B", (0x2900, 0x297F)),
        ("Miscellaneous Symbols and Arrows", (0x2B00, 0x2BFF)),
    ),
    (
        ("Mathematical Operators", (0x2200, 0x22FF)),
        ("Supplemental Mathematical Operators", (0x2A00, 0x2AFF)),
        ("Miscellaneous Mathematical Symbols-A", (0x27C0, 0x27EF)),
        ("Miscellaneous Mathematical Symbols-B", (0x2980, 0x29FF)),
    ),
    (("Miscellaneous Technical", (0x2300, 0x23FF)),),
    (("Control Pictures", (0x2400, 0x243F)),),
    (("Optical Character Recognition", (0x2440, 0x245F)),),
    (("Enclosed Alphanumerics", (0x2460, 0x24FF)),),
    (("Box Drawing", (0x2500, 0x257F)),),
    (("Block Elements", (0x2580, 0x259F)),),
    (("Geometric Shapes", (0x25A0, 0x25FF)),),
    (("Miscellaneous Symbols", (0x2600, 0x26FF)),),
    (("Dingbats", (0x2700, 0x27BF)),),
    (("CJK Symbols And Punctuation", (0x3000, 0x303F)),),
    (("Hiragana", (0x3040, 0x309F)),),
    (
        ("Katakana", (0x30A0, 0x30FF)),
        ("Katakana Phonetic Extensions", (0x31F0, 0x31FF)),
    ),
    (("Bopomofo", (0x3100, 0x312F)), ("Bopomofo Extended", (0x31A0, 0x31BF))),
    (("Hangul Compatibility Jamo", (0x3130, 0x318F)),),
    (("Phags-pa", (0xA840, 0xA87F)),),
    (("Enclosed CJK Letters And Months", (0x3200, 0x32FF)),),
    (("CJK Compatibility", (0x3300, 0x33FF)),),
    (("Hangul Syllables", (0xAC00, 0xD7AF)),),
    (("Non-Plane 0 *", (0xD800, 0xDFFF)),),
    (("Phoenician", (0x10900, 0x1091F)),),
    (
        ("CJK Unified Ideographs", (0x4E00, 0x9FFF)),
        ("CJK Radicals Supplement", (0x2E80, 0x2EFF)),
        ("Kangxi Radicals", (0x2F00, 0x2FDF)),
        ("Ideographic Description Characters", (0x2FF0, 0x2FFF)),
        ("CJK Unified Ideographs Extension A", (0x3400, 0x4DBF)),
        ("CJK Unified Ideographs Extension B", (0x20000, 0x2A6DF)),
        ("Kanbun", (0x3190, 0x319F)),
    ),
    (("Private Use Area (plane 0)", (0xE000, 0xF8FF)),),
    (
        ("CJK Strokes", (0x31C0, 0x31EF)),
        ("CJK Compatibility Ideographs", (0xF900, 0xFAFF)),
        ("CJK Compatibility Ideographs Supplement", (0x2F800, 0x2FA1F)),
    ),
    (("Alphabetic Presentation Forms", (0xFB00, 0xFB4F)),),
    (("Arabic Presentation Forms-A", (0xFB50, 0xFDFF)),),
    (("Combining Half Marks", (0xFE20, 0xFE2F)),),
    (
        ("Vertical Forms", (0xFE10, 0xFE1F)),
        ("CJK Compatibility Forms", (0xFE30, 0xFE4F)),
    ),
    (("Small Form Variants", (0xFE50, 0xFE6F)),),
    (("Arabic Presentation Forms-B", (0xFE70, 0xFEFF)),),
    (("Halfwidth And Fullwidth Forms", (0xFF00, 0xFFEF)),),
    (("Specials", (0xFFF0, 0xFFFF)),),
    (("Tibetan", (0x0F00, 0x0FFF)),),
    (("Syriac", (0x0700, 0x074F)),),
    (("Thaana", (0x0780, 0x07BF)),),
    (("Sinhala", (0x0D80, 0x0DFF)),),
    (("Myanmar", (0x1000, 0x109F)),),
    (
        ("Ethiopic", (0x1200, 0x137F)),
        ("Ethiopic Supplement", (0x1380, 0x139F)),
        ("Ethiopic Extended", (0x2D80, 0x2DDF)),
    ),
    (("Cherokee", (0x13A0, 0x13FF)),),
    (("Unified Canadian Aboriginal Syllabics", (0x1400, 0x167F)),),
    (("Ogham", (0x1680, 0x169F)),),
    (("Runic", (0x16A0, 0x16FF)),),
    (("Khmer", (0x1780, 0x17FF)), ("Khmer Symbols", (0x19E0, 0x19FF))),
    (("Mongolian", (0x1800, 0x18AF)),),
    (("Braille Patterns", (0x2800, 0x28FF)),),
    (("Yi Syllables", (0xA000, 0xA48F)), ("Yi Radicals", (0xA490, 0xA4CF))),
    (
        ("Tagalog", (0x1700, 0x171F)),
        ("Hanunoo", (0x1720, 0x173F)),
        ("Buhid", (0x1740, 0x175F)),
        ("Tagbanwa", (0x1760, 0x177F)),
    ),
    (("Old Italic", (0x10300, 0x1032F)),),
    (("Gothic", (0x10330, 0x1034F)),),
    (("Deseret", (0x10400, 0x1044F)),),
    (
        ("Byzantine Musical Symbols", (0x1D000, 0x1D0FF)),
        ("Musical Symbols", (0x1D100, 0x1D1FF)),
        ("Ancient Greek Musical Notation", (0x1D200, 0x1D24F)),
    ),
    (("Mathematical Alphanumeric Symbols", (0x1D400, 0x1D7FF)),),
    (
        ("Private Use (plane 15)", (0xF0000, 0xFFFFD)),
        ("Private Use (plane 16)", (0x100000, 0x10FFFD)),
    ),
    (
        ("Variation Selectors", (0xFE00, 0xFE0F)),
        ("Variation Selectors Supplement", (0xE0100, 0xE01EF)),
    ),
    (("Tags", (0xE0000, 0xE007F)),),
    (("Limbu", (0x1900, 0x194F)),),
    (("Tai Le", (0x1950, 0x197F)),),
    (("New Tai Lue", (0x1980, 0x19DF)),),
    (("Buginese", (0x1A00, 0x1A1F)),),
    (("Glagolitic", (0x2C00, 0x2C5F)),),
    (("Tifinagh", (0x2D30, 0x2D7F)),),
    (("Yijing Hexagram Symbols", (0x4DC0, 0x4DFF)),),
    (("Syloti Nagri", (0xA800, 0xA82F)),),
    (
        ("Linear B Syllabary", (0x10000, 0x1007F)),
        ("Linear B Ideograms", (0x10080, 0x100FF)),
        ("Aegean Numbers", (0x10100, 0x1013F)),
    ),
    (("Ancient Greek Numbers", (0x10140, 0x1018F)),),
    (("Ugaritic", (0x10380, 0x1039F)),),
    (("Old Persian", (0x103A0, 0x103DF)),),
    (("Shavian", (0x10450, 0x1047F)),),
    (("Osmanya", (0x10480, 0x104AF)),),
    (("Cypriot Syllabary", (0x10800, 0x1083F)),),
    (("Kharoshthi", (0x10A00, 0x10A5F)),),
    (("Tai Xuan Jing Symbols", (0x1D300, 0x1D35F)),),
    (
        ("Cuneiform", (0x12000, 0x123FF)),
        ("Cuneiform Numbers and Punctuation", (0x12400, 0x1247F)),
    ),
    (("Counting Rod Numerals", (0x1D360, 0x1D37F)),),
    (("Sundanese", (0x1B80, 0x1BBF)),),
    (("Lepcha", (0x1C00, 0x1C4F)),),
    (("Ol Chiki", (0x1C50, 0x1C7F)),),
    (("Saurashtra", (0xA880, 0xA8DF)),),
    (("Kayah Li", (0xA900, 0xA92F)),),
    (("Rejang", (0xA930, 0xA95F)),),
    (("Cham", (0xAA00, 0xAA5F)),),
    (("Ancient Symbols", (0x10190, 0x101CF)),),
    (("Phaistos Disc", (0x101D0, 0x101FF)),),
    (
        ("Carian", (0x102A0, 0x102DF)),
        ("Lycian", (0x10280, 0x1029F)),
        ("Lydian", (0x10920, 0x1093F)),
    ),
    (("Domino Tiles", (0x1F030, 0x1F09F)), ("Mahjong Tiles", (0x1F000, 0x1F02F))),
)
607
608
# Lazily filled lookup tables shared by all callers of _getUnicodeRanges().
# _unicodeValues carries a leading None placeholder so that
# _unicodeValues[bisect.bisect(_unicodeStarts, code)] lands on the block whose
# start is the greatest one not exceeding 'code'.
_unicodeStarts = []
_unicodeValues = [None]


def _getUnicodeRanges():
    """Return the cached ``(starts, values)`` lookup lists, building them once.

    ``starts`` holds the first codepoint of every block in ascending order;
    ``values`` holds the matching ``(stop, bit)`` pairs, shifted by one
    position because of its leading ``None`` entry.
    """
    if _unicodeStarts:
        # already populated by an earlier call
        return _unicodeStarts, _unicodeValues

    orderedBlocks = sorted(
        (first, (last, bitIndex))
        for bitIndex, group in enumerate(OS2_UNICODE_RANGES)
        for _name, (first, last) in group
    )
    for first, lastAndBit in orderedBlocks:
        _unicodeStarts.append(first)
        _unicodeValues.append(lastAndBit)
    return _unicodeStarts, _unicodeValues
625
626
def intersectUnicodeRanges(unicodes, inverse=False):
    """Intersect a sequence of (int) Unicode codepoints with the Unicode block
    ranges defined in the OpenType specification v1.7, and return the set of
    'ulUnicodeRanges' bits for which there is at least ONE intersection.
    If 'inverse' is True, return the bits for which there is NO intersection.

    >>> intersectUnicodeRanges([0x0410]) == {9}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000]) == {9, 57, 122}
    True
    >>> intersectUnicodeRanges([0x0410, 0x1F000], inverse=True) == (
    ...     set(range(len(OS2_UNICODE_RANGES))) - {9, 57, 122})
    True
    """
    codepoints = set(unicodes)
    starts, values = _getUnicodeRanges()
    matched = set()
    for codepoint in codepoints:
        # candidate block: the one with the greatest start <= codepoint
        slot = bisect.bisect(starts, codepoint)
        blockStop, blockBit = values[slot]
        if codepoint <= blockStop:
            matched.add(blockBit)
    # The spec says that bit 57 ("Non Plane 0") implies that there's
    # at least one codepoint beyond the BMP; so all the non-BMP
    # codepoints are counted for it here as well
    if any(0x10000 <= codepoint < 0x110000 for codepoint in codepoints):
        matched.add(57)
    if inverse:
        return set(range(len(OS2_UNICODE_RANGES))) - matched
    return matched
654
655
def calcCodePageRanges(unicodes):
    """Given a set of Unicode codepoints (integers), calculate the
    corresponding OS/2 CodePage range bits and return them as a set.

    Each code page is detected through the presence of one or more marker
    characters, in some cases combined with printable ASCII coverage and/or
    a box-drawing ("lineart") character.
    This is a direct translation of FontForge implementation:
    https://github.com/fontforge/fontforge/blob/7b2c074/fontforge/tottf.c#L3158
    """

    def has(char):
        # True when the codepoint of 'char' is among the given codepoints
        return ord(char) in unicodes

    asciiCovered = set(range(0x20, 0x7E)).issubset(unicodes)
    lineart = has("┤")
    squareRoot = has("√")
    pages = set()

    # code pages that need no ASCII coverage, each keyed on one marker char
    if has("Б"):
        pages.add(2)  # Cyrillic
        if lineart:
            if has("Ѕ"):
                pages.add(57)  # IBM Cyrillic
            if has("╜"):
                pages.add(49)  # MS-DOS Russian
    if has("Ά"):
        pages.add(3)  # Greek
        if lineart:
            if has("½"):
                pages.add(48)  # IBM Greek
            if squareRoot:
                pages.add(60)  # Greek, former 437 G
    if has("א"):
        pages.add(5)  # Hebrew
        if lineart and squareRoot:
            pages.add(53)  # Hebrew
    if has("ر"):
        pages.add(6)  # Arabic
        if squareRoot:
            pages.add(51)  # Arabic
        if lineart:
            pages.add(61)  # Arabic; ASMO 708
    if has("ๅ"):
        pages.add(16)  # Thai
    if has("エ"):
        pages.add(17)  # JIS/Japan
    if has("ㄅ"):
        pages.add(18)  # Chinese: Simplified
    if has("ㄱ"):
        pages.add(19)  # Korean wansung
    if has("央"):
        pages.add(20)  # Chinese: Traditional
    if has("곴"):
        pages.add(21)  # Korean Johab

    # TODO: Symbol bit (31) has a special meaning (check the spec), we need
    # to confirm if this is wanted by default; it is deliberately not set for
    # codepoints 0xF000..0xF0FF.

    # everything below additionally requires printable ASCII coverage
    if asciiCovered:
        if has("Þ"):
            pages.add(0)  # Latin 1
        if has("Ľ"):
            pages.add(1)  # Latin 2: Eastern Europe
            if lineart:
                pages.add(58)  # Latin 2
        if has("İ"):
            pages.add(4)  # Turkish
            if lineart:
                pages.add(56)  # IBM turkish
        if has("ŗ"):
            pages.add(7)  # Windows Baltic
            if lineart:
                pages.add(59)  # MS-DOS Baltic
        if has("₫"):
            pages.add(8)  # Vietnamese
        if has("♥"):
            pages.add(30)  # OEM Character Set
        if lineart and has("þ"):
            pages.add(54)  # MS-DOS Icelandic
        if has("╚"):
            pages.add(62)  # WE/Latin 1
            pages.add(63)  # US
        if lineart and squareRoot:
            if has("Å"):
                pages.add(50)  # MS-DOS Nordic
            if has("é"):
                pages.add(52)  # MS-DOS Canadian French
            if has("õ"):
                pages.add(55)  # MS-DOS Portuguese
        if has("‰") and has("∑"):
            pages.add(29)  # Macintosh Character Set (US Roman)

    return pages
740
741
if __name__ == "__main__":
    # When executed as a script, run this module's doctests and use the
    # number of failed examples as the process exit status.
    import doctest
    import sys

    failureCount = doctest.testmod().failed
    sys.exit(failureCount)
746