xref: /aosp_15_r20/external/fonttools/Lib/fontTools/misc/psLib.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
2from fontTools.misc import eexec
3from .psOperators import (
4    PSOperators,
5    ps_StandardEncoding,
6    ps_array,
7    ps_boolean,
8    ps_dict,
9    ps_integer,
10    ps_literal,
11    ps_mark,
12    ps_name,
13    ps_operator,
14    ps_procedure,
15    ps_procmark,
16    ps_real,
17    ps_string,
18)
19import re
20from collections.abc import Callable
21from string import whitespace
22import logging
23
24
log = logging.getLogger(__name__)

# Bytes that delimit or start special tokens; '/' also starts a token
# (a literal name) but PSTokenizer.getnexttoken handles it separately.
ps_special = b"()<>[]{}%"  # / is one too, but we take care of that one differently

# Matches a (possibly empty) run of whitespace.
skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
# Matches to the end of a "regular" token: any run of bytes that are not
# delimiters, '%', '/' or whitespace.
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
# A '%' comment extends to the end of the line.
commentRE = re.compile(b"%[^\n\r]*")

# XXX This not entirely correct as it doesn't allow *nested* embedded parens:
# NOTE(review): after the whitespace-stripping join below, the lone '\'
# in the first branch fuses with '[()]' into r'\[()]', which re reads as
# a literal '[' followed by an empty group and a literal ']' -- so the
# escaped-paren branch likely does not match '\(' / '\)' as apparently
# intended; confirm before relying on it.
stringPat = rb"""
	\(
		(
			(
				[^()]*   \   [()]
			)
			|
			(
				[^()]*  \(   [^()]*  \)
			)
		)*
		[^()]*
	\)
"""
# Collapse the verbose pattern above by removing all whitespace.
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)

# <...> hex string: hex digits with embedded whitespace allowed.
hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))
53
54
class PSTokenError(Exception):
    """Raised by PSTokenizer when the input cannot be tokenized."""

    pass
57
58
class PSError(Exception):
    """Raised by PSInterpreter on PostScript-level errors
    (undefined names, stack underflow, type mismatches)."""

    pass
61
62
class PSTokenizer(object):
    """Split a PostScript program (kept in memory as bytes) into tokens.

    ``getnexttoken`` returns ``(tokentype, token)`` pairs where
    ``tokentype`` names a ``do_*`` handler method on PSInterpreter
    (or is "" for plain number/name tokens) and ``token`` is the token
    text decoded to str with ``self.encoding``.
    """

    def __init__(self, buf=b"", encoding="ascii"):
        # Force self.buf to be a byte string
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0  # current read position within self.buf
        self.closed = False
        self.encoding = encoding  # used to decode tokens back to str

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos : newpos]
        self.pos = newpos
        return r

    def close(self):
        # Drop the buffer; subsequent read() calls raise ValueError.
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(
        self,
        # localize some stuff, for performance
        len=len,
        ps_special=ps_special,
        stringmatch=stringRE.match,
        hexstringmatch=hexstringRE.match,
        commentmatch=commentRE.match,
        endmatch=endofthingRE.match,
    ):
        """Return the next ``(tokentype, token)`` pair, or ``(None, None)``
        at end of input.

        ``tokentype`` is one of "do_special", "do_comment", "do_string",
        "do_hexstring", "do_literal", or "" for tokens handled by
        PSInterpreter.do_token.  Raises PSTokenError on malformed input.
        """
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        # Indexing bytes yields an int; rebuild a length-1 bytes object
        # so the comparisons below work.
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b"{}[]":
                tokentype = "do_special"
                token = char
            elif char == b"%":
                tokentype = "do_comment"
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b"(":
                tokentype = "do_string"
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad string at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b"<":
                tokentype = "do_hexstring"
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad hexstring at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                raise PSTokenError("bad token at character %d" % pos)
        else:
            if char == b"/":
                # Literal name: the '/' itself is kept in the token.
                tokentype = "do_literal"
                m = endmatch(buf, pos + 1)
            else:
                tokentype = ""
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError("bad token at character %d" % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token

    def skipwhite(self, whitematch=skipwhiteRE.match):
        # Advance self.pos past any run of whitespace.
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
        """Switch to reading the eexec-encrypted part of the buffer.

        Decrypts the remainder with the standard eexec key 55665 and
        positions the cursor past the first 4 decrypted bytes
        (presumably the eexec random prefix -- see the Type 1 spec).
        """
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos :]
        self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        self.pos = 4

    def stopeexec(self):
        """Switch back to the original (still-encrypted) buffer.

        NOTE(review): self.len and self.pos are not restored here;
        callers appear to rely on that -- confirm before changing.
        """
        if not hasattr(self, "dirtybuf"):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf
164
165
class PSInterpreter(PSOperators):
    """Interpret a small subset of PostScript -- enough to execute Type 1
    font programs and build Python object trees from them.

    State:
      - ``self.stack``: the operand stack.
      - ``self.dictstack``: ``[systemdict, userdict]``; names are looked
        up from the top of this stack downwards.
      - ``self.proclevel``: current ``{ ... }`` nesting depth; while it
        is non-zero, objects are collected instead of executed.

    Operators are harvested from the ``ps_*`` methods defined on this
    class and its bases (see ``suckoperators``).
    """

    def __init__(self, encoding="ascii"):
        """*encoding* is passed to the tokenizer to decode tokens to str."""
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        self.dictstack = [systemdict, userdict]
        self.stack = []
        self.proclevel = 0
        self.procmark = ps_procmark()  # sentinel pushed when '{' is seen
        self.fillsystemdict()

    def fillsystemdict(self):
        """Populate systemdict with built-in constants and operators."""
        systemdict = self.dictstack[0]
        systemdict["["] = systemdict["mark"] = self.mark = ps_mark()
        systemdict["]"] = ps_operator("]", self.do_makearray)
        systemdict["true"] = ps_boolean(1)
        systemdict["false"] = ps_boolean(0)
        systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding)
        systemdict["FontDirectory"] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        """Register every ``ps_<name>`` method of *klass* (and,
        recursively, its bases) as a PostScript operator ``<name>``."""
        for name in dir(klass):
            attr = getattr(self, name)
            # callable() is the idiomatic builtin equivalent of
            # isinstance(attr, collections.abc.Callable).
            if callable(attr) and name[:3] == "ps_":
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
        """Tokenize and execute the PostScript program in *data*.

        On any error, logs up to 50 bytes of context on either side of
        the failure position, then re-raises.
        """
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while True:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    # tokentype names a do_* handler method on self.
                    handler = getattr(self, tokentype)
                    object = handler(token)
                else:
                    object = do_token(token)
                if object is not None:
                    handle_object(object)
            tokenizer.close()
            self.tokenizer = None
        except BaseException:  # explicit spelling of bare 'except:'; always re-raises
            if self.tokenizer is not None:
                pos = self.tokenizer.pos
                # max() prevents a negative start index (pos < 50) from
                # wrapping around and showing bytes from the buffer's end.
                log.debug(
                    "ps error:\n"
                    "- - - - - - -\n"
                    "%s\n"
                    ">>>\n"
                    "%s\n"
                    "- - - - - - -",
                    self.tokenizer.buf[max(pos - 50, 0) : pos],
                    self.tokenizer.buf[pos : pos + 50],
                )
            raise

    def handle_object(self, object):
        """Execute *object*, or push it onto the operand stack when we
        are collecting a procedure body or the object is literal."""
        if not (self.proclevel or object.literal or object.type == "proceduretype"):
            if object.type != "operatortype":
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == "proceduretype":
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        """Execute each object of the procedure body, in order."""
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        """Look *name* up through the dictionary stack, topmost first.

        Raises PSError when the name is undefined.
        """
        dictstack = self.dictstack
        for i in range(len(dictstack) - 1, -1, -1):
            if name in dictstack[i]:
                return dictstack[i][name]
        raise PSError("name error: " + str(name))

    def do_token(
        self,
        token,
        int=int,
        float=float,
        ps_name=ps_name,
        ps_integer=ps_integer,
        ps_real=ps_real,
    ):
        """Convert a plain token to ps_integer, ps_real or ps_name.

        Also handles PostScript radix numbers of the form
        ``base#digits`` (e.g. ``16#FF``); anything unparsable becomes
        an executable ps_name.
        """
        try:
            num = int(token)
        except (ValueError, OverflowError):
            try:
                num = float(token)
            except (ValueError, OverflowError):
                if "#" in token:
                    hashpos = token.find("#")
                    try:
                        base = int(token[:hashpos])
                        num = int(token[hashpos + 1 :], base)
                    except (ValueError, OverflowError):
                        return ps_name(token)
                    else:
                        return ps_integer(num)
                else:
                    return ps_name(token)
            else:
                return ps_real(num)
        else:
            return ps_integer(num)

    def do_comment(self, token):
        """Comments are ignored."""
        pass

    def do_literal(self, token):
        """'/name' -> literal name (leading slash stripped)."""
        return ps_literal(token[1:])

    def do_string(self, token):
        """'(...)' -> string without the surrounding parentheses."""
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        """'<...>' -> string; whitespace dropped, odd digit count padded
        with a trailing '0' as per PostScript."""
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            hexStr = hexStr + "0"
        # bytes.fromhex performs the pairwise int(..., 16) conversion in
        # one step; latin-1 maps each byte value b to chr(b), matching
        # the old manual chr(int(...)) loop exactly.
        return ps_string(bytes.fromhex(hexStr).decode("latin-1"))

    def do_special(self, token):
        """Handle the '{', '}', '[' and ']' delimiter tokens."""
        if token == "{":
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == "}":
            # Collect everything back down to the matching procmark.
            proc = []
            while True:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            proc.reverse()
            return ps_procedure(proc)
        elif token == "[":
            return self.mark
        elif token == "]":
            return ps_name("]")
        else:
            raise PSTokenError("huh?")

    def push(self, object):
        """Push *object* onto the operand stack."""
        self.stack.append(object)

    def pop(self, *types):
        """Pop and return the top of the operand stack.

        If *types* is given, raise PSError unless the object's type is
        one of them.  Also raises PSError on stack underflow.
        """
        stack = self.stack
        if not stack:
            raise PSError("stack underflow")
        object = stack[-1]
        if types:
            if object.type not in types:
                raise PSError(
                    "typecheck, expected %s, found %s" % (repr(types), object.type)
                )
        del stack[-1]
        return object

    def do_makearray(self):
        """']' operator: collect objects back to the mark into a ps_array."""
        array = []
        while True:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack
356
357
def unpack_item(item):
    """Recursively convert a PostScript object wrapper into plain Python.

    Dicts and lists are unpacked element by element; a list whose
    wrapper has type "proceduretype" becomes a tuple.  Any other value
    is returned as-is.
    """
    raw = item.value
    kind = type(raw)
    if kind is dict:
        return {key: unpack_item(value) for key, value in raw.items()}
    if kind is list:
        unpacked = [unpack_item(element) for element in raw]
        # Procedures are represented as (immutable) tuples.
        return tuple(unpacked) if item.type == "proceduretype" else unpacked
    return raw
373
374
def suckfont(data, encoding="ascii"):
    """Interpret the Type 1 font program in *data* (bytes) and return
    its font dictionary unpacked into plain Python objects."""
    match = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data)
    font_name = match.group(1).decode() if match else None
    interp = PSInterpreter(encoding=encoding)
    # Define a minimal /Helvetica font up front (NOTE(review):
    # presumably so FontDirectory is never empty -- confirm).
    interp.interpret(
        b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop"
    )
    interp.interpret(data)
    font_directory = interp.dictstack[0]["FontDirectory"].value
    if font_name in font_directory:
        raw_font = font_directory[font_name]
    else:
        # fall back, in case the FontName regex didn't find the font
        candidates = sorted(font_directory)
        if len(candidates) > 1:
            candidates.remove("Helvetica")
        raw_font = font_directory[candidates[0]]
    interp.close()
    return unpack_item(raw_font)
399