from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
from fontTools.misc import eexec
from .psOperators import (
    PSOperators,
    ps_StandardEncoding,
    ps_array,
    ps_boolean,
    ps_dict,
    ps_integer,
    ps_literal,
    ps_mark,
    ps_name,
    ps_operator,
    ps_procedure,
    ps_procmark,
    ps_real,
    ps_string,
)
import re
from collections.abc import Callable
from string import whitespace
import logging


log = logging.getLogger(__name__)

ps_special = b"()<>[]{}%"  # / is one too, but we take care of that one differently

skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
commentRE = re.compile(b"%[^\n\r]*")

# XXX This is not entirely correct as it doesn't allow *nested* embedded parens.
# The first alternative handles a backslash-escaped paren; note the doubled
# backslash, so that the joined pattern contains a literal backslash.
stringPat = rb"""
    \(
        (
            (
                [^()]*  \\  [()]
            )
            |
            (
                [^()]*  \(  [^()]*  \)
            )
        )*
        [^()]*
    \)
"""
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)

hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))


class PSTokenError(Exception):
    pass


class PSError(Exception):
    pass


class PSTokenizer(object):
    def __init__(self, buf=b"", encoding="ascii"):
        # Force self.buf to be a byte string
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0
        self.closed = False
        self.encoding = encoding

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos : newpos]
        self.pos = newpos
        return r

    def close(self):
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(
        self,
        # localize some stuff, for performance
        len=len,
        ps_special=ps_special,
        stringmatch=stringRE.match,
        hexstringmatch=hexstringRE.match,
        commentmatch=commentRE.match,
        endmatch=endofthingRE.match,
    ):
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b"{}[]":
                tokentype = "do_special"
                token = char
            elif char == b"%":
                tokentype = "do_comment"
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b"(":
                tokentype = "do_string"
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad string at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b"<":
                tokentype = "do_hexstring"
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad hexstring at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                raise PSTokenError("bad token at character %d" % pos)
        else:
            if char == b"/":
                tokentype = "do_literal"
                m = endmatch(buf, pos + 1)
            else:
                tokentype = ""
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError("bad token at character %d" % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token
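
    # A minimal usage sketch (illustrative addition, not part of the original
    # module): getnexttoken() yields (tokentype, token) pairs, where tokentype
    # names the PSInterpreter handler method ("do_literal", "do_string", ...)
    # and is empty for plain executable tokens:
    #
    #   tok = PSTokenizer(b"/Foo 123 (bar) def")
    #   tok.getnexttoken()  # ('do_literal', '/Foo')
    #   tok.getnexttoken()  # ('', '123')
    #   tok.getnexttoken()  # ('do_string', '(bar)')
    #   tok.getnexttoken()  # ('', 'def')
    #   tok.getnexttoken()  # (None, None) at end of input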

    def skipwhite(self, whitematch=skipwhiteRE.match):
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos :]
        self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        self.pos = 4

    def stopeexec(self):
        if not hasattr(self, "dirtybuf"):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf


class PSInterpreter(PSOperators):
    def __init__(self, encoding="ascii"):
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        self.dictstack = [systemdict, userdict]
        self.stack = []
        self.proclevel = 0
        self.procmark = ps_procmark()
        self.fillsystemdict()

    def fillsystemdict(self):
        systemdict = self.dictstack[0]
        systemdict["["] = systemdict["mark"] = self.mark = ps_mark()
        systemdict["]"] = ps_operator("]", self.do_makearray)
        systemdict["true"] = ps_boolean(1)
        systemdict["false"] = ps_boolean(0)
        systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding)
        systemdict["FontDirectory"] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        for name in dir(klass):
            attr = getattr(self, name)
            if isinstance(attr, Callable) and name[:3] == "ps_":
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while 1:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    handler = getattr(self, tokentype)
                    object = handler(token)
                else:
                    object = do_token(token)
                if object is not None:
                    handle_object(object)
            tokenizer.close()
            self.tokenizer = None
        except:
            if self.tokenizer is not None:
                log.debug(
                    "ps error:\n"
                    "- - - - - - -\n"
                    "%s\n"
                    ">>>\n"
                    "%s\n"
                    "- - - - - - -",
                    self.tokenizer.buf[self.tokenizer.pos - 50 : self.tokenizer.pos],
                    self.tokenizer.buf[self.tokenizer.pos : self.tokenizer.pos + 50],
                )
            raise

    def handle_object(self, object):
        if not (self.proclevel or object.literal or object.type == "proceduretype"):
            if object.type != "operatortype":
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == "proceduretype":
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        dictstack = self.dictstack
        for i in range(len(dictstack) - 1, -1, -1):
            if name in dictstack[i]:
                return dictstack[i][name]
        raise PSError("name error: " + str(name))
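
    # Hedged sketch of what do_token() below produces (added for illustration,
    # not in the original source). Plain tokens become numbers where possible,
    # PostScript radix literals such as "16#FF" are parsed as int(digits, base),
    # and everything else falls back to a ps_name:
    #
    #   self.do_token("42")      # ps_integer(42)
    #   self.do_token("-1.5")    # ps_real(-1.5)
    #   self.do_token("16#FF")   # ps_integer(255)
    #   self.do_token("moveto")  # ps_name('moveto')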

    def do_token(
        self,
        token,
        int=int,
        float=float,
        ps_name=ps_name,
        ps_integer=ps_integer,
        ps_real=ps_real,
    ):
        try:
            num = int(token)
        except (ValueError, OverflowError):
            try:
                num = float(token)
            except (ValueError, OverflowError):
                if "#" in token:
                    hashpos = token.find("#")
                    try:
                        base = int(token[:hashpos])
                        num = int(token[hashpos + 1 :], base)
                    except (ValueError, OverflowError):
                        return ps_name(token)
                    else:
                        return ps_integer(num)
                else:
                    return ps_name(token)
            else:
                return ps_real(num)
        else:
            return ps_integer(num)

    def do_comment(self, token):
        pass

    def do_literal(self, token):
        return ps_literal(token[1:])

    def do_string(self, token):
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            hexStr = hexStr + "0"
        cleanstr = []
        for i in range(0, len(hexStr), 2):
            cleanstr.append(chr(int(hexStr[i : i + 2], 16)))
        cleanstr = "".join(cleanstr)
        return ps_string(cleanstr)

    def do_special(self, token):
        if token == "{":
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == "}":
            proc = []
            while 1:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            proc.reverse()
            return ps_procedure(proc)
        elif token == "[":
            return self.mark
        elif token == "]":
            return ps_name("]")
        else:
            raise PSTokenError("unexpected special token: %r" % token)

    def push(self, object):
        self.stack.append(object)

    def pop(self, *types):
        stack = self.stack
        if not stack:
            raise PSError("stack underflow")
        object = stack[-1]
        if types:
            if object.type not in types:
                raise PSError(
                    "typecheck, expected %s, found %s" % (repr(types), object.type)
                )
        del stack[-1]
        return object

    def do_makearray(self):
        array = []
        while 1:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack


def unpack_item(item):
    tp = type(item.value)
    if tp == dict:
        newitem = {}
        for key, value in item.value.items():
            newitem[key] = unpack_item(value)
    elif tp == list:
        newitem = [None] * len(item.value)
        for i in range(len(item.value)):
            newitem[i] = unpack_item(item.value[i])
        if item.type == "proceduretype":
            newitem = tuple(newitem)
    else:
        newitem = item.value
    return newitem


def suckfont(data, encoding="ascii"):
    m = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data)
    if m:
        fontName = m.group(1)
        fontName = fontName.decode()
    else:
        fontName = None
    interpreter = PSInterpreter(encoding=encoding)
    interpreter.interpret(
        b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop"
    )
    interpreter.interpret(data)
    fontdir = interpreter.dictstack[0]["FontDirectory"].value
    if fontName in fontdir:
        rawfont = fontdir[fontName]
    else:
        # fall back, in case fontName wasn't found
        fontNames = list(fontdir.keys())
        if len(fontNames) > 1:
            fontNames.remove("Helvetica")
        fontNames.sort()
        rawfont = fontdir[fontNames[0]]
    interpreter.close()
    return unpack_item(rawfont)
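

# A minimal, hypothetical usage sketch (not part of the original module):
# feed suckfont() the PostScript source of a Type 1 font program and dump
# the top-level keys of the unpacked font dictionary. The command-line
# file argument is an assumption for illustration only.
if __name__ == "__main__":
    import sys

    with open(sys.argv[1], "rb") as f:
        data = f.read()
    font = suckfont(data)
    for key in sorted(font):
        print(key, type(font[key]).__name__)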