1import collections.abc 2import re 3from typing import ( 4 Any, 5 Callable, 6 Dict, 7 List, 8 Mapping, 9 MutableMapping, 10 Optional, 11 Sequence, 12 Type, 13 Union, 14 IO, 15) 16import warnings 17from io import BytesIO 18from datetime import datetime 19from base64 import b64encode, b64decode 20from numbers import Integral 21from types import SimpleNamespace 22from functools import singledispatch 23 24from fontTools.misc import etree 25 26from fontTools.misc.textTools import tostr 27 28 29# By default, we 30# - deserialize <data> elements as bytes and 31# - serialize bytes as <data> elements. 32# Before, on Python 2, we 33# - deserialized <data> elements as plistlib.Data objects, in order to 34# distinguish them from the built-in str type (which is bytes on python2) 35# - serialized bytes as <string> elements (they must have only contained 36# ASCII characters in this case) 37# You can pass use_builtin_types=[True|False] to the load/dump etc. functions 38# to enforce a specific treatment. 39# NOTE that unicode type always maps to <string> element, and plistlib.Data 40# always maps to <data> element, regardless of use_builtin_types. 
USE_BUILTIN_TYPES = True

XML_DECLARATION = b"""<?xml version='1.0' encoding='UTF-8'?>"""

PLIST_DOCTYPE = (
    b'<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" '
    b'"http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
)


# Date should conform to a subset of ISO 8601:
# YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'
# Every component after the year is optional, but the trailing 'Z' is not.
_date_parser = re.compile(
    r"(?P<year>\d\d\d\d)"
    r"(?:-(?P<month>\d\d)"
    r"(?:-(?P<day>\d\d)"
    r"(?:T(?P<hour>\d\d)"
    r"(?::(?P<minute>\d\d)"
    r"(?::(?P<second>\d\d))"
    r"?)?)?)?)?Z",
    re.ASCII,
)


def _date_from_string(s: str) -> datetime:
    """Parse a plist ``<date>`` string into a naive ``datetime``.

    Args:
        s: Text matching ``_date_parser`` (a subset of ISO 8601 ending
            in ``Z``).

    Returns:
        A ``datetime`` built from the leading components that are present.

    Raises:
        ValueError: If ``s`` does not match the expected pattern.
    """
    order = ("year", "month", "day", "hour", "minute", "second")
    m = _date_parser.match(s)
    if m is None:
        # ``!r`` is a conversion; the former ``:r`` was an invalid format
        # spec and made the f-string itself fail instead of reporting ``s``.
        raise ValueError(f"Expected ISO 8601 date string, but got {s!r}.")
    gd = m.groupdict()
    lst = []
    for key in order:
        val = gd[key]
        if val is None:
            # Components are nested in the pattern: once one is missing,
            # all the following ones are missing too.
            break
        lst.append(int(val))
    # NOTE: mypy doesn't know how many elements lst has. ``datetime`` needs
    # at least year, month and day; shorter matches fail in its constructor.
    return datetime(*lst)  # type:ignore


def _date_to_string(d: datetime) -> str:
    """Format ``d`` as ``YYYY-MM-DDTHH:MM:SSZ`` (sub-second data is dropped)."""
    return "%04d-%02d-%02dT%02d:%02d:%02dZ" % (
        d.year,
        d.month,
        d.day,
        d.hour,
        d.minute,
        d.second,
    )


class Data:
    """Represents binary data when ``use_builtin_types=False.``

    This class wraps binary data loaded from a plist file when the
    ``use_builtin_types`` argument to the loading function (:py:func:`fromtree`,
    :py:func:`load`, :py:func:`loads`) is false.

    The actual binary data is retrieved using the ``data`` attribute.
    """

    def __init__(self, data: bytes) -> None:
        """Wrap ``data``; raises ``TypeError`` if it is not ``bytes``."""
        if not isinstance(data, bytes):
            raise TypeError("Expected bytes, found %s" % type(data).__name__)
        self.data = data

    @classmethod
    def fromBase64(cls, data: Union[bytes, str]) -> "Data":
        """Build a ``Data`` from Base-64 encoded ``data``."""
        return cls(b64decode(data))

    def asBase64(self, maxlinelength: int = 76, indent_level: int = 1) -> bytes:
        """Return the wrapped bytes Base-64 encoded via ``_encode_base64``."""
        return _encode_base64(
            self.data, maxlinelength=maxlinelength, indent_level=indent_level
        )

    def __eq__(self, other: Any) -> bool:
        """Compare equal to another ``Data`` or to raw ``bytes`` with the same content."""
        if isinstance(other, self.__class__):
            return self.data == other.data
        elif isinstance(other, bytes):
            return self.data == other
        else:
            return NotImplemented

    def __repr__(self) -> str:
        return "%s(%s)" % (self.__class__.__name__, repr(self.data))


def _encode_base64(
    data: bytes, maxlinelength: Optional[int] = 76, indent_level: int = 1
) -> bytes:
    """Base-64 encode ``data``, optionally wrapped and indented.

    Args:
        data: The raw bytes to encode.
        maxlinelength: Target width of each output line including the
            indent. A falsy value (``None`` or ``0``) disables wrapping.
        indent_level: Number of indent units placed after each newline.

    Returns:
        The encoded bytes. When wrapping is active and the input is not
        empty, every chunk is preceded by a newline plus indent, and one
        more newline plus indent follows the last chunk.
    """
    encoded = b64encode(data)
    if encoded and maxlinelength:
        # split into multiple lines right-justified to 'maxlinelength' chars
        indent = b"\n" + b" " * indent_level
        # never wrap narrower than 16 characters, however deep the indent
        max_length = max(16, maxlinelength - len(indent))
        chunks = []
        for i in range(0, len(encoded), max_length):
            chunks.append(indent)
            chunks.append(encoded[i : i + max_length])
        chunks.append(indent)
        encoded = b"".join(chunks)
    return encoded


# Mypy does not support recursive type aliases as of 0.782, Pylance does.
# https://github.com/python/mypy/issues/731
# https://devblogs.microsoft.com/python/pylance-introduces-five-new-features-that-enable-type-magic-for-python-developers/#1-support-for-recursive-type-aliases
PlistEncodable = Union[
    bool,
    bytes,
    Data,
    datetime,
    float,
    Integral,
    Mapping[str, Any],
    Sequence[Any],
    str,
]


class PlistTarget:
    """Event handler using the ElementTree Target API that can be
    passed to a XMLParser to produce property list objects from XML.
    It is based on the CPython plistlib module's _PlistParser class,
    but does not use the expat parser.

    >>> from fontTools.misc import etree
    >>> parser = etree.XMLParser(target=PlistTarget())
    >>> result = etree.XML(
    ...     "<dict>"
    ...     " <key>something</key>"
    ...     " <string>blah</string>"
    ...     "</dict>",
    ...     parser=parser)
    >>> result == {"something": "blah"}
    True

    Links:
    https://github.com/python/cpython/blob/main/Lib/plistlib.py
    http://lxml.de/parsing.html#the-target-parser-interface
    """

    def __init__(
        self,
        use_builtin_types: Optional[bool] = None,
        dict_type: Type[MutableMapping[str, Any]] = dict,
    ) -> None:
        """Set up an empty parse state.

        Args:
            use_builtin_types: If true, ``<data>`` becomes ``bytes``; if
                false, it becomes :py:class:`Data` (and a
                ``DeprecationWarning`` is issued). ``None`` selects the
                module-level ``USE_BUILTIN_TYPES``.
            dict_type: Callable used to create the container for each
                ``<dict>`` element.
        """
        # containers currently being filled, innermost last
        self.stack: List[PlistEncodable] = []
        # key read from the last <key>, waiting for its value
        self.current_key: Optional[str] = None
        self.root: Optional[PlistEncodable] = None
        # text chunks of the element being parsed; created here so that
        # data()/get_data() are safe even before the first start() call
        self._data: List[str] = []
        if use_builtin_types is None:
            self._use_builtin_types = USE_BUILTIN_TYPES
        else:
            if use_builtin_types is False:
                warnings.warn(
                    "Setting use_builtin_types to False is deprecated and will be "
                    "removed soon.",
                    DeprecationWarning,
                )
            self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def start(self, tag: str, attrib: Mapping[str, str]) -> None:
        """Handle an opening tag: reset the text buffer and run its start handler, if any."""
        self._data = []
        handler = _TARGET_START_HANDLERS.get(tag)
        if handler is not None:
            handler(self)

    def end(self, tag: str) -> None:
        """Handle a closing tag by running its end handler; unknown tags are ignored."""
        handler = _TARGET_END_HANDLERS.get(tag)
        if handler is not None:
            handler(self)

    def data(self, data: str) -> None:
        """Buffer a chunk of character data for the current element."""
        self._data.append(data)

    def close(self) -> PlistEncodable:
        """Return the parsed root object; raises ``ValueError`` if none was set."""
        if self.root is None:
            raise ValueError("No root set.")
        return self.root

    # helpers

    def add_object(self, value: PlistEncodable) -> None:
        """Attach ``value`` to the structure being built.

        The value goes under the pending key of the innermost dict, or
        becomes the root when no container is open, or is appended to the
        innermost array.

        Raises:
            ValueError: If the innermost container has the wrong type for
                the position the value appears in.
        """
        if self.current_key is not None:
            stack_top = self.stack[-1]
            if not isinstance(stack_top, collections.abc.MutableMapping):
                raise ValueError("unexpected element: %r" % stack_top)
            stack_top[self.current_key] = value
            self.current_key = None
        elif not self.stack:
            # this is the root object
            self.root = value
        else:
            stack_top = self.stack[-1]
            if not isinstance(stack_top, list):
                raise ValueError("unexpected element: %r" % stack_top)
            stack_top.append(value)

    def get_data(self) -> str:
        """Return the buffered text of the current element and clear the buffer."""
        data = "".join(self._data)
        self._data = []
        return data


# event handlers


def start_dict(self: PlistTarget) -> None:
    """Open a new dict, attach it to its parent and make it the innermost container."""
    d = self._dict_type()
    self.add_object(d)
    self.stack.append(d)


def end_dict(self: PlistTarget) -> None:
    """Close the innermost dict; raises ``ValueError`` if a key is left without a value."""
    # Compare against None: an empty string is a legal (if unusual) key and
    # must still be reported when its value is missing.
    if self.current_key is not None:
        raise ValueError("missing value for key '%s'" % self.current_key)
    self.stack.pop()


def end_key(self: PlistTarget) -> None:
    """Record the text of a ``<key>`` as the pending key of the innermost dict.

    Raises:
        ValueError: If a key is already pending, or if the key does not
            appear directly inside a dict (including at the root).
    """
    if (
        self.current_key is not None
        or not self.stack
        or not isinstance(self.stack[-1], collections.abc.Mapping)
    ):
        raise ValueError("unexpected key")
    self.current_key = self.get_data()


def start_array(self: PlistTarget) -> None:
    """Open a new list, attach it to its parent and make it the innermost container."""
    a: List[PlistEncodable] = []
    self.add_object(a)
    self.stack.append(a)


def end_array(self: PlistTarget) -> None:
    """Close the innermost array."""
    self.stack.pop()


def end_true(self: PlistTarget) -> None:
    """Add ``True`` for a ``<true/>`` element."""
    self.add_object(True)


def end_false(self: PlistTarget) -> None:
    """Add ``False`` for a ``<false/>`` element."""
    self.add_object(False)


def end_integer(self: PlistTarget) -> None:
    """Add the element text converted with ``int()``."""
    self.add_object(int(self.get_data()))


def end_real(self: PlistTarget) -> None:
    """Add the element text converted with ``float()``."""
    self.add_object(float(self.get_data()))


def end_string(self: PlistTarget) -> None:
    """Add the element text as a ``str``."""
    self.add_object(self.get_data())


def end_data(self: PlistTarget) -> None:
    """Add the Base-64 decoded element text as ``bytes`` or :py:class:`Data`."""
    if self._use_builtin_types:
        self.add_object(b64decode(self.get_data()))
    else:
        self.add_object(Data.fromBase64(self.get_data()))


def end_date(self: PlistTarget) -> None:
    """Add the element text parsed by ``_date_from_string``."""
    self.add_object(_date_from_string(self.get_data()))


# Tags that open a container; every other tag needs no action on start.
_TARGET_START_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = {
    "dict": start_dict,
    "array": start_array,
}

# Tags whose closing produces a value or closes a container.
_TARGET_END_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = {
    "dict": end_dict,
    "array": end_array,
    "key": end_key,
    "true": end_true,
    "false": end_false,
    "integer": end_integer,
    "real": end_real,
    "string": end_string,
    "data": end_data,
    "date": end_date,
}


# functions to build element tree from plist data


def _string_element(value: str, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<string>`` element holding ``value``."""
    el = etree.Element("string")
    el.text = value
    return el


def _bool_element(value: bool, ctx: SimpleNamespace) -> etree.Element:
    """Build an empty ``<true>`` or ``<false>`` element."""
    if value:
        return etree.Element("true")
    return etree.Element("false")


def _integer_element(value: int, ctx: SimpleNamespace) -> etree.Element:
    """Build an ``<integer>`` element.

    Raises:
        OverflowError: If ``value`` is outside ``[-2**63, 2**64)``.
    """
    if -1 << 63 <= value < 1 << 64:
        el = etree.Element("integer")
        el.text = "%d" % value
        return el
    raise OverflowError(value)


def _real_element(value: float, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<real>`` element whose text is ``repr(value)``."""
    el = etree.Element("real")
    el.text = repr(value)
    return el


def _dict_element(
    d: Mapping[str, PlistEncodable], ctx: SimpleNamespace
) -> etree.Element:
    """Build a ``<dict>`` element with alternating ``<key>`` and value children.

    Args:
        d: The mapping to serialize.
        ctx: Serialization context; reads ``sort_keys`` and ``skipkeys``
            and temporarily increments ``indent_level`` for the children.

    Raises:
        TypeError: If a key is not a ``str`` and ``ctx.skipkeys`` is false.
    """
    el = etree.Element("dict")
    # Validate/filter keys before sorting: sorting first would compare keys
    # of unrelated types and fail even when skipkeys asks to ignore them.
    items = []
    for key, value in d.items():
        if not isinstance(key, str):
            if ctx.skipkeys:
                continue
            raise TypeError("keys must be strings")
        items.append((key, value))
    if ctx.sort_keys:
        # keys are unique, so ordering by key alone is fully determined
        items.sort(key=lambda item: item[0])
    ctx.indent_level += 1
    for key, value in items:
        k = etree.SubElement(el, "key")
        k.text = tostr(key, "utf-8")
        el.append(_make_element(value, ctx))
    ctx.indent_level -= 1
    return el


def _array_element(
    array: Sequence[PlistEncodable], ctx: SimpleNamespace
) -> etree.Element:
    """Build an ``<array>`` element with one child per item of ``array``."""
    el = etree.Element("array")
    if len(array) == 0:
        return el
    ctx.indent_level += 1
    for value in array:
        el.append(_make_element(value, ctx))
    ctx.indent_level -= 1
    return el


def _date_element(date: datetime, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<date>`` element formatted by ``_date_to_string``."""
    el = etree.Element("date")
    el.text = _date_to_string(date)
    return el


def _data_element(data: bytes, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<data>`` element holding ``data`` Base-64 encoded.

    Lines are wrapped at 76 characters only when ``ctx.pretty_print`` is set.
    """
    el = etree.Element("data")
    # NOTE: mypy is confused about whether el.text should be str or bytes.
    el.text = _encode_base64(  # type: ignore
        data,
        maxlinelength=(76 if ctx.pretty_print else None),
        indent_level=ctx.indent_level,
    )
    return el


def _string_or_data_element(raw_bytes: bytes, ctx: SimpleNamespace) -> etree.Element:
    """Serialize ``bytes`` as ``<data>`` or, in legacy mode, as an ASCII ``<string>``.

    Raises:
        ValueError: If ``ctx.use_builtin_types`` is false and ``raw_bytes``
            contains non-ASCII bytes.
    """
    if ctx.use_builtin_types:
        return _data_element(raw_bytes, ctx)
    else:
        try:
            string = raw_bytes.decode(encoding="ascii", errors="strict")
        except UnicodeDecodeError as err:
            raise ValueError(
                "invalid non-ASCII bytes; use unicode string instead: %r" % raw_bytes
            ) from err
        return _string_element(string, ctx)


# The following is probably not entirely correct. The signature should take `Any`
# and return `NoReturn`. At the time of this writing, neither mypy nor Pyright
# can deal with singledispatch properly and will apply the signature of the base
# function to all others. Being slightly dishonest makes it type-check and return
# usable typing information for the optimistic case.
@singledispatch
def _make_element(value: PlistEncodable, ctx: SimpleNamespace) -> etree.Element:
    """Convert ``value`` to an XML element, dispatching on its type.

    This base implementation is reached only for types without a registered
    builder and always raises ``TypeError``.
    """
    raise TypeError("unsupported type: %s" % type(value))


# Type -> element builder table. ``bool`` is registered in addition to
# ``Integral`` so that booleans become <true>/<false> rather than <integer>.
_make_element.register(str)(_string_element)
_make_element.register(bool)(_bool_element)
_make_element.register(Integral)(_integer_element)
_make_element.register(float)(_real_element)
_make_element.register(collections.abc.Mapping)(_dict_element)
_make_element.register(list)(_array_element)
_make_element.register(tuple)(_array_element)
_make_element.register(datetime)(_date_element)
_make_element.register(bytes)(_string_or_data_element)
_make_element.register(bytearray)(_data_element)
_make_element.register(Data)(lambda v, ctx: _data_element(v.data, ctx))


# Public functions to create element tree from plist-compatible python
# data structures and viceversa, for use when (de)serializing GLIF xml.


def totree(
    value: PlistEncodable,
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
    indent_level: int = 1,
) -> etree.Element:
    """Convert a value derived from a plist into an XML tree.

    Args:
        value: Any kind of value to be serialized to XML.
        sort_keys: Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as ASCII strings or an
            exception raised if they cannot be decoded as such. Defaults
            to ``True`` if not present. Deprecated.
        pretty_print (bool): Whether to indent the output.
        indent_level (int): Level of indentation when serializing.

    Returns: an ``etree`` ``Element`` object.

    Raises:
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-ASCII binary data is present
            and `use_builtin_types` is false.
    """
    if use_builtin_types is None:
        use_builtin_types = USE_BUILTIN_TYPES
    # The context is shared by all element builders during this call;
    # indent_level is adjusted by them while descending into containers.
    context = SimpleNamespace(
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=use_builtin_types,
        pretty_print=pretty_print,
        indent_level=indent_level,
    )
    return _make_element(value, context)


def fromtree(
    tree: etree.Element,
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Convert an XML tree to a plist structure.

    Args:
        tree: An ``etree`` ``Element``.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns: An object (usually a dictionary).
    """
    target = PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
    # Replay the already-parsed tree as the start/data/end events that the
    # target would have received from a parser.
    for action, element in etree.iterwalk(tree, events=("start", "end")):
        if action == "start":
            target.start(element.tag, element.attrib)
        elif action == "end":
            # if there are no children, parse the leaf's data
            if not len(element):
                # always pass str, not None
                target.data(element.text or "")
            target.end(element.tag)
    return target.close()


# python3 plistlib API


def load(
    fp: IO[bytes],
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Load a plist file into an object.

    Args:
        fp: An opened file.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns:
        An object (usually a dictionary) representing the top level of
        the plist file.

    Raises:
        ``AttributeError``
            if ``fp`` has no ``read`` attribute.
    """

    if not hasattr(fp, "read"):
        raise AttributeError("'%s' object has no attribute 'read'" % type(fp).__name__)
    target = PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
    parser = etree.XMLParser(target=target)
    result = etree.parse(fp, parser=parser)
    # lxml returns the target object directly, while ElementTree wraps
    # it as the root of an ElementTree object
    try:
        return result.getroot()
    except AttributeError:
        return result


def loads(
    value: bytes,
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Load a plist file from a string into an object.

    Args:
        value: A bytes string containing a plist.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns:
        An object (usually a dictionary) representing the top level of
        the plist file.
    """

    fp = BytesIO(value)
    return load(fp, use_builtin_types=use_builtin_types, dict_type=dict_type)


def dump(
    value: PlistEncodable,
    fp: IO[bytes],
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
) -> None:
    """Write a Python object to a plist file.

    Args:
        value: An object to write.
        fp: A file opened for writing.
        sort_keys (bool): Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as ASCII strings or an
            exception raised if they cannot be represented. Defaults
            to ``True`` if not present. Deprecated.
        pretty_print (bool): Whether to indent the output.

    Raises:
        ``AttributeError``
            if ``fp`` has no ``write`` attribute.
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-representable binary data is present
            and `use_builtin_types` is false.
    """

    if not hasattr(fp, "write"):
        raise AttributeError("'%s' object has no attribute 'write'" % type(fp).__name__)
    root = etree.Element("plist", version="1.0")
    el = totree(
        value,
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=use_builtin_types,
        pretty_print=pretty_print,
    )
    root.append(el)
    tree = etree.ElementTree(root)
    # we write the doctype ourselves instead of using the 'doctype' argument
    # of 'write' method, because lxml will force adding a '\n' even when
    # pretty_print is False.
    if pretty_print:
        # the trailing empty item yields a newline after the doctype
        header = b"\n".join((XML_DECLARATION, PLIST_DOCTYPE, b""))
    else:
        header = XML_DECLARATION + PLIST_DOCTYPE
    fp.write(header)
    tree.write(  # type: ignore
        fp,
        encoding="utf-8",
        pretty_print=pretty_print,
        xml_declaration=False,
    )


def dumps(
    value: PlistEncodable,
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
) -> bytes:
    """Write a Python object to a string in plist format.

    Args:
        value: An object to write.
        sort_keys (bool): Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as strings or an
            exception raised if they cannot be represented. Defaults
            to ``True`` if not present. Deprecated.
        pretty_print (bool): Whether to indent the output.

    Returns:
        bytes: A plist representation of the Python object.

    Raises:
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-representable binary data is present
            and `use_builtin_types` is false.
    """
    fp = BytesIO()
    dump(
        value,
        fp,
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=use_builtin_types,
        pretty_print=pretty_print,
    )
    return fp.getvalue()