xref: /aosp_15_r20/external/fonttools/Lib/fontTools/misc/plistlib/__init__.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1import collections.abc
2import re
3from typing import (
4    Any,
5    Callable,
6    Dict,
7    List,
8    Mapping,
9    MutableMapping,
10    Optional,
11    Sequence,
12    Type,
13    Union,
14    IO,
15)
16import warnings
17from io import BytesIO
18from datetime import datetime
19from base64 import b64encode, b64decode
20from numbers import Integral
21from types import SimpleNamespace
22from functools import singledispatch
23
24from fontTools.misc import etree
25
26from fontTools.misc.textTools import tostr
27
28
29# By default, we
30#  - deserialize <data> elements as bytes and
31#  - serialize bytes as <data> elements.
32# Before, on Python 2, we
33#  - deserialized <data> elements as plistlib.Data objects, in order to
34#    distinguish them from the built-in str type (which is bytes on python2)
35#  - serialized bytes as <string> elements (they must have only contained
36#    ASCII characters in this case)
37# You can pass use_builtin_types=[True|False] to the load/dump etc. functions
38# to enforce a specific treatment.
39# NOTE that unicode type always maps to <string> element, and plistlib.Data
40# always maps to <data> element, regardless of use_builtin_types.
# Module-wide default applied whenever a caller leaves ``use_builtin_types``
# as None (see PlistTarget.__init__ and totree).
USE_BUILTIN_TYPES = True

# XML prolog that ``dump`` writes ahead of the serialized tree.
XML_DECLARATION = b"""<?xml version='1.0' encoding='UTF-8'?>"""

# DOCTYPE line that ``dump`` writes right after the XML declaration.
PLIST_DOCTYPE = (
    b'<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" '
    b'"http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
)
49
50
51# Date should conform to a subset of ISO 8601:
52# YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'
_date_parser = re.compile(
    r"(?P<year>\d\d\d\d)"
    r"(?:-(?P<month>\d\d)"
    r"(?:-(?P<day>\d\d)"
    r"(?:T(?P<hour>\d\d)"
    r"(?::(?P<minute>\d\d)"
    r"(?::(?P<second>\d\d))"
    r"?)?)?)?)?Z",
    re.ASCII,
)


def _date_from_string(s: str) -> datetime:
    """Parse a plist ``<date>`` string into a naive ``datetime``.

    Args:
        s: Text of the form ``YYYY-MM-DDTHH:MM:SSZ``. Trailing components may
            be omitted from the right, as permitted by ``_date_parser``.

    Returns:
        A ``datetime`` built from the components that were present.

    Raises:
        ValueError: If ``s`` does not match ``_date_parser``.
    """
    order = ("year", "month", "day", "hour", "minute", "second")
    m = _date_parser.match(s)
    if m is None:
        # ``!r`` is the repr conversion; the previous ``{s:r}`` was parsed as a
        # format spec, which is invalid for str and hid the intended message.
        raise ValueError(f"Expected ISO 8601 date string, but got {s!r}.")
    gd = m.groupdict()
    lst = []
    for key in order:
        val = gd[key]
        if val is None:
            # Components are nested in the pattern, so once one is missing
            # every later one is missing too.
            break
        lst.append(int(val))
    # NOTE: lst holds between 1 and 6 integers, which mypy cannot verify.
    return datetime(*lst)  # type:ignore
79
80
def _date_to_string(d: datetime) -> str:
    """Format ``d`` as ``YYYY-MM-DDTHH:MM:SSZ`` with zero-padded fields."""
    date_part = f"{d.year:04d}-{d.month:02d}-{d.day:02d}"
    time_part = f"{d.hour:02d}:{d.minute:02d}:{d.second:02d}"
    return f"{date_part}T{time_part}Z"
90
91
class Data:
    """Wrapper for binary plist data, used when ``use_builtin_types=False``.

    The loading functions (:py:func:`fromtree`, :py:func:`load`,
    :py:func:`loads`) wrap ``<data>`` payloads in this class when their
    ``use_builtin_types`` argument is false.

    The raw bytes are available through the ``data`` attribute.
    """

    def __init__(self, data: bytes) -> None:
        # Only genuine bytes objects are accepted.
        if isinstance(data, bytes):
            self.data = data
            return
        raise TypeError("Expected bytes, found %s" % type(data).__name__)

    @classmethod
    def fromBase64(cls, data: Union[bytes, str]) -> "Data":
        """Build an instance from Base64-encoded ``data``."""
        decoded = b64decode(data)
        return cls(decoded)

    def asBase64(self, maxlinelength: int = 76, indent_level: int = 1) -> bytes:
        """Return the wrapped bytes Base64-encoded via ``_encode_base64``."""
        return _encode_base64(
            self.data, maxlinelength=maxlinelength, indent_level=indent_level
        )

    def __eq__(self, other: Any) -> bool:
        # Compare against another Data (by payload) or against plain bytes;
        # defer to the other operand for anything else.
        if isinstance(other, self.__class__):
            other = other.data
        elif not isinstance(other, bytes):
            return NotImplemented
        return self.data == other

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.data!r})"
126
127
def _encode_base64(
    data: bytes, maxlinelength: Optional[int] = 76, indent_level: int = 1
) -> bytes:
    """Base64-encode ``data``, optionally wrapped onto indented lines.

    When the encoded result is non-empty and ``maxlinelength`` is truthy, the
    output is split into chunks; each chunk is preceded by a newline plus two
    spaces per ``indent_level``, and the same separator is appended after the
    last chunk. Otherwise the plain Base64 bytes are returned.
    """
    encoded = b64encode(data)
    if not (encoded and maxlinelength):
        return encoded
    separator = b"\n" + b"  " * indent_level
    # Never wrap narrower than 16 characters, however deep the indentation.
    width = max(16, maxlinelength - len(separator))
    pieces = [
        encoded[start : start + width] for start in range(0, len(encoded), width)
    ]
    return separator + separator.join(pieces) + separator
143
144
145# Mypy does not support recursive type aliases as of 0.782, Pylance does.
146# https://github.com/python/mypy/issues/731
147# https://devblogs.microsoft.com/python/pylance-introduces-five-new-features-that-enable-type-magic-for-python-developers/#1-support-for-recursive-type-aliases
# Union of the Python types used in this module's annotations for values that
# can be serialized to a plist. Container members are typed ``Any`` because
# the alias cannot refer to itself.
PlistEncodable = Union[
    bool,
    bytes,
    Data,
    datetime,
    float,
    Integral,
    Mapping[str, Any],
    Sequence[Any],
    str,
]
159
160
class PlistTarget:
    """Event handler using the ElementTree Target API that can be
    passed to a XMLParser to produce property list objects from XML.
    It is based on the CPython plistlib module's _PlistParser class,
    but does not use the expat parser.

    >>> from fontTools.misc import etree
    >>> parser = etree.XMLParser(target=PlistTarget())
    >>> result = etree.XML(
    ...     "<dict>"
    ...     "    <key>something</key>"
    ...     "    <string>blah</string>"
    ...     "</dict>",
    ...     parser=parser)
    >>> result == {"something": "blah"}
    True

    Links:
    https://github.com/python/cpython/blob/main/Lib/plistlib.py
    http://lxml.de/parsing.html#the-target-parser-interface
    """

    def __init__(
        self,
        use_builtin_types: Optional[bool] = None,
        dict_type: Type[MutableMapping[str, Any]] = dict,
    ) -> None:
        """Set up an empty parse state.

        Args:
            use_builtin_types: If None, the module-level ``USE_BUILTIN_TYPES``
                is used. Passing False emits a ``DeprecationWarning``.
            dict_type: Callable used to create the mapping for each ``<dict>``.
        """
        # Containers currently being filled, innermost last.
        self.stack: List[PlistEncodable] = []
        # Key read from the last <key> that has not received its value yet.
        self.current_key: Optional[str] = None
        # Top-level object, set once the first value is seen outside any container.
        self.root: Optional[PlistEncodable] = None
        # Text collected for the current element. Initialised here so that
        # data() and get_data() do not fail if called before the first start().
        self._data: List[str] = []
        if use_builtin_types is None:
            self._use_builtin_types = USE_BUILTIN_TYPES
        else:
            if use_builtin_types is False:
                warnings.warn(
                    "Setting use_builtin_types to False is deprecated and will be "
                    "removed soon.",
                    DeprecationWarning,
                )
            self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def start(self, tag: str, attrib: Mapping[str, str]) -> None:
        """Handle an opening tag: reset the text buffer and run its start handler."""
        self._data = []
        handler = _TARGET_START_HANDLERS.get(tag)
        if handler is not None:
            handler(self)

    def end(self, tag: str) -> None:
        """Handle a closing tag by running its end handler, if one is registered."""
        handler = _TARGET_END_HANDLERS.get(tag)
        if handler is not None:
            handler(self)

    def data(self, data: str) -> None:
        """Collect a piece of character data for the current element."""
        self._data.append(data)

    def close(self) -> PlistEncodable:
        """Return the parsed root object.

        Raises:
            ValueError: If no root object was produced.
        """
        if self.root is None:
            raise ValueError("No root set.")
        return self.root

    # helpers

    def add_object(self, value: PlistEncodable) -> None:
        """Attach ``value`` to the innermost open container, or make it the root.

        Raises:
            ValueError: If a key is pending but the innermost container is not
                a mutable mapping, or no key is pending and the innermost
                container is not a list.
        """
        if self.current_key is not None:
            stack_top = self.stack[-1]
            if not isinstance(stack_top, collections.abc.MutableMapping):
                raise ValueError("unexpected element: %r" % stack_top)
            stack_top[self.current_key] = value
            self.current_key = None
        elif not self.stack:
            # this is the root object
            self.root = value
        else:
            stack_top = self.stack[-1]
            if not isinstance(stack_top, list):
                raise ValueError("unexpected element: %r" % stack_top)
            stack_top.append(value)

    def get_data(self) -> str:
        """Return the collected text as one string and clear the buffer."""
        data = "".join(self._data)
        self._data = []
        return data
244
245
246# event handlers
247
248
def start_dict(self: PlistTarget) -> None:
    """Open a ``<dict>``: create a mapping, attach it, and make it current."""
    mapping = self._dict_type()
    self.add_object(mapping)
    self.stack.append(mapping)
253
254
def end_dict(self: PlistTarget) -> None:
    """Close the current ``<dict>``.

    Raises:
        ValueError: If a key is still waiting for its value.
    """
    # Compare against None rather than truthiness: an empty-string key is a
    # pending key too, and add_object() uses the same ``is not None`` check.
    if self.current_key is not None:
        raise ValueError("missing value for key '%s'" % self.current_key)
    self.stack.pop()
259
260
def end_key(self: PlistTarget) -> None:
    """Record the text of a ``<key>`` as the pending key of the current dict.

    Raises:
        ValueError: If another key is already pending, or the key does not
            appear directly inside a mapping (including at the top level).
    """
    if (
        # ``is not None`` so that a pending empty-string key is detected.
        self.current_key is not None
        # An empty stack means the key sits outside any container; report it
        # as an unexpected key instead of failing with IndexError.
        or not self.stack
        or not isinstance(self.stack[-1], collections.abc.Mapping)
    ):
        raise ValueError("unexpected key")
    self.current_key = self.get_data()
265
266
def start_array(self: PlistTarget) -> None:
    """Open an ``<array>``: create a list, attach it, and make it current."""
    items: List[PlistEncodable] = []
    self.add_object(items)
    self.stack.append(items)
271
272
def end_array(self: PlistTarget) -> None:
    """Close the current ``<array>`` by dropping it from the container stack."""
    self.stack.pop()
275
276
def end_true(self: PlistTarget) -> None:
    """Add ``True`` for a ``<true/>`` element."""
    self.add_object(True)
279
280
def end_false(self: PlistTarget) -> None:
    """Add ``False`` for a ``<false/>`` element."""
    self.add_object(False)
283
284
def end_integer(self: PlistTarget) -> None:
    """Convert the text of an ``<integer>`` with ``int`` and add it."""
    text = self.get_data()
    self.add_object(int(text))
287
288
def end_real(self: PlistTarget) -> None:
    """Convert the text of a ``<real>`` with ``float`` and add it."""
    text = self.get_data()
    self.add_object(float(text))
291
292
def end_string(self: PlistTarget) -> None:
    """Add the text of a ``<string>`` unchanged."""
    text = self.get_data()
    self.add_object(text)
295
296
def end_data(self: PlistTarget) -> None:
    """Decode the Base64 text of a ``<data>`` element and add it.

    The value is added as ``bytes`` when builtin types are in use, otherwise
    wrapped in a ``Data`` instance.
    """
    payload = self.get_data()
    if not self._use_builtin_types:
        self.add_object(Data.fromBase64(payload))
        return
    self.add_object(b64decode(payload))
302
303
def end_date(self: PlistTarget) -> None:
    """Parse the text of a ``<date>`` with ``_date_from_string`` and add it."""
    text = self.get_data()
    self.add_object(_date_from_string(text))
306
307
# Tag name -> function run by PlistTarget.start(). Only container tags need
# work on opening; tags missing from the table are ignored.
_TARGET_START_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = {
    "dict": start_dict,
    "array": start_array,
}

# Tag name -> function run by PlistTarget.end(). Tags missing from the table
# are ignored.
_TARGET_END_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = {
    "dict": end_dict,
    "array": end_array,
    "key": end_key,
    "true": end_true,
    "false": end_false,
    "integer": end_integer,
    "real": end_real,
    "string": end_string,
    "data": end_data,
    "date": end_date,
}
325
326
327# functions to build element tree from plist data
328
329
def _string_element(value: str, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<string>`` element whose text is ``value``."""
    element = etree.Element("string")
    element.text = value
    return element
334
335
def _bool_element(value: bool, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<true>`` or ``<false>`` element according to ``value``."""
    return etree.Element("true" if value else "false")
340
341
def _integer_element(value: int, ctx: SimpleNamespace) -> etree.Element:
    """Build an ``<integer>`` element holding ``value`` in decimal.

    Raises:
        OverflowError: If ``value`` is below ``-2**63`` or not below ``2**64``.
    """
    if not (-1 << 63 <= value < 1 << 64):
        raise OverflowError(value)
    element = etree.Element("integer")
    element.text = "%d" % value
    return element
348
349
def _real_element(value: float, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<real>`` element whose text is ``repr(value)``."""
    element = etree.Element("real")
    element.text = repr(value)
    return element
354
355
def _dict_element(
    d: Mapping[str, PlistEncodable], ctx: SimpleNamespace
) -> etree.Element:
    """Build a ``<dict>`` element with alternating ``<key>`` and value children.

    Args:
        d: Mapping to serialize.
        ctx: Serialization settings; ``sort_keys``, ``skipkeys`` and
            ``indent_level`` are read here.

    Raises:
        TypeError: If a key is not a ``str`` and ``ctx.skipkeys`` is false.
    """
    el = etree.Element("dict")
    # Validate or drop non-string keys BEFORE sorting. Sorting first would
    # compare keys of different types and raise an unrelated TypeError, even
    # when skipkeys asks for such keys to be ignored silently.
    items = []
    for key, value in d.items():
        if isinstance(key, str):
            items.append((key, value))
        elif not ctx.skipkeys:
            raise TypeError("keys must be strings")
    if ctx.sort_keys:
        # Order by key only, so values are never compared.
        items.sort(key=lambda item: item[0])
    ctx.indent_level += 1
    for key, value in items:
        k = etree.SubElement(el, "key")
        k.text = tostr(key, "utf-8")
        el.append(_make_element(value, ctx))
    ctx.indent_level -= 1
    return el
374
375
def _array_element(
    array: Sequence[PlistEncodable], ctx: SimpleNamespace
) -> etree.Element:
    """Build an ``<array>`` element with one child per item of ``array``.

    ``ctx.indent_level`` is raised by one while the children are built.
    """
    element = etree.Element("array")
    if len(array) != 0:
        ctx.indent_level += 1
        for item in array:
            element.append(_make_element(item, ctx))
        ctx.indent_level -= 1
    return element
387
388
def _date_element(date: datetime, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<date>`` element from ``date`` via ``_date_to_string``."""
    element = etree.Element("date")
    element.text = _date_to_string(date)
    return element
393
394
def _data_element(data: bytes, ctx: SimpleNamespace) -> etree.Element:
    """Build a ``<data>`` element containing ``data`` Base64-encoded.

    Lines are wrapped at 76 characters only when ``ctx.pretty_print`` is set.
    """
    element = etree.Element("data")
    line_length = 76 if ctx.pretty_print else None
    encoded = _encode_base64(
        data,
        maxlinelength=line_length,
        indent_level=ctx.indent_level,
    )
    # NOTE: mypy is confused about whether el.text should be str or bytes.
    element.text = encoded  # type: ignore
    return element
404
405
def _string_or_data_element(raw_bytes: bytes, ctx: SimpleNamespace) -> etree.Element:
    """Serialize ``bytes`` as ``<data>`` or ``<string>``.

    With ``ctx.use_builtin_types`` the bytes become a ``<data>`` element;
    otherwise they must be pure ASCII and become a ``<string>`` element.

    Raises:
        ValueError: If builtin types are off and the bytes are not ASCII.
    """
    if ctx.use_builtin_types:
        return _data_element(raw_bytes, ctx)
    try:
        text = raw_bytes.decode(encoding="ascii", errors="strict")
    except UnicodeDecodeError:
        raise ValueError(
            "invalid non-ASCII bytes; use unicode string instead: %r" % raw_bytes
        )
    return _string_element(text, ctx)
417
418
419# The following is probably not entirely correct. The signature should take `Any`
420# and return `NoReturn`. At the time of this writing, neither mypy nor Pyright
421# can deal with singledispatch properly and will apply the signature of the base
422# function to all others. Being slightly dishonest makes it type-check and return
423# usable typing information for the optimistic case.
@singledispatch
def _make_element(value: PlistEncodable, ctx: SimpleNamespace) -> etree.Element:
    """Fallback used when no builder is registered for ``type(value)``.

    Raises:
        TypeError: Always; the type of ``value`` is not supported.
    """
    raise TypeError("unsupported type: %s" % type(value))


# Type -> builder registrations. singledispatch picks the most specific
# registered type, so the explicit ``bool`` entry is used for True/False
# rather than the more general ``Integral`` one.
_make_element.register(str)(_string_element)
_make_element.register(bool)(_bool_element)
_make_element.register(Integral)(_integer_element)
_make_element.register(float)(_real_element)
_make_element.register(collections.abc.Mapping)(_dict_element)
_make_element.register(list)(_array_element)
_make_element.register(tuple)(_array_element)
_make_element.register(datetime)(_date_element)
# bytes honour ctx.use_builtin_types; bytearray and Data always become <data>.
_make_element.register(bytes)(_string_or_data_element)
_make_element.register(bytearray)(_data_element)
_make_element.register(Data)(lambda v, ctx: _data_element(v.data, ctx))
440
441
442# Public functions to create element tree from plist-compatible python
443# data structures and viceversa, for use when (de)serializing GLIF xml.
444
445
def totree(
    value: PlistEncodable,
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
    indent_level: int = 1,
) -> etree.Element:
    """Build an XML element tree from a plist-compatible value.

    Args:
        value: The value to serialize.
        sort_keys: Whether dictionary keys are emitted in sorted order.
        skipkeys (bool): Whether non-string dictionary keys are silently
            dropped.
        use_builtin_types (bool): If true, byte strings are Base-64
            encoded inside a ``data`` tag; if false, they are stored as
            ASCII strings, or an exception is raised if they cannot be
            decoded as such. Defaults to ``True`` if not present.
            Deprecated.
        pretty_print (bool): Whether to indent the output.
        indent_level (int): Level of indentation when serializing.

    Returns: an ``etree`` ``Element`` object.

    Raises:
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-ASCII binary data is present
            and `use_builtin_types` is false.
    """
    # None means "use the module default".
    effective_builtin_types = (
        USE_BUILTIN_TYPES if use_builtin_types is None else use_builtin_types
    )
    context = SimpleNamespace(
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=effective_builtin_types,
        pretty_print=pretty_print,
        indent_level=indent_level,
    )
    return _make_element(value, context)
491
492
def fromtree(
    tree: etree.Element,
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Build a plist structure from an XML element tree.

    Args:
        tree: An ``etree`` ``Element``.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns: An object (usually a dictionary).
    """
    target = PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
    # Replay the tree as the start/data/end events a parser would deliver.
    for action, element in etree.iterwalk(tree, events=("start", "end")):
        if action == "start":
            target.start(element.tag, element.attrib)
            continue
        if action != "end":
            continue
        if len(element) == 0:
            # Leaf element: feed its text, substituting "" for None.
            target.data(element.text or "")
        target.end(element.tag)
    return target.close()
520
521
522# python3 plistlib API
523
524
def load(
    fp: IO[bytes],
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Read a plist from an open file and return the resulting object.

    Args:
        fp: An opened file.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns:
        An object (usually a dictionary) representing the top level of
        the plist file.

    Raises:
        AttributeError: If ``fp`` has no ``read`` attribute.
    """

    if not hasattr(fp, "read"):
        raise AttributeError("'%s' object has no attribute 'read'" % type(fp).__name__)
    parser = etree.XMLParser(
        target=PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
    )
    parsed = etree.parse(fp, parser=parser)
    # lxml hands back the target's result directly, whereas ElementTree
    # wraps it as the root of an ElementTree object.
    try:
        root = parsed.getroot()
    except AttributeError:
        root = parsed
    return root
555
556
def loads(
    value: bytes,
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Parse a plist held in a bytes string and return the resulting object.

    Args:
        value: A bytes string containing a plist.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns:
        An object (usually a dictionary) representing the top level of
        the plist file.
    """

    # Wrap the bytes in an in-memory file and delegate to load().
    buffer = BytesIO(value)
    return load(buffer, use_builtin_types=use_builtin_types, dict_type=dict_type)
578
579
def dump(
    value: PlistEncodable,
    fp: IO[bytes],
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
) -> None:
    """Serialize a Python object to an open plist file.

    Args:
        value: An object to write.
        fp: A file opened for writing.
        sort_keys (bool): Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as ASCII strings or an
            exception raised if they cannot be represented. Defaults
            to ``True`` if not present.
        pretty_print (bool): Whether to indent the output.

    Raises:
        ``AttributeError``
            if ``fp`` has no ``write`` attribute.
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-representable binary data is present
            and `use_builtin_types` is false.
    """

    if not hasattr(fp, "write"):
        raise AttributeError("'%s' object has no attribute 'write'" % type(fp).__name__)
    plist_root = etree.Element("plist", version="1.0")
    plist_root.append(
        totree(
            value,
            sort_keys=sort_keys,
            skipkeys=skipkeys,
            use_builtin_types=use_builtin_types,
            pretty_print=pretty_print,
        )
    )
    document = etree.ElementTree(plist_root)
    # The doctype is written by hand rather than through the 'doctype'
    # argument of the 'write' method, because lxml would force a trailing
    # '\n' even when pretty_print is False.
    if pretty_print:
        prolog = XML_DECLARATION + b"\n" + PLIST_DOCTYPE + b"\n"
    else:
        prolog = XML_DECLARATION + PLIST_DOCTYPE
    fp.write(prolog)
    document.write(  # type: ignore
        fp,
        encoding="utf-8",
        pretty_print=pretty_print,
        xml_declaration=False,
    )
638
639
def dumps(
    value: PlistEncodable,
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
) -> bytes:
    """Serialize a Python object to plist-formatted bytes.

    Args:
        value: An object to write.
        sort_keys (bool): Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as strings or an
            exception raised if they cannot be represented. Defaults
            to ``True`` if not present.
        pretty_print (bool): Whether to indent the output.

    Returns:
        bytes: A plist representation of the Python object.

    Raises:
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-representable binary data is present
            and `use_builtin_types` is false.
    """
    # Write into an in-memory buffer through dump() and return its contents.
    buffer = BytesIO()
    dump(
        value,
        buffer,
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=use_builtin_types,
        pretty_print=pretty_print,
    )
    return buffer.getvalue()
682