fontTools/misc/etree.py

"""Shim module exporting the same ElementTree API for lxml and
xml.etree backends.

When lxml is installed, it is automatically preferred over the built-in
xml.etree module.
On Python 2.7, the cElementTree module is preferred over the pure-python
ElementTree module.

Besides exporting a unified interface, this also defines extra functions
or subclasses built-in ElementTree classes to add features that are
only available in lxml, like OrderedDict for attributes, pretty_print and
iterwalk.
"""
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughesfrom fontTools.misc.textTools import tostr
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes
# Template for the XML declaration line; the '%s' placeholder is filled in
# with the encoding name when a declaration is written.
XML_DECLARATION = """<?xml version='1.0' encoding='%s'?>"""

# Names exported by a star-import of this module.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "XML",
    "XMLParser",
    "register_namespace",
]
*e1fe3e4aSElliott Hughes
try:
    # Preferred backend: lxml, when it can be imported.
    from lxml.etree import *

    _have_lxml = True
except ImportError:
    # lxml is not available: fall back to the standard library, trying
    # cElementTree first and the pure-python ElementTree last.
    try:
        from xml.etree.cElementTree import *

        # the cElementTree version of XML function doesn't support
        # the optional 'parser' keyword argument
        from xml.etree.ElementTree import XML
    except ImportError:  # pragma: no cover
        from xml.etree.ElementTree import *
    _have_lxml = False
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    import sys
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    # dict is always ordered in python >= 3.6 and on pypy
*e1fe3e4aSElliott Hughes    PY36 = sys.version_info >= (3, 6)
*e1fe3e4aSElliott Hughes    try:
*e1fe3e4aSElliott Hughes        import __pypy__
*e1fe3e4aSElliott Hughes    except ImportError:
*e1fe3e4aSElliott Hughes        __pypy__ = None
*e1fe3e4aSElliott Hughes    _dict_is_ordered = bool(PY36 or __pypy__)
*e1fe3e4aSElliott Hughes    del PY36, __pypy__
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    if _dict_is_ordered:
*e1fe3e4aSElliott Hughes        _Attrib = dict
*e1fe3e4aSElliott Hughes    else:
*e1fe3e4aSElliott Hughes        from collections import OrderedDict as _Attrib
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    if isinstance(Element, type):
*e1fe3e4aSElliott Hughes        _Element = Element
*e1fe3e4aSElliott Hughes    else:
*e1fe3e4aSElliott Hughes        # in py27, cElementTree.Element cannot be subclassed, so
*e1fe3e4aSElliott Hughes        # we need to import the pure-python class
*e1fe3e4aSElliott Hughes        from xml.etree.ElementTree import Element as _Element
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    class Element(_Element):
*e1fe3e4aSElliott Hughes        """Element subclass that keeps the order of attributes."""
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes        def __init__(self, tag, attrib=_Attrib(), **extra):
*e1fe3e4aSElliott Hughes            super(Element, self).__init__(tag)
*e1fe3e4aSElliott Hughes            self.attrib = _Attrib()
*e1fe3e4aSElliott Hughes            if attrib:
*e1fe3e4aSElliott Hughes                self.attrib.update(attrib)
*e1fe3e4aSElliott Hughes            if extra:
*e1fe3e4aSElliott Hughes                self.attrib.update(extra)
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def SubElement(parent, tag, attrib=_Attrib(), **extra):
*e1fe3e4aSElliott Hughes        """Must override SubElement as well otherwise _elementtree.SubElement
*e1fe3e4aSElliott Hughes        fails if 'parent' is a subclass of Element object.
*e1fe3e4aSElliott Hughes        """
*e1fe3e4aSElliott Hughes        element = parent.__class__(tag, attrib, **extra)
*e1fe3e4aSElliott Hughes        parent.append(element)
*e1fe3e4aSElliott Hughes        return element
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def _iterwalk(element, events, tag):
*e1fe3e4aSElliott Hughes        include = tag is None or element.tag == tag
*e1fe3e4aSElliott Hughes        if include and "start" in events:
*e1fe3e4aSElliott Hughes            yield ("start", element)
*e1fe3e4aSElliott Hughes        for e in element:
*e1fe3e4aSElliott Hughes            for item in _iterwalk(e, events, tag):
*e1fe3e4aSElliott Hughes                yield item
*e1fe3e4aSElliott Hughes        if include:
*e1fe3e4aSElliott Hughes            yield ("end", element)
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def iterwalk(element_or_tree, events=("end",), tag=None):
*e1fe3e4aSElliott Hughes        """A tree walker that generates events from an existing tree as
*e1fe3e4aSElliott Hughes        if it was parsing XML data with iterparse().
*e1fe3e4aSElliott Hughes        Drop-in replacement for lxml.etree.iterwalk.
*e1fe3e4aSElliott Hughes        """
*e1fe3e4aSElliott Hughes        if iselement(element_or_tree):
*e1fe3e4aSElliott Hughes            element = element_or_tree
*e1fe3e4aSElliott Hughes        else:
*e1fe3e4aSElliott Hughes            element = element_or_tree.getroot()
*e1fe3e4aSElliott Hughes        if tag == "*":
*e1fe3e4aSElliott Hughes            tag = None
*e1fe3e4aSElliott Hughes        for item in _iterwalk(element, events, tag):
*e1fe3e4aSElliott Hughes            yield item
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    _ElementTree = ElementTree
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    class ElementTree(_ElementTree):
*e1fe3e4aSElliott Hughes        """ElementTree subclass that adds 'pretty_print' and 'doctype'
*e1fe3e4aSElliott Hughes        arguments to the 'write' method.
*e1fe3e4aSElliott Hughes        Currently these are only supported for the default XML serialization
*e1fe3e4aSElliott Hughes        'method', and not also for "html" or "text", for these are delegated
*e1fe3e4aSElliott Hughes        to the base class.
*e1fe3e4aSElliott Hughes        """
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes        def write(
*e1fe3e4aSElliott Hughes            self,
*e1fe3e4aSElliott Hughes            file_or_filename,
*e1fe3e4aSElliott Hughes            encoding=None,
*e1fe3e4aSElliott Hughes            xml_declaration=False,
*e1fe3e4aSElliott Hughes            method=None,
*e1fe3e4aSElliott Hughes            doctype=None,
*e1fe3e4aSElliott Hughes            pretty_print=False,
*e1fe3e4aSElliott Hughes        ):
*e1fe3e4aSElliott Hughes            if method and method != "xml":
*e1fe3e4aSElliott Hughes                # delegate to super-class
*e1fe3e4aSElliott Hughes                super(ElementTree, self).write(
*e1fe3e4aSElliott Hughes                    file_or_filename,
*e1fe3e4aSElliott Hughes                    encoding=encoding,
*e1fe3e4aSElliott Hughes                    xml_declaration=xml_declaration,
*e1fe3e4aSElliott Hughes                    method=method,
*e1fe3e4aSElliott Hughes                )
*e1fe3e4aSElliott Hughes                return
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes            if encoding is not None and encoding.lower() == "unicode":
*e1fe3e4aSElliott Hughes                if xml_declaration:
*e1fe3e4aSElliott Hughes                    raise ValueError(
*e1fe3e4aSElliott Hughes                        "Serialisation to unicode must not request an XML declaration"
*e1fe3e4aSElliott Hughes                    )
*e1fe3e4aSElliott Hughes                write_declaration = False
*e1fe3e4aSElliott Hughes                encoding = "unicode"
*e1fe3e4aSElliott Hughes            elif xml_declaration is None:
*e1fe3e4aSElliott Hughes                # by default, write an XML declaration only for non-standard encodings
*e1fe3e4aSElliott Hughes                write_declaration = encoding is not None and encoding.upper() not in (
*e1fe3e4aSElliott Hughes                    "ASCII",
*e1fe3e4aSElliott Hughes                    "UTF-8",
*e1fe3e4aSElliott Hughes                    "UTF8",
*e1fe3e4aSElliott Hughes                    "US-ASCII",
*e1fe3e4aSElliott Hughes                )
*e1fe3e4aSElliott Hughes            else:
*e1fe3e4aSElliott Hughes                write_declaration = xml_declaration
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes            if encoding is None:
*e1fe3e4aSElliott Hughes                encoding = "ASCII"
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes            if pretty_print:
*e1fe3e4aSElliott Hughes                # NOTE this will modify the tree in-place
*e1fe3e4aSElliott Hughes                _indent(self._root)
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes            with _get_writer(file_or_filename, encoding) as write:
*e1fe3e4aSElliott Hughes                if write_declaration:
*e1fe3e4aSElliott Hughes                    write(XML_DECLARATION % encoding.upper())
*e1fe3e4aSElliott Hughes                    if pretty_print:
*e1fe3e4aSElliott Hughes                        write("\n")
*e1fe3e4aSElliott Hughes                if doctype:
*e1fe3e4aSElliott Hughes                    write(_tounicode(doctype))
*e1fe3e4aSElliott Hughes                    if pretty_print:
*e1fe3e4aSElliott Hughes                        write("\n")
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes                qnames, namespaces = _namespaces(self._root)
*e1fe3e4aSElliott Hughes                _serialize_xml(write, self._root, qnames, namespaces)
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    import io
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def tostring(
*e1fe3e4aSElliott Hughes        element,
*e1fe3e4aSElliott Hughes        encoding=None,
*e1fe3e4aSElliott Hughes        xml_declaration=None,
*e1fe3e4aSElliott Hughes        method=None,
*e1fe3e4aSElliott Hughes        doctype=None,
*e1fe3e4aSElliott Hughes        pretty_print=False,
*e1fe3e4aSElliott Hughes    ):
*e1fe3e4aSElliott Hughes        """Custom 'tostring' function that uses our ElementTree subclass, with
*e1fe3e4aSElliott Hughes        pretty_print support.
*e1fe3e4aSElliott Hughes        """
*e1fe3e4aSElliott Hughes        stream = io.StringIO() if encoding == "unicode" else io.BytesIO()
*e1fe3e4aSElliott Hughes        ElementTree(element).write(
*e1fe3e4aSElliott Hughes            stream,
*e1fe3e4aSElliott Hughes            encoding=encoding,
*e1fe3e4aSElliott Hughes            xml_declaration=xml_declaration,
*e1fe3e4aSElliott Hughes            method=method,
*e1fe3e4aSElliott Hughes            doctype=doctype,
*e1fe3e4aSElliott Hughes            pretty_print=pretty_print,
*e1fe3e4aSElliott Hughes        )
*e1fe3e4aSElliott Hughes        return stream.getvalue()
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    # serialization support
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    import re
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    # Valid XML strings can include any Unicode character, excluding control
*e1fe3e4aSElliott Hughes    # characters, the surrogate blocks, FFFE, and FFFF:
*e1fe3e4aSElliott Hughes    #   Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*e1fe3e4aSElliott Hughes    # Here we reversed the pattern to match only the invalid characters.
*e1fe3e4aSElliott Hughes    # For the 'narrow' python builds supporting only UCS-2, which represent
*e1fe3e4aSElliott Hughes    # characters beyond BMP as UTF-16 surrogate pairs, we need to pass through
*e1fe3e4aSElliott Hughes    # the surrogate block. I haven't found a more elegant solution...
*e1fe3e4aSElliott Hughes    UCS2 = sys.maxunicode < 0x10FFFF
*e1fe3e4aSElliott Hughes    if UCS2:
*e1fe3e4aSElliott Hughes        _invalid_xml_string = re.compile(
*e1fe3e4aSElliott Hughes            "[\u0000-\u0008\u000B-\u000C\u000E-\u001F\uFFFE-\uFFFF]"
*e1fe3e4aSElliott Hughes        )
*e1fe3e4aSElliott Hughes    else:
*e1fe3e4aSElliott Hughes        _invalid_xml_string = re.compile(
*e1fe3e4aSElliott Hughes            "[\u0000-\u0008\u000B-\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE-\uFFFF]"
*e1fe3e4aSElliott Hughes        )
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def _tounicode(s):
*e1fe3e4aSElliott Hughes        """Test if a string is valid user input and decode it to unicode string
*e1fe3e4aSElliott Hughes        using ASCII encoding if it's a bytes string.
*e1fe3e4aSElliott Hughes        Reject all bytes/unicode input that contains non-XML characters.
*e1fe3e4aSElliott Hughes        Reject all bytes input that contains non-ASCII characters.
*e1fe3e4aSElliott Hughes        """
*e1fe3e4aSElliott Hughes        try:
*e1fe3e4aSElliott Hughes            s = tostr(s, encoding="ascii", errors="strict")
*e1fe3e4aSElliott Hughes        except UnicodeDecodeError:
*e1fe3e4aSElliott Hughes            raise ValueError(
*e1fe3e4aSElliott Hughes                "Bytes strings can only contain ASCII characters. "
*e1fe3e4aSElliott Hughes                "Use unicode strings for non-ASCII characters."
*e1fe3e4aSElliott Hughes            )
*e1fe3e4aSElliott Hughes        except AttributeError:
*e1fe3e4aSElliott Hughes            _raise_serialization_error(s)
*e1fe3e4aSElliott Hughes        if s and _invalid_xml_string.search(s):
*e1fe3e4aSElliott Hughes            raise ValueError(
*e1fe3e4aSElliott Hughes                "All strings must be XML compatible: Unicode or ASCII, "
*e1fe3e4aSElliott Hughes                "no NULL bytes or control characters"
*e1fe3e4aSElliott Hughes            )
*e1fe3e4aSElliott Hughes        return s
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    import contextlib
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    @contextlib.contextmanager
*e1fe3e4aSElliott Hughes    def _get_writer(file_or_filename, encoding):
*e1fe3e4aSElliott Hughes        # returns text write method and release all resources after using
*e1fe3e4aSElliott Hughes        try:
*e1fe3e4aSElliott Hughes            write = file_or_filename.write
*e1fe3e4aSElliott Hughes        except AttributeError:
*e1fe3e4aSElliott Hughes            # file_or_filename is a file name
*e1fe3e4aSElliott Hughes            f = open(
*e1fe3e4aSElliott Hughes                file_or_filename,
*e1fe3e4aSElliott Hughes                "w",
*e1fe3e4aSElliott Hughes                encoding="utf-8" if encoding == "unicode" else encoding,
*e1fe3e4aSElliott Hughes                errors="xmlcharrefreplace",
*e1fe3e4aSElliott Hughes            )
*e1fe3e4aSElliott Hughes            with f:
*e1fe3e4aSElliott Hughes                yield f.write
*e1fe3e4aSElliott Hughes        else:
*e1fe3e4aSElliott Hughes            # file_or_filename is a file-like object
*e1fe3e4aSElliott Hughes            # encoding determines if it is a text or binary writer
*e1fe3e4aSElliott Hughes            if encoding == "unicode":
*e1fe3e4aSElliott Hughes                # use a text writer as is
*e1fe3e4aSElliott Hughes                yield write
*e1fe3e4aSElliott Hughes            else:
*e1fe3e4aSElliott Hughes                # wrap a binary writer with TextIOWrapper
*e1fe3e4aSElliott Hughes                detach_buffer = False
*e1fe3e4aSElliott Hughes                if isinstance(file_or_filename, io.BufferedIOBase):
*e1fe3e4aSElliott Hughes                    buf = file_or_filename
*e1fe3e4aSElliott Hughes                elif isinstance(file_or_filename, io.RawIOBase):
*e1fe3e4aSElliott Hughes                    buf = io.BufferedWriter(file_or_filename)
*e1fe3e4aSElliott Hughes                    detach_buffer = True
*e1fe3e4aSElliott Hughes                else:
*e1fe3e4aSElliott Hughes                    # This is to handle passed objects that aren't in the
*e1fe3e4aSElliott Hughes                    # IOBase hierarchy, but just have a write method
*e1fe3e4aSElliott Hughes                    buf = io.BufferedIOBase()
*e1fe3e4aSElliott Hughes                    buf.writable = lambda: True
*e1fe3e4aSElliott Hughes                    buf.write = write
*e1fe3e4aSElliott Hughes                    try:
*e1fe3e4aSElliott Hughes                        # TextIOWrapper uses this methods to determine
*e1fe3e4aSElliott Hughes                        # if BOM (for UTF-16, etc) should be added
*e1fe3e4aSElliott Hughes                        buf.seekable = file_or_filename.seekable
*e1fe3e4aSElliott Hughes                        buf.tell = file_or_filename.tell
*e1fe3e4aSElliott Hughes                    except AttributeError:
*e1fe3e4aSElliott Hughes                        pass
*e1fe3e4aSElliott Hughes                wrapper = io.TextIOWrapper(
*e1fe3e4aSElliott Hughes                    buf,
*e1fe3e4aSElliott Hughes                    encoding=encoding,
*e1fe3e4aSElliott Hughes                    errors="xmlcharrefreplace",
*e1fe3e4aSElliott Hughes                    newline="\n",
*e1fe3e4aSElliott Hughes                )
*e1fe3e4aSElliott Hughes                try:
*e1fe3e4aSElliott Hughes                    yield wrapper.write
*e1fe3e4aSElliott Hughes                finally:
*e1fe3e4aSElliott Hughes                    # Keep the original file open when the TextIOWrapper and
*e1fe3e4aSElliott Hughes                    # the BufferedWriter are destroyed
*e1fe3e4aSElliott Hughes                    wrapper.detach()
*e1fe3e4aSElliott Hughes                    if detach_buffer:
*e1fe3e4aSElliott Hughes                        buf.detach()
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    from xml.etree.ElementTree import _namespace_map
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def _namespaces(elem):
*e1fe3e4aSElliott Hughes        # identify namespaces used in this tree
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes        # maps qnames to *encoded* prefix:local names
*e1fe3e4aSElliott Hughes        qnames = {None: None}
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes        # maps uri:s to prefixes
*e1fe3e4aSElliott Hughes        namespaces = {}
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes        def add_qname(qname):
*e1fe3e4aSElliott Hughes            # calculate serialized qname representation
*e1fe3e4aSElliott Hughes            try:
*e1fe3e4aSElliott Hughes                qname = _tounicode(qname)
*e1fe3e4aSElliott Hughes                if qname[:1] == "{":
*e1fe3e4aSElliott Hughes                    uri, tag = qname[1:].rsplit("}", 1)
*e1fe3e4aSElliott Hughes                    prefix = namespaces.get(uri)
*e1fe3e4aSElliott Hughes                    if prefix is None:
*e1fe3e4aSElliott Hughes                        prefix = _namespace_map.get(uri)
*e1fe3e4aSElliott Hughes                        if prefix is None:
*e1fe3e4aSElliott Hughes                            prefix = "ns%d" % len(namespaces)
*e1fe3e4aSElliott Hughes                        else:
*e1fe3e4aSElliott Hughes                            prefix = _tounicode(prefix)
*e1fe3e4aSElliott Hughes                        if prefix != "xml":
*e1fe3e4aSElliott Hughes                            namespaces[uri] = prefix
*e1fe3e4aSElliott Hughes                    if prefix:
*e1fe3e4aSElliott Hughes                        qnames[qname] = "%s:%s" % (prefix, tag)
*e1fe3e4aSElliott Hughes                    else:
*e1fe3e4aSElliott Hughes                        qnames[qname] = tag  # default element
*e1fe3e4aSElliott Hughes                else:
*e1fe3e4aSElliott Hughes                    qnames[qname] = qname
*e1fe3e4aSElliott Hughes            except TypeError:
*e1fe3e4aSElliott Hughes                _raise_serialization_error(qname)
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes        # populate qname and namespaces table
*e1fe3e4aSElliott Hughes        for elem in elem.iter():
*e1fe3e4aSElliott Hughes            tag = elem.tag
*e1fe3e4aSElliott Hughes            if isinstance(tag, QName):
*e1fe3e4aSElliott Hughes                if tag.text not in qnames:
*e1fe3e4aSElliott Hughes                    add_qname(tag.text)
*e1fe3e4aSElliott Hughes            elif isinstance(tag, str):
*e1fe3e4aSElliott Hughes                if tag not in qnames:
*e1fe3e4aSElliott Hughes                    add_qname(tag)
*e1fe3e4aSElliott Hughes            elif tag is not None and tag is not Comment and tag is not PI:
*e1fe3e4aSElliott Hughes                _raise_serialization_error(tag)
*e1fe3e4aSElliott Hughes            for key, value in elem.items():
*e1fe3e4aSElliott Hughes                if isinstance(key, QName):
*e1fe3e4aSElliott Hughes                    key = key.text
*e1fe3e4aSElliott Hughes                if key not in qnames:
*e1fe3e4aSElliott Hughes                    add_qname(key)
*e1fe3e4aSElliott Hughes                if isinstance(value, QName) and value.text not in qnames:
*e1fe3e4aSElliott Hughes                    add_qname(value.text)
*e1fe3e4aSElliott Hughes            text = elem.text
*e1fe3e4aSElliott Hughes            if isinstance(text, QName) and text.text not in qnames:
*e1fe3e4aSElliott Hughes                add_qname(text.text)
*e1fe3e4aSElliott Hughes        return qnames, namespaces
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def _serialize_xml(write, elem, qnames, namespaces, **kwargs):
*e1fe3e4aSElliott Hughes        tag = elem.tag
*e1fe3e4aSElliott Hughes        text = elem.text
*e1fe3e4aSElliott Hughes        if tag is Comment:
*e1fe3e4aSElliott Hughes            write("<!--%s-->" % _tounicode(text))
*e1fe3e4aSElliott Hughes        elif tag is ProcessingInstruction:
*e1fe3e4aSElliott Hughes            write("<?%s?>" % _tounicode(text))
*e1fe3e4aSElliott Hughes        else:
*e1fe3e4aSElliott Hughes            tag = qnames[_tounicode(tag) if tag is not None else None]
*e1fe3e4aSElliott Hughes            if tag is None:
*e1fe3e4aSElliott Hughes                if text:
*e1fe3e4aSElliott Hughes                    write(_escape_cdata(text))
*e1fe3e4aSElliott Hughes                for e in elem:
*e1fe3e4aSElliott Hughes                    _serialize_xml(write, e, qnames, None)
*e1fe3e4aSElliott Hughes            else:
*e1fe3e4aSElliott Hughes                write("<" + tag)
*e1fe3e4aSElliott Hughes                if namespaces:
*e1fe3e4aSElliott Hughes                    for uri, prefix in sorted(
*e1fe3e4aSElliott Hughes                        namespaces.items(), key=lambda x: x[1]
*e1fe3e4aSElliott Hughes                    ):  # sort on prefix
*e1fe3e4aSElliott Hughes                        if prefix:
*e1fe3e4aSElliott Hughes                            prefix = ":" + prefix
*e1fe3e4aSElliott Hughes                        write(' xmlns%s="%s"' % (prefix, _escape_attrib(uri)))
*e1fe3e4aSElliott Hughes                attrs = elem.attrib
*e1fe3e4aSElliott Hughes                if attrs:
*e1fe3e4aSElliott Hughes                    # try to keep existing attrib order
*e1fe3e4aSElliott Hughes                    if len(attrs) <= 1 or type(attrs) is _Attrib:
*e1fe3e4aSElliott Hughes                        items = attrs.items()
*e1fe3e4aSElliott Hughes                    else:
*e1fe3e4aSElliott Hughes                        # if plain dict, use lexical order
*e1fe3e4aSElliott Hughes                        items = sorted(attrs.items())
*e1fe3e4aSElliott Hughes                    for k, v in items:
*e1fe3e4aSElliott Hughes                        if isinstance(k, QName):
*e1fe3e4aSElliott Hughes                            k = _tounicode(k.text)
*e1fe3e4aSElliott Hughes                        else:
*e1fe3e4aSElliott Hughes                            k = _tounicode(k)
*e1fe3e4aSElliott Hughes                        if isinstance(v, QName):
*e1fe3e4aSElliott Hughes                            v = qnames[_tounicode(v.text)]
*e1fe3e4aSElliott Hughes                        else:
*e1fe3e4aSElliott Hughes                            v = _escape_attrib(v)
*e1fe3e4aSElliott Hughes                        write(' %s="%s"' % (qnames[k], v))
*e1fe3e4aSElliott Hughes                if text is not None or len(elem):
*e1fe3e4aSElliott Hughes                    write(">")
*e1fe3e4aSElliott Hughes                    if text:
*e1fe3e4aSElliott Hughes                        write(_escape_cdata(text))
*e1fe3e4aSElliott Hughes                    for e in elem:
*e1fe3e4aSElliott Hughes                        _serialize_xml(write, e, qnames, None)
*e1fe3e4aSElliott Hughes                    write("</" + tag + ">")
*e1fe3e4aSElliott Hughes                else:
*e1fe3e4aSElliott Hughes                    write("/>")
*e1fe3e4aSElliott Hughes        if elem.tail:
*e1fe3e4aSElliott Hughes            write(_escape_cdata(elem.tail))
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def _raise_serialization_error(text):
*e1fe3e4aSElliott Hughes        raise TypeError("cannot serialize %r (type %s)" % (text, type(text).__name__))
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def _escape_cdata(text):
*e1fe3e4aSElliott Hughes        # escape character data
*e1fe3e4aSElliott Hughes        try:
*e1fe3e4aSElliott Hughes            text = _tounicode(text)
*e1fe3e4aSElliott Hughes            # it's worth avoiding do-nothing calls for short strings
*e1fe3e4aSElliott Hughes            if "&" in text:
*e1fe3e4aSElliott Hughes                text = text.replace("&", "&amp;")
*e1fe3e4aSElliott Hughes            if "<" in text:
*e1fe3e4aSElliott Hughes                text = text.replace("<", "&lt;")
*e1fe3e4aSElliott Hughes            if ">" in text:
*e1fe3e4aSElliott Hughes                text = text.replace(">", "&gt;")
*e1fe3e4aSElliott Hughes            return text
*e1fe3e4aSElliott Hughes        except (TypeError, AttributeError):
*e1fe3e4aSElliott Hughes            _raise_serialization_error(text)
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def _escape_attrib(text):
*e1fe3e4aSElliott Hughes        # escape attribute value
*e1fe3e4aSElliott Hughes        try:
*e1fe3e4aSElliott Hughes            text = _tounicode(text)
*e1fe3e4aSElliott Hughes            if "&" in text:
*e1fe3e4aSElliott Hughes                text = text.replace("&", "&amp;")
*e1fe3e4aSElliott Hughes            if "<" in text:
*e1fe3e4aSElliott Hughes                text = text.replace("<", "&lt;")
*e1fe3e4aSElliott Hughes            if ">" in text:
*e1fe3e4aSElliott Hughes                text = text.replace(">", "&gt;")
*e1fe3e4aSElliott Hughes            if '"' in text:
*e1fe3e4aSElliott Hughes                text = text.replace('"', "&quot;")
*e1fe3e4aSElliott Hughes            if "\n" in text:
*e1fe3e4aSElliott Hughes                text = text.replace("\n", "&#10;")
*e1fe3e4aSElliott Hughes            return text
*e1fe3e4aSElliott Hughes        except (TypeError, AttributeError):
*e1fe3e4aSElliott Hughes            _raise_serialization_error(text)
*e1fe3e4aSElliott Hughes
*e1fe3e4aSElliott Hughes    def _indent(elem, level=0):
*e1fe3e4aSElliott Hughes        # From http://effbot.org/zone/element-lib.htm#prettyprint
*e1fe3e4aSElliott Hughes        i = "\n" + level * "  "
*e1fe3e4aSElliott Hughes        if len(elem):
*e1fe3e4aSElliott Hughes            if not elem.text or not elem.text.strip():
*e1fe3e4aSElliott Hughes                elem.text = i + "  "
*e1fe3e4aSElliott Hughes            if not elem.tail or not elem.tail.strip():
*e1fe3e4aSElliott Hughes                elem.tail = i
*e1fe3e4aSElliott Hughes            for elem in elem:
*e1fe3e4aSElliott Hughes                _indent(elem, level + 1)
*e1fe3e4aSElliott Hughes            if not elem.tail or not elem.tail.strip():
*e1fe3e4aSElliott Hughes                elem.tail = i
*e1fe3e4aSElliott Hughes        else:
*e1fe3e4aSElliott Hughes            if level and (not elem.tail or not elem.tail.strip()):
*e1fe3e4aSElliott Hughes                elem.tail = i