xref: /aosp_15_r20/external/fonttools/Lib/fontTools/misc/etree.py (revision e1fe3e4ad2793916b15cccdc4a7da52a7e1dd0e9)
1*e1fe3e4aSElliott Hughes"""Shim module exporting the same ElementTree API for lxml and
2*e1fe3e4aSElliott Hughesxml.etree backends.
3*e1fe3e4aSElliott Hughes
4*e1fe3e4aSElliott HughesWhen lxml is installed, it is automatically preferred over the built-in
5*e1fe3e4aSElliott Hughesxml.etree module.
6*e1fe3e4aSElliott HughesOn Python 2.7, the cElementTree module is preferred over the pure-python
7*e1fe3e4aSElliott HughesElementTree module.
8*e1fe3e4aSElliott Hughes
9*e1fe3e4aSElliott HughesBesides exporting a unified interface, this also defines extra functions
10*e1fe3e4aSElliott Hughesor subclasses built-in ElementTree classes to add features that are
11*e1fe3e4aSElliott Hughesonly available in lxml, like OrderedDict for attributes, pretty_print and
12*e1fe3e4aSElliott Hughesiterwalk.
13*e1fe3e4aSElliott Hughes"""
14*e1fe3e4aSElliott Hughes
15*e1fe3e4aSElliott Hughesfrom fontTools.misc.textTools import tostr
16*e1fe3e4aSElliott Hughes
17*e1fe3e4aSElliott Hughes
18*e1fe3e4aSElliott HughesXML_DECLARATION = """<?xml version='1.0' encoding='%s'?>"""
19*e1fe3e4aSElliott Hughes
20*e1fe3e4aSElliott Hughes__all__ = [
21*e1fe3e4aSElliott Hughes    # public symbols
22*e1fe3e4aSElliott Hughes    "Comment",
23*e1fe3e4aSElliott Hughes    "dump",
24*e1fe3e4aSElliott Hughes    "Element",
25*e1fe3e4aSElliott Hughes    "ElementTree",
26*e1fe3e4aSElliott Hughes    "fromstring",
27*e1fe3e4aSElliott Hughes    "fromstringlist",
28*e1fe3e4aSElliott Hughes    "iselement",
29*e1fe3e4aSElliott Hughes    "iterparse",
30*e1fe3e4aSElliott Hughes    "parse",
31*e1fe3e4aSElliott Hughes    "ParseError",
32*e1fe3e4aSElliott Hughes    "PI",
33*e1fe3e4aSElliott Hughes    "ProcessingInstruction",
34*e1fe3e4aSElliott Hughes    "QName",
35*e1fe3e4aSElliott Hughes    "SubElement",
36*e1fe3e4aSElliott Hughes    "tostring",
37*e1fe3e4aSElliott Hughes    "tostringlist",
38*e1fe3e4aSElliott Hughes    "TreeBuilder",
39*e1fe3e4aSElliott Hughes    "XML",
40*e1fe3e4aSElliott Hughes    "XMLParser",
41*e1fe3e4aSElliott Hughes    "register_namespace",
42*e1fe3e4aSElliott Hughes]
43*e1fe3e4aSElliott Hughes
44*e1fe3e4aSElliott Hughestry:
45*e1fe3e4aSElliott Hughes    from lxml.etree import *
46*e1fe3e4aSElliott Hughes
47*e1fe3e4aSElliott Hughes    _have_lxml = True
48*e1fe3e4aSElliott Hughesexcept ImportError:
49*e1fe3e4aSElliott Hughes    try:
50*e1fe3e4aSElliott Hughes        from xml.etree.cElementTree import *
51*e1fe3e4aSElliott Hughes
52*e1fe3e4aSElliott Hughes        # the cElementTree version of XML function doesn't support
53*e1fe3e4aSElliott Hughes        # the optional 'parser' keyword argument
54*e1fe3e4aSElliott Hughes        from xml.etree.ElementTree import XML
55*e1fe3e4aSElliott Hughes    except ImportError:  # pragma: no cover
56*e1fe3e4aSElliott Hughes        from xml.etree.ElementTree import *
57*e1fe3e4aSElliott Hughes    _have_lxml = False
58*e1fe3e4aSElliott Hughes
    import sys

    # Pick the mapping type used to store element attributes so that their
    # insertion order is kept when serializing.
    # dict is always ordered in python >= 3.6 and on pypy
    PY36 = sys.version_info >= (3, 6)
    try:
        import __pypy__
    except ImportError:
        __pypy__ = None
    _dict_is_ordered = bool(PY36 or __pypy__)
    # PY36 and __pypy__ were only needed to compute the flag above; remove
    # them so they do not linger in the module namespace.
    del PY36, __pypy__

    if _dict_is_ordered:
        _Attrib = dict
    else:
        from collections import OrderedDict as _Attrib

    # Choose a base class that the Element subclass defined below can
    # inherit from.
    if isinstance(Element, type):
        _Element = Element
    else:
        # in py27, cElementTree.Element cannot be subclassed, so
        # we need to import the pure-python class
        from xml.etree.ElementTree import Element as _Element
81*e1fe3e4aSElliott Hughes
82*e1fe3e4aSElliott Hughes    class Element(_Element):
83*e1fe3e4aSElliott Hughes        """Element subclass that keeps the order of attributes."""
84*e1fe3e4aSElliott Hughes
85*e1fe3e4aSElliott Hughes        def __init__(self, tag, attrib=_Attrib(), **extra):
86*e1fe3e4aSElliott Hughes            super(Element, self).__init__(tag)
87*e1fe3e4aSElliott Hughes            self.attrib = _Attrib()
88*e1fe3e4aSElliott Hughes            if attrib:
89*e1fe3e4aSElliott Hughes                self.attrib.update(attrib)
90*e1fe3e4aSElliott Hughes            if extra:
91*e1fe3e4aSElliott Hughes                self.attrib.update(extra)
92*e1fe3e4aSElliott Hughes
93*e1fe3e4aSElliott Hughes    def SubElement(parent, tag, attrib=_Attrib(), **extra):
94*e1fe3e4aSElliott Hughes        """Must override SubElement as well otherwise _elementtree.SubElement
95*e1fe3e4aSElliott Hughes        fails if 'parent' is a subclass of Element object.
96*e1fe3e4aSElliott Hughes        """
97*e1fe3e4aSElliott Hughes        element = parent.__class__(tag, attrib, **extra)
98*e1fe3e4aSElliott Hughes        parent.append(element)
99*e1fe3e4aSElliott Hughes        return element
100*e1fe3e4aSElliott Hughes
101*e1fe3e4aSElliott Hughes    def _iterwalk(element, events, tag):
102*e1fe3e4aSElliott Hughes        include = tag is None or element.tag == tag
103*e1fe3e4aSElliott Hughes        if include and "start" in events:
104*e1fe3e4aSElliott Hughes            yield ("start", element)
105*e1fe3e4aSElliott Hughes        for e in element:
106*e1fe3e4aSElliott Hughes            for item in _iterwalk(e, events, tag):
107*e1fe3e4aSElliott Hughes                yield item
108*e1fe3e4aSElliott Hughes        if include:
109*e1fe3e4aSElliott Hughes            yield ("end", element)
110*e1fe3e4aSElliott Hughes
111*e1fe3e4aSElliott Hughes    def iterwalk(element_or_tree, events=("end",), tag=None):
112*e1fe3e4aSElliott Hughes        """A tree walker that generates events from an existing tree as
113*e1fe3e4aSElliott Hughes        if it was parsing XML data with iterparse().
114*e1fe3e4aSElliott Hughes        Drop-in replacement for lxml.etree.iterwalk.
115*e1fe3e4aSElliott Hughes        """
116*e1fe3e4aSElliott Hughes        if iselement(element_or_tree):
117*e1fe3e4aSElliott Hughes            element = element_or_tree
118*e1fe3e4aSElliott Hughes        else:
119*e1fe3e4aSElliott Hughes            element = element_or_tree.getroot()
120*e1fe3e4aSElliott Hughes        if tag == "*":
121*e1fe3e4aSElliott Hughes            tag = None
122*e1fe3e4aSElliott Hughes        for item in _iterwalk(element, events, tag):
123*e1fe3e4aSElliott Hughes            yield item
124*e1fe3e4aSElliott Hughes
125*e1fe3e4aSElliott Hughes    _ElementTree = ElementTree
126*e1fe3e4aSElliott Hughes
127*e1fe3e4aSElliott Hughes    class ElementTree(_ElementTree):
128*e1fe3e4aSElliott Hughes        """ElementTree subclass that adds 'pretty_print' and 'doctype'
129*e1fe3e4aSElliott Hughes        arguments to the 'write' method.
130*e1fe3e4aSElliott Hughes        Currently these are only supported for the default XML serialization
131*e1fe3e4aSElliott Hughes        'method', and not also for "html" or "text", for these are delegated
132*e1fe3e4aSElliott Hughes        to the base class.
133*e1fe3e4aSElliott Hughes        """
134*e1fe3e4aSElliott Hughes
135*e1fe3e4aSElliott Hughes        def write(
136*e1fe3e4aSElliott Hughes            self,
137*e1fe3e4aSElliott Hughes            file_or_filename,
138*e1fe3e4aSElliott Hughes            encoding=None,
139*e1fe3e4aSElliott Hughes            xml_declaration=False,
140*e1fe3e4aSElliott Hughes            method=None,
141*e1fe3e4aSElliott Hughes            doctype=None,
142*e1fe3e4aSElliott Hughes            pretty_print=False,
143*e1fe3e4aSElliott Hughes        ):
144*e1fe3e4aSElliott Hughes            if method and method != "xml":
145*e1fe3e4aSElliott Hughes                # delegate to super-class
146*e1fe3e4aSElliott Hughes                super(ElementTree, self).write(
147*e1fe3e4aSElliott Hughes                    file_or_filename,
148*e1fe3e4aSElliott Hughes                    encoding=encoding,
149*e1fe3e4aSElliott Hughes                    xml_declaration=xml_declaration,
150*e1fe3e4aSElliott Hughes                    method=method,
151*e1fe3e4aSElliott Hughes                )
152*e1fe3e4aSElliott Hughes                return
153*e1fe3e4aSElliott Hughes
154*e1fe3e4aSElliott Hughes            if encoding is not None and encoding.lower() == "unicode":
155*e1fe3e4aSElliott Hughes                if xml_declaration:
156*e1fe3e4aSElliott Hughes                    raise ValueError(
157*e1fe3e4aSElliott Hughes                        "Serialisation to unicode must not request an XML declaration"
158*e1fe3e4aSElliott Hughes                    )
159*e1fe3e4aSElliott Hughes                write_declaration = False
160*e1fe3e4aSElliott Hughes                encoding = "unicode"
161*e1fe3e4aSElliott Hughes            elif xml_declaration is None:
162*e1fe3e4aSElliott Hughes                # by default, write an XML declaration only for non-standard encodings
163*e1fe3e4aSElliott Hughes                write_declaration = encoding is not None and encoding.upper() not in (
164*e1fe3e4aSElliott Hughes                    "ASCII",
165*e1fe3e4aSElliott Hughes                    "UTF-8",
166*e1fe3e4aSElliott Hughes                    "UTF8",
167*e1fe3e4aSElliott Hughes                    "US-ASCII",
168*e1fe3e4aSElliott Hughes                )
169*e1fe3e4aSElliott Hughes            else:
170*e1fe3e4aSElliott Hughes                write_declaration = xml_declaration
171*e1fe3e4aSElliott Hughes
172*e1fe3e4aSElliott Hughes            if encoding is None:
173*e1fe3e4aSElliott Hughes                encoding = "ASCII"
174*e1fe3e4aSElliott Hughes
175*e1fe3e4aSElliott Hughes            if pretty_print:
176*e1fe3e4aSElliott Hughes                # NOTE this will modify the tree in-place
177*e1fe3e4aSElliott Hughes                _indent(self._root)
178*e1fe3e4aSElliott Hughes
179*e1fe3e4aSElliott Hughes            with _get_writer(file_or_filename, encoding) as write:
180*e1fe3e4aSElliott Hughes                if write_declaration:
181*e1fe3e4aSElliott Hughes                    write(XML_DECLARATION % encoding.upper())
182*e1fe3e4aSElliott Hughes                    if pretty_print:
183*e1fe3e4aSElliott Hughes                        write("\n")
184*e1fe3e4aSElliott Hughes                if doctype:
185*e1fe3e4aSElliott Hughes                    write(_tounicode(doctype))
186*e1fe3e4aSElliott Hughes                    if pretty_print:
187*e1fe3e4aSElliott Hughes                        write("\n")
188*e1fe3e4aSElliott Hughes
189*e1fe3e4aSElliott Hughes                qnames, namespaces = _namespaces(self._root)
190*e1fe3e4aSElliott Hughes                _serialize_xml(write, self._root, qnames, namespaces)
191*e1fe3e4aSElliott Hughes
192*e1fe3e4aSElliott Hughes    import io
193*e1fe3e4aSElliott Hughes
194*e1fe3e4aSElliott Hughes    def tostring(
195*e1fe3e4aSElliott Hughes        element,
196*e1fe3e4aSElliott Hughes        encoding=None,
197*e1fe3e4aSElliott Hughes        xml_declaration=None,
198*e1fe3e4aSElliott Hughes        method=None,
199*e1fe3e4aSElliott Hughes        doctype=None,
200*e1fe3e4aSElliott Hughes        pretty_print=False,
201*e1fe3e4aSElliott Hughes    ):
202*e1fe3e4aSElliott Hughes        """Custom 'tostring' function that uses our ElementTree subclass, with
203*e1fe3e4aSElliott Hughes        pretty_print support.
204*e1fe3e4aSElliott Hughes        """
205*e1fe3e4aSElliott Hughes        stream = io.StringIO() if encoding == "unicode" else io.BytesIO()
206*e1fe3e4aSElliott Hughes        ElementTree(element).write(
207*e1fe3e4aSElliott Hughes            stream,
208*e1fe3e4aSElliott Hughes            encoding=encoding,
209*e1fe3e4aSElliott Hughes            xml_declaration=xml_declaration,
210*e1fe3e4aSElliott Hughes            method=method,
211*e1fe3e4aSElliott Hughes            doctype=doctype,
212*e1fe3e4aSElliott Hughes            pretty_print=pretty_print,
213*e1fe3e4aSElliott Hughes        )
214*e1fe3e4aSElliott Hughes        return stream.getvalue()
215*e1fe3e4aSElliott Hughes
216*e1fe3e4aSElliott Hughes    # serialization support
217*e1fe3e4aSElliott Hughes
218*e1fe3e4aSElliott Hughes    import re
219*e1fe3e4aSElliott Hughes
    # Valid XML strings can include any Unicode character, excluding control
    # characters, the surrogate blocks, FFFE, and FFFF:
    #   Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    # Here we reversed the pattern to match only the invalid characters.
    # For the 'narrow' python builds supporting only UCS-2, which represent
    # characters beyond BMP as UTF-16 surrogate pairs, we need to pass through
    # the surrogate block. I haven't found a more elegant solution...
    # NOTE: since Python 3.3 (PEP 393) sys.maxunicode is always 0x10FFFF, so
    # on those versions UCS2 is False and the second pattern is the one used.
    UCS2 = sys.maxunicode < 0x10FFFF
    if UCS2:
        # narrow build: surrogates are allowed through
        _invalid_xml_string = re.compile(
            "[\u0000-\u0008\u000B-\u000C\u000E-\u001F\uFFFE-\uFFFF]"
        )
    else:
        # wide build: lone surrogates (D800-DFFF) are rejected as well
        _invalid_xml_string = re.compile(
            "[\u0000-\u0008\u000B-\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE-\uFFFF]"
        )
236*e1fe3e4aSElliott Hughes
237*e1fe3e4aSElliott Hughes    def _tounicode(s):
238*e1fe3e4aSElliott Hughes        """Test if a string is valid user input and decode it to unicode string
239*e1fe3e4aSElliott Hughes        using ASCII encoding if it's a bytes string.
240*e1fe3e4aSElliott Hughes        Reject all bytes/unicode input that contains non-XML characters.
241*e1fe3e4aSElliott Hughes        Reject all bytes input that contains non-ASCII characters.
242*e1fe3e4aSElliott Hughes        """
243*e1fe3e4aSElliott Hughes        try:
244*e1fe3e4aSElliott Hughes            s = tostr(s, encoding="ascii", errors="strict")
245*e1fe3e4aSElliott Hughes        except UnicodeDecodeError:
246*e1fe3e4aSElliott Hughes            raise ValueError(
247*e1fe3e4aSElliott Hughes                "Bytes strings can only contain ASCII characters. "
248*e1fe3e4aSElliott Hughes                "Use unicode strings for non-ASCII characters."
249*e1fe3e4aSElliott Hughes            )
250*e1fe3e4aSElliott Hughes        except AttributeError:
251*e1fe3e4aSElliott Hughes            _raise_serialization_error(s)
252*e1fe3e4aSElliott Hughes        if s and _invalid_xml_string.search(s):
253*e1fe3e4aSElliott Hughes            raise ValueError(
254*e1fe3e4aSElliott Hughes                "All strings must be XML compatible: Unicode or ASCII, "
255*e1fe3e4aSElliott Hughes                "no NULL bytes or control characters"
256*e1fe3e4aSElliott Hughes            )
257*e1fe3e4aSElliott Hughes        return s
258*e1fe3e4aSElliott Hughes
259*e1fe3e4aSElliott Hughes    import contextlib
260*e1fe3e4aSElliott Hughes
    @contextlib.contextmanager
    def _get_writer(file_or_filename, encoding):
        """Context manager yielding a ``write(text)`` callable for the given
        target.

        'file_or_filename' is either an object with a ``write`` method or a
        file name to be opened for writing. 'encoding' is the output
        encoding; the special value "unicode" means the target is written
        to as text without any encoding step.
        """
        # returns text write method and release all resources after using
        try:
            write = file_or_filename.write
        except AttributeError:
            # file_or_filename is a file name
            f = open(
                file_or_filename,
                "w",
                encoding="utf-8" if encoding == "unicode" else encoding,
                # characters the encoding cannot represent become &#NNN;
                errors="xmlcharrefreplace",
            )
            # the file opened here is closed again when the context exits
            with f:
                yield f.write
        else:
            # file_or_filename is a file-like object
            # encoding determines if it is a text or binary writer
            if encoding == "unicode":
                # use a text writer as is
                yield write
            else:
                # wrap a binary writer with TextIOWrapper
                detach_buffer = False
                if isinstance(file_or_filename, io.BufferedIOBase):
                    buf = file_or_filename
                elif isinstance(file_or_filename, io.RawIOBase):
                    buf = io.BufferedWriter(file_or_filename)
                    # remember that the BufferedWriter is ours, so that it
                    # gets detached from the raw stream on exit
                    detach_buffer = True
                else:
                    # This is to handle passed objects that aren't in the
                    # IOBase hierarchy, but just have a write method
                    buf = io.BufferedIOBase()
                    buf.writable = lambda: True
                    buf.write = write
                    try:
                        # TextIOWrapper uses this methods to determine
                        # if BOM (for UTF-16, etc) should be added
                        buf.seekable = file_or_filename.seekable
                        buf.tell = file_or_filename.tell
                    except AttributeError:
                        pass
                wrapper = io.TextIOWrapper(
                    buf,
                    encoding=encoding,
                    errors="xmlcharrefreplace",
                    newline="\n",
                )
                try:
                    yield wrapper.write
                finally:
                    # Keep the original file open when the TextIOWrapper and
                    # the BufferedWriter are destroyed
                    wrapper.detach()
                    if detach_buffer:
                        buf.detach()
317*e1fe3e4aSElliott Hughes
318*e1fe3e4aSElliott Hughes    from xml.etree.ElementTree import _namespace_map
319*e1fe3e4aSElliott Hughes
320*e1fe3e4aSElliott Hughes    def _namespaces(elem):
321*e1fe3e4aSElliott Hughes        # identify namespaces used in this tree
322*e1fe3e4aSElliott Hughes
323*e1fe3e4aSElliott Hughes        # maps qnames to *encoded* prefix:local names
324*e1fe3e4aSElliott Hughes        qnames = {None: None}
325*e1fe3e4aSElliott Hughes
326*e1fe3e4aSElliott Hughes        # maps uri:s to prefixes
327*e1fe3e4aSElliott Hughes        namespaces = {}
328*e1fe3e4aSElliott Hughes
329*e1fe3e4aSElliott Hughes        def add_qname(qname):
330*e1fe3e4aSElliott Hughes            # calculate serialized qname representation
331*e1fe3e4aSElliott Hughes            try:
332*e1fe3e4aSElliott Hughes                qname = _tounicode(qname)
333*e1fe3e4aSElliott Hughes                if qname[:1] == "{":
334*e1fe3e4aSElliott Hughes                    uri, tag = qname[1:].rsplit("}", 1)
335*e1fe3e4aSElliott Hughes                    prefix = namespaces.get(uri)
336*e1fe3e4aSElliott Hughes                    if prefix is None:
337*e1fe3e4aSElliott Hughes                        prefix = _namespace_map.get(uri)
338*e1fe3e4aSElliott Hughes                        if prefix is None:
339*e1fe3e4aSElliott Hughes                            prefix = "ns%d" % len(namespaces)
340*e1fe3e4aSElliott Hughes                        else:
341*e1fe3e4aSElliott Hughes                            prefix = _tounicode(prefix)
342*e1fe3e4aSElliott Hughes                        if prefix != "xml":
343*e1fe3e4aSElliott Hughes                            namespaces[uri] = prefix
344*e1fe3e4aSElliott Hughes                    if prefix:
345*e1fe3e4aSElliott Hughes                        qnames[qname] = "%s:%s" % (prefix, tag)
346*e1fe3e4aSElliott Hughes                    else:
347*e1fe3e4aSElliott Hughes                        qnames[qname] = tag  # default element
348*e1fe3e4aSElliott Hughes                else:
349*e1fe3e4aSElliott Hughes                    qnames[qname] = qname
350*e1fe3e4aSElliott Hughes            except TypeError:
351*e1fe3e4aSElliott Hughes                _raise_serialization_error(qname)
352*e1fe3e4aSElliott Hughes
353*e1fe3e4aSElliott Hughes        # populate qname and namespaces table
354*e1fe3e4aSElliott Hughes        for elem in elem.iter():
355*e1fe3e4aSElliott Hughes            tag = elem.tag
356*e1fe3e4aSElliott Hughes            if isinstance(tag, QName):
357*e1fe3e4aSElliott Hughes                if tag.text not in qnames:
358*e1fe3e4aSElliott Hughes                    add_qname(tag.text)
359*e1fe3e4aSElliott Hughes            elif isinstance(tag, str):
360*e1fe3e4aSElliott Hughes                if tag not in qnames:
361*e1fe3e4aSElliott Hughes                    add_qname(tag)
362*e1fe3e4aSElliott Hughes            elif tag is not None and tag is not Comment and tag is not PI:
363*e1fe3e4aSElliott Hughes                _raise_serialization_error(tag)
364*e1fe3e4aSElliott Hughes            for key, value in elem.items():
365*e1fe3e4aSElliott Hughes                if isinstance(key, QName):
366*e1fe3e4aSElliott Hughes                    key = key.text
367*e1fe3e4aSElliott Hughes                if key not in qnames:
368*e1fe3e4aSElliott Hughes                    add_qname(key)
369*e1fe3e4aSElliott Hughes                if isinstance(value, QName) and value.text not in qnames:
370*e1fe3e4aSElliott Hughes                    add_qname(value.text)
371*e1fe3e4aSElliott Hughes            text = elem.text
372*e1fe3e4aSElliott Hughes            if isinstance(text, QName) and text.text not in qnames:
373*e1fe3e4aSElliott Hughes                add_qname(text.text)
374*e1fe3e4aSElliott Hughes        return qnames, namespaces
375*e1fe3e4aSElliott Hughes
    def _serialize_xml(write, elem, qnames, namespaces, **kwargs):
        """Write the XML serialization of 'elem', its descendants and its
        tail text by calling 'write' with successive text fragments.

        'qnames' maps tag and attribute names to their serialized form.
        'namespaces' maps URIs to prefixes; when non-empty, the matching
        xmlns declarations are written on this element. Recursive calls for
        the children pass None, so declarations appear only on the element
        this function was first called with. Extra keyword arguments are
        accepted but not used.
        """
        tag = elem.tag
        text = elem.text
        if tag is Comment:
            write("<!--%s-->" % _tounicode(text))
        elif tag is ProcessingInstruction:
            write("<?%s?>" % _tounicode(text))
        else:
            tag = qnames[_tounicode(tag) if tag is not None else None]
            if tag is None:
                # tag-less element: only its text and children are written,
                # without any enclosing start/end tags
                if text:
                    write(_escape_cdata(text))
                for e in elem:
                    _serialize_xml(write, e, qnames, None)
            else:
                write("<" + tag)
                if namespaces:
                    for uri, prefix in sorted(
                        namespaces.items(), key=lambda x: x[1]
                    ):  # sort on prefix
                        if prefix:
                            prefix = ":" + prefix
                        write(' xmlns%s="%s"' % (prefix, _escape_attrib(uri)))
                attrs = elem.attrib
                if attrs:
                    # try to keep existing attrib order
                    if len(attrs) <= 1 or type(attrs) is _Attrib:
                        items = attrs.items()
                    else:
                        # if plain dict, use lexical order
                        items = sorted(attrs.items())
                    for k, v in items:
                        if isinstance(k, QName):
                            k = _tounicode(k.text)
                        else:
                            k = _tounicode(k)
                        if isinstance(v, QName):
                            v = qnames[_tounicode(v.text)]
                        else:
                            v = _escape_attrib(v)
                        write(' %s="%s"' % (qnames[k], v))
                # An element whose text is None and that has no children is
                # written in the short "<tag/>" form; an empty-string text
                # still produces an explicit start/end tag pair.
                if text is not None or len(elem):
                    write(">")
                    if text:
                        write(_escape_cdata(text))
                    for e in elem:
                        _serialize_xml(write, e, qnames, None)
                    write("</" + tag + ">")
                else:
                    write("/>")
        # the tail text follows the element, whatever kind of node it was
        if elem.tail:
            write(_escape_cdata(elem.tail))
428*e1fe3e4aSElliott Hughes
429*e1fe3e4aSElliott Hughes    def _raise_serialization_error(text):
430*e1fe3e4aSElliott Hughes        raise TypeError("cannot serialize %r (type %s)" % (text, type(text).__name__))
431*e1fe3e4aSElliott Hughes
432*e1fe3e4aSElliott Hughes    def _escape_cdata(text):
433*e1fe3e4aSElliott Hughes        # escape character data
434*e1fe3e4aSElliott Hughes        try:
435*e1fe3e4aSElliott Hughes            text = _tounicode(text)
436*e1fe3e4aSElliott Hughes            # it's worth avoiding do-nothing calls for short strings
437*e1fe3e4aSElliott Hughes            if "&" in text:
438*e1fe3e4aSElliott Hughes                text = text.replace("&", "&amp;")
439*e1fe3e4aSElliott Hughes            if "<" in text:
440*e1fe3e4aSElliott Hughes                text = text.replace("<", "&lt;")
441*e1fe3e4aSElliott Hughes            if ">" in text:
442*e1fe3e4aSElliott Hughes                text = text.replace(">", "&gt;")
443*e1fe3e4aSElliott Hughes            return text
444*e1fe3e4aSElliott Hughes        except (TypeError, AttributeError):
445*e1fe3e4aSElliott Hughes            _raise_serialization_error(text)
446*e1fe3e4aSElliott Hughes
447*e1fe3e4aSElliott Hughes    def _escape_attrib(text):
448*e1fe3e4aSElliott Hughes        # escape attribute value
449*e1fe3e4aSElliott Hughes        try:
450*e1fe3e4aSElliott Hughes            text = _tounicode(text)
451*e1fe3e4aSElliott Hughes            if "&" in text:
452*e1fe3e4aSElliott Hughes                text = text.replace("&", "&amp;")
453*e1fe3e4aSElliott Hughes            if "<" in text:
454*e1fe3e4aSElliott Hughes                text = text.replace("<", "&lt;")
455*e1fe3e4aSElliott Hughes            if ">" in text:
456*e1fe3e4aSElliott Hughes                text = text.replace(">", "&gt;")
457*e1fe3e4aSElliott Hughes            if '"' in text:
458*e1fe3e4aSElliott Hughes                text = text.replace('"', "&quot;")
459*e1fe3e4aSElliott Hughes            if "\n" in text:
460*e1fe3e4aSElliott Hughes                text = text.replace("\n", "&#10;")
461*e1fe3e4aSElliott Hughes            return text
462*e1fe3e4aSElliott Hughes        except (TypeError, AttributeError):
463*e1fe3e4aSElliott Hughes            _raise_serialization_error(text)
464*e1fe3e4aSElliott Hughes
465*e1fe3e4aSElliott Hughes    def _indent(elem, level=0):
466*e1fe3e4aSElliott Hughes        # From http://effbot.org/zone/element-lib.htm#prettyprint
467*e1fe3e4aSElliott Hughes        i = "\n" + level * "  "
468*e1fe3e4aSElliott Hughes        if len(elem):
469*e1fe3e4aSElliott Hughes            if not elem.text or not elem.text.strip():
470*e1fe3e4aSElliott Hughes                elem.text = i + "  "
471*e1fe3e4aSElliott Hughes            if not elem.tail or not elem.tail.strip():
472*e1fe3e4aSElliott Hughes                elem.tail = i
473*e1fe3e4aSElliott Hughes            for elem in elem:
474*e1fe3e4aSElliott Hughes                _indent(elem, level + 1)
475*e1fe3e4aSElliott Hughes            if not elem.tail or not elem.tail.strip():
476*e1fe3e4aSElliott Hughes                elem.tail = i
477*e1fe3e4aSElliott Hughes        else:
478*e1fe3e4aSElliott Hughes            if level and (not elem.tail or not elem.tail.strip()):
479*e1fe3e4aSElliott Hughes                elem.tail = i
480