xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/xml/dom/minidom.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
1"""Simple implementation of the Level 1 DOM.
2
3Namespaces and other minor Level 2 features are also supported.
4
5parse("foo.xml")
6
7parseString("<foo><bar/></foo>")
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14        interface
15 * SAX 2 namespaces
16"""
17
18import io
19import xml.dom
20
21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
22from xml.dom.minicompat import *
23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
24
# Node types whose insertion or removal triggers an ID-cache reset in the
# child-manipulation methods of Node.
#
# This is used by the ID-cache invalidation checks; the list isn't
# actually complete, since the nodes being checked will never be the
# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
# the node being added or removed, not the node being modified.)
#
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
32
33
class Node(xml.dom.Node):
    """Base class shared by the minidom node types.

    Implements child-list maintenance (insert/append/replace/remove),
    serialisation helpers, text normalisation and the user-data API.
    The methods rely on attributes this class does not define itself:
    ``nodeType``, ``childNodes``, ``_child_node_types`` and ``writexml()``
    have to be supplied by the concrete node class.
    """

    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __bool__(self):
        """A node is always truthy."""
        return True

    def toxml(self, encoding=None, standalone=None):
        """Serialise the node without added indentation or newlines.

        Same as ``toprettyxml("", "", encoding, standalone)``.
        """
        return self.toprettyxml("", "", encoding, standalone)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None,
                    standalone=None):
        """Serialise the node through its ``writexml()`` method.

        indent, newl -- passed to writexml() as the per-level indentation
                        and the newline string.
        encoding     -- when None a str is returned; otherwise the output
                        is encoded with it and bytes are returned.
                        Characters the encoding cannot represent are
                        written as character references.
        standalone   -- forwarded to writexml() for document nodes only.
        """
        if encoding is None:
            writer = io.StringIO()
        else:
            writer = io.TextIOWrapper(io.BytesIO(),
                                      encoding=encoding,
                                      errors="xmlcharrefreplace",
                                      newline='\n')
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding, standalone)
        else:
            self.writexml(writer, "", indent, newl)
        if encoding is None:
            return writer.getvalue()
        else:
            # detach() flushes the text layer and hands back the BytesIO.
            return writer.detach().getvalue()

    def hasChildNodes(self):
        """Return True if the node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        # Falls through (returning None) when there are no children.
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        # Falls through (returning None) when there are no children.
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild in front of refChild and return newChild.

        A document fragment is expanded: each of its children is inserted
        in turn and the fragment itself is returned.  When refChild is
        None the node is appended.

        Raises HierarchyRequestErr if this node cannot hold newChild's
        node type, and NotFoundErr if refChild is not a child of this
        node.  The tree is left untouched when refChild is not a child.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if refChild is not None and refChild not in self.childNodes:
            # Check the reference node before detaching newChild, so a bad
            # call does not silently remove newChild from its old parent.
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            try:
                index = self.childNodes.index(refChild)
            except ValueError:
                # Only reachable when detaching newChild also removed
                # refChild, i.e. both are the same node.
                raise xml.dom.NotFoundErr()
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        A document fragment is expanded child by child and the fragment is
        returned.  A node that already has a parent is removed from it
        first.  Raises HierarchyRequestErr if this node cannot hold
        node's node type.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in the place of oldChild and return oldChild.

        With a document fragment, oldChild is removed and the fragment's
        children are inserted where it was; the fragment is returned.
        Replacing a node with itself does nothing and returns None.

        Raises HierarchyRequestErr if this node cannot hold newChild's
        node type, and NotFoundErr if oldChild is not a child of this
        node.  The tree is left untouched when oldChild is not a child.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        if oldChild not in self.childNodes:
            # Check before detaching newChild so a bad call does not
            # silently remove newChild from its old parent.
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # Looked up after the detach: newChild may have been an earlier
        # sibling, which shifts oldChild's position.
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises NotFoundErr if oldChild is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Close the gap in the sibling chain.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Child elements are normalised recursively.  L collects the
        children that survive; it replaces the child list at the end.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node made by _clone_node().

        The owner document is used as the new owner; a node without one
        passes itself instead.
        """
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owning document's implementation about a feature.

        Like cloneNode(), a node without an owner document stands in for
        the document itself, so the call no longer fails on None.
        NOTE(review): this assumes such a node carries ``implementation``;
        if it does not, AttributeError is raised as before.
        """
        owner = self.ownerDocument or self
        return owner.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self if the feature is supported, else None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store (data, handler) under key and return the previous data.

        Passing None as data removes the entry; the handler is ignored in
        that case.  Returns None when nothing was stored before.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # Nothing is stored for None, so the handler is dropped too.
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        """Invoke every registered user-data handler for operation."""
        if hasattr(self, "_user_data"):
            # Iterate over a snapshot of the entries.
            for key, (data, handler) in list(self._user_data.items()):
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Clear this node's tree references, recursively for children.

        The parent, owner document and sibling references are reset and
        the child list is replaced by a fresh empty NodeList.
        """
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

    # A Node is its own context manager, to ensure that an unlink() call occurs.
    # This is similar to how a file object works.
    def __enter__(self):
        return self

    def __exit__(self, et, ev, tb):
        self.unlink()

defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
283
284
def _append_child(self, node):
    """Attach node as the last child of self without any validation.

    Fast path with fewer checks than Node.appendChild(); usable by DOM
    builders if careful.  Only the links between node, the previous last
    child and self are written.
    """
    siblings = self.childNodes
    if siblings:
        tail = siblings[-1]
        node.previousSibling = tail
        tail.nextSibling = node
    siblings.append(node)
    node.parentNode = self
294
def _in_document(node):
    """Return True iff node is part of a document tree.

    Walks the parentNode chain starting at node itself and succeeds as
    soon as a DOCUMENT_NODE is met.
    """
    current = node
    while True:
        if current is None:
            return False
        if current.nodeType == Node.DOCUMENT_NODE:
            return True
        current = current.parentNode
302
def _write_data(writer, data):
    "Writes datachars to writer."
    if not data:
        # Nothing is written for empty or None data.
        return
    # "&" comes first so the entities produced by the later replacements
    # are not escaped a second time.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"),
                        ("\"", "&quot;"), (">", "&gt;")):
        data = data.replace(raw, entity)
    writer.write(data)
309
def _get_elements_by_tagName_helper(parent, name, rc):
    """Collect into rc the descendant elements of parent named name.

    The tree is walked depth-first in document order; "*" matches every
    element.  Every child is descended into, whatever its node type.
    Returns rc.
    """
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if name == "*" or child.tagName == name:
                rc.append(child)
        _get_elements_by_tagName_helper(child, name, rc)
    return rc
317
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Collect into rc the descendant elements matching a namespaced name.

    "*" acts as a wildcard for either nsURI or localName.  Only element
    children are descended into.  Returns rc.
    """
    for child in parent.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            continue
        name_matches = localName == "*" or child.localName == localName
        if name_matches and (nsURI == "*" or child.namespaceURI == nsURI):
            rc.append(child)
        _get_elements_by_tagName_ns_helper(child, nsURI, localName, rc)
    return rc
326
class DocumentFragment(Node):
    """Node of type DOCUMENT_FRAGMENT_NODE, created with no children."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = attributes = parentNode = None

    # Node types accepted by the child-insertion methods of Node.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        self.childNodes = NodeList()
343
344
class Attr(Node):
    """Attribute node.

    The string value is kept in ``_value`` and mirrored in the ``data`` of
    a single Text child.  The constructor does not set a value; callers
    assign ``value`` (or ``nodeValue``) afterwards.
    """

    __slots__=('_name', '_value', 'namespaceURI',
               '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        self.ownerElement = None
        # 'ownerDocument' is a slot of this class, which hides the None
        # default inherited from Node.  Initialise it so that reading it
        # (e.g. through isId) on a fresh attribute does not raise
        # AttributeError.
        self.ownerDocument = None
        self._name = qName
        self.namespaceURI = namespaceURI
        self._prefix = prefix
        if localName is not None:
            self._localName = localName
        self.childNodes = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        # Without an explicit local name, use what follows the first ":"
        # of the qualified name (or the whole name when there is no ":").
        try:
            return self._localName
        except AttributeError:
            return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def _get_name(self):
        return self._name

    def _set_name(self, value):
        self._name = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeName = name = property(_get_name, _set_name)

    def _get_value(self):
        return self._value

    def _set_value(self, value):
        # Keep the Text child in step with the stored value.
        self._value = value
        self.childNodes[0].data = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeValue = value = property(_get_value, _set_value)

    def _get_prefix(self):
        return self._prefix

    def _set_prefix(self, prefix):
        """Change the prefix and rebuild the qualified name from it.

        Raises NamespaceErr when "xmlns" is used with a namespace other
        than the XMLNS one.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        self._prefix = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.name = newName

    prefix = property(_get_prefix, _set_prefix)

    def unlink(self):
        """Remove the attribute from its owner element and drop its children.

        Calling it again on an already unlinked attribute is a no-op.
        """
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The element no longer holds this attribute; forget it so a
            # second unlink() does not try to delete the entries again.
            self.ownerElement = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        """Return True if the attribute is flagged, or declared, as an ID."""
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        """Return the type reported by the document's element info.

        Falls back to ``_no_type`` when the attribute has no owner
        document or element, or no element info is available.
        """
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)

defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
471
472
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs maps qualified names to attribute nodes; attrsNS maps
        # (namespaceURI, localName) pairs to the same nodes.
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute node at position index, or None if the
        index is out of range."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (qualified name, value) pairs."""
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def __contains__(self, key):
        # Strings are looked up as qualified names, anything else as a
        # (namespaceURI, localName) key.
        if isinstance(key, str):
            return key in self._attrs
        else:
            return key in self._attrsNS

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    def _cmp(self, other):
        # Two maps over the same dictionary are equal; otherwise they are
        # ordered by object identity.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return (id(self) > id(other)) - (id(self) < id(other))

    def __eq__(self, other):
        return self._cmp(other) == 0

    def __ge__(self, other):
        return self._cmp(other) >= 0

    def __gt__(self, other):
        return self._cmp(other) > 0

    def __le__(self, other):
        return self._cmp(other) <= 0

    def __lt__(self, other):
        return self._cmp(other) < 0

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Assign a string value to the attribute attname (creating it if
        needed), or store an Attr node.

        Raises TypeError for any other kind of value.
        """
        if isinstance(value, str):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute node called name, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the node for (namespaceURI, localName), or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute called name.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if hasattr(n, 'ownerElement'):
                n.ownerElement = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute for (namespaceURI, localName).

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if hasattr(n, 'ownerElement'):
                n.ownerElement = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store node under its name and return the node it displaced.

        Returns None when no attribute had that name.  Raises
        HierarchyRequestErr if node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old is not None and old is not node:
            # Only a *different* node is unlinked: unlinking the node that
            # is being stored would delete the Text child holding its
            # value.
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        # Attr.unlink() deletes the node from both dictionaries.
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state

defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

AttributeList = NamedNodeMap
640
641
class TypeInfo(object):
    """Plain holder for a type's namespace and name."""

    __slots__ = ('namespace', 'name')

    def __init__(self, namespace, name):
        self.namespace = namespace
        self.name = name

    def __repr__(self):
        # The namespace is only shown when it is set to something truthy.
        cls_name = self.__class__.__name__
        if not self.namespace:
            return "<%s %r>" % (cls_name, self.name)
        return "<%s %r (from %r)>" % (cls_name, self.name, self.namespace)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace

# Shared instance with neither a namespace nor a name.
_no_type = TypeInfo(None, None)
663
664class Element(Node):
665    __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
666               'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
667               'nextSibling', 'previousSibling')
668    nodeType = Node.ELEMENT_NODE
669    nodeValue = None
670    schemaType = _no_type
671
672    _magic_id_nodes = 0
673
674    _child_node_types = (Node.ELEMENT_NODE,
675                         Node.PROCESSING_INSTRUCTION_NODE,
676                         Node.COMMENT_NODE,
677                         Node.TEXT_NODE,
678                         Node.CDATA_SECTION_NODE,
679                         Node.ENTITY_REFERENCE_NODE)
680
681    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
682                 localName=None):
683        self.parentNode = None
684        self.tagName = self.nodeName = tagName
685        self.prefix = prefix
686        self.namespaceURI = namespaceURI
687        self.childNodes = NodeList()
688        self.nextSibling = self.previousSibling = None
689
690        # Attribute dictionaries are lazily created
691        # attributes are double-indexed:
692        #    tagName -> Attribute
693        #    URI,localName -> Attribute
694        # in the future: consider lazy generation
695        # of attribute objects this is too tricky
696        # for now because of headaches with
697        # namespaces.
698        self._attrs = None
699        self._attrsNS = None
700
701    def _ensure_attributes(self):
702        if self._attrs is None:
703            self._attrs = {}
704            self._attrsNS = {}
705
706    def _get_localName(self):
707        try:
708            return self._localName
709        except AttributeError:
710            return self.tagName.split(":", 1)[-1]
711
    def _get_tagName(self):
        # Accessor-style getter; simply returns the stored tagName.
        return self.tagName
714
715    def unlink(self):
716        if self._attrs is not None:
717            for attr in list(self._attrs.values()):
718                attr.unlink()
719        self._attrs = None
720        self._attrsNS = None
721        Node.unlink(self)
722
723    def getAttribute(self, attname):
724        """Returns the value of the specified attribute.
725
726        Returns the value of the element's attribute named attname as
727        a string. An empty string is returned if the element does not
728        have such an attribute. Note that an empty string may also be
729        returned as an explicitly given attribute value, use the
730        hasAttribute method to distinguish these two cases.
731        """
732        if self._attrs is None:
733            return ""
734        try:
735            return self._attrs[attname].value
736        except KeyError:
737            return ""
738
739    def getAttributeNS(self, namespaceURI, localName):
740        if self._attrsNS is None:
741            return ""
742        try:
743            return self._attrsNS[(namespaceURI, localName)].value
744        except KeyError:
745            return ""
746
747    def setAttribute(self, attname, value):
748        attr = self.getAttributeNode(attname)
749        if attr is None:
750            attr = Attr(attname)
751            attr.value = value # also sets nodeValue
752            attr.ownerDocument = self.ownerDocument
753            self.setAttributeNode(attr)
754        elif value != attr.value:
755            attr.value = value
756            if attr.isId:
757                _clear_id_cache(self)
758
759    def setAttributeNS(self, namespaceURI, qualifiedName, value):
760        prefix, localname = _nssplit(qualifiedName)
761        attr = self.getAttributeNodeNS(namespaceURI, localname)
762        if attr is None:
763            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
764            attr.value = value
765            attr.ownerDocument = self.ownerDocument
766            self.setAttributeNode(attr)
767        else:
768            if value != attr.value:
769                attr.value = value
770                if attr.isId:
771                    _clear_id_cache(self)
772            if attr.prefix != prefix:
773                attr.prefix = prefix
774                attr.nodeName = qualifiedName
775
776    def getAttributeNode(self, attrname):
777        if self._attrs is None:
778            return None
779        return self._attrs.get(attrname)
780
781    def getAttributeNodeNS(self, namespaceURI, localName):
782        if self._attrsNS is None:
783            return None
784        return self._attrsNS.get((namespaceURI, localName))
785
786    def setAttributeNode(self, attr):
787        if attr.ownerElement not in (None, self):
788            raise xml.dom.InuseAttributeErr("attribute node already owned")
789        self._ensure_attributes()
790        old1 = self._attrs.get(attr.name, None)
791        if old1 is not None:
792            self.removeAttributeNode(old1)
793        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
794        if old2 is not None and old2 is not old1:
795            self.removeAttributeNode(old2)
796        _set_attribute_node(self, attr)
797
798        if old1 is not attr:
799            # It might have already been part of this node, in which case
800            # it doesn't represent a change, and should not be returned.
801            return old1
802        if old2 is not attr:
803            return old2
804
805    setAttributeNodeNS = setAttributeNode
806
807    def removeAttribute(self, name):
808        if self._attrsNS is None:
809            raise xml.dom.NotFoundErr()
810        try:
811            attr = self._attrs[name]
812        except KeyError:
813            raise xml.dom.NotFoundErr()
814        self.removeAttributeNode(attr)
815
816    def removeAttributeNS(self, namespaceURI, localName):
817        if self._attrsNS is None:
818            raise xml.dom.NotFoundErr()
819        try:
820            attr = self._attrsNS[(namespaceURI, localName)]
821        except KeyError:
822            raise xml.dom.NotFoundErr()
823        self.removeAttributeNode(attr)
824
825    def removeAttributeNode(self, node):
826        if node is None:
827            raise xml.dom.NotFoundErr()
828        try:
829            self._attrs[node.name]
830        except KeyError:
831            raise xml.dom.NotFoundErr()
832        _clear_id_cache(self)
833        node.unlink()
834        # Restore this since the node is still useful and otherwise
835        # unlinked
836        node.ownerDocument = self.ownerDocument
837        return node
838
839    removeAttributeNodeNS = removeAttributeNode
840
841    def hasAttribute(self, name):
842        """Checks whether the element has an attribute with the specified name.
843
844        Returns True if the element has an attribute with the specified name.
845        Otherwise, returns False.
846        """
847        if self._attrs is None:
848            return False
849        return name in self._attrs
850
851    def hasAttributeNS(self, namespaceURI, localName):
852        if self._attrsNS is None:
853            return False
854        return (namespaceURI, localName) in self._attrsNS
855
856    def getElementsByTagName(self, name):
857        """Returns all descendant elements with the given tag name.
858
859        Returns the list of all descendant elements (not direct children
860        only) with the specified tag name.
861        """
862        return _get_elements_by_tagName_helper(self, name, NodeList())
863
864    def getElementsByTagNameNS(self, namespaceURI, localName):
865        return _get_elements_by_tagName_ns_helper(
866            self, namespaceURI, localName, NodeList())
867
868    def __repr__(self):
869        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
870
871    def writexml(self, writer, indent="", addindent="", newl=""):
872        """Write an XML element to a file-like object
873
874        Write the element to the writer object that must provide
875        a write method (e.g. a file or StringIO object).
876        """
877        # indent = current indentation
878        # addindent = indentation to add to higher levels
879        # newl = newline string
880        writer.write(indent+"<" + self.tagName)
881
882        attrs = self._get_attributes()
883
884        for a_name in attrs.keys():
885            writer.write(" %s=\"" % a_name)
886            _write_data(writer, attrs[a_name].value)
887            writer.write("\"")
888        if self.childNodes:
889            writer.write(">")
890            if (len(self.childNodes) == 1 and
891                self.childNodes[0].nodeType in (
892                        Node.TEXT_NODE, Node.CDATA_SECTION_NODE)):
893                self.childNodes[0].writexml(writer, '', '', '')
894            else:
895                writer.write(newl)
896                for node in self.childNodes:
897                    node.writexml(writer, indent+addindent, addindent, newl)
898                writer.write(indent)
899            writer.write("</%s>%s" % (self.tagName, newl))
900        else:
901            writer.write("/>%s"%(newl))
902
903    def _get_attributes(self):
904        self._ensure_attributes()
905        return NamedNodeMap(self._attrs, self._attrsNS, self)
906
907    def hasAttributes(self):
908        if self._attrs:
909            return True
910        else:
911            return False
912
913    # DOM Level 3 attributes, based on the 22 Oct 2002 draft
914
915    def setIdAttribute(self, name):
916        idAttr = self.getAttributeNode(name)
917        self.setIdAttributeNode(idAttr)
918
919    def setIdAttributeNS(self, namespaceURI, localName):
920        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
921        self.setIdAttributeNode(idAttr)
922
923    def setIdAttributeNode(self, idAttr):
924        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
925            raise xml.dom.NotFoundErr()
926        if _get_containing_entref(self) is not None:
927            raise xml.dom.NoModificationAllowedErr()
928        if not idAttr._is_id:
929            idAttr._is_id = True
930            self._magic_id_nodes += 1
931            self.ownerDocument._magic_id_count += 1
932            _clear_id_cache(self)
933
# Register the "attributes" and "localName" properties on Element via
# defproperty (star-imported from xml.dom.minicompat).
defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
938
939
def _set_attribute_node(element, attr):
    """Store *attr* on *element* under both its plain and namespaced keys.

    The ID cache is cleared first, the element's attribute tables are
    created if needed, and finally the attribute is pointed back at its
    owner element.
    """
    _clear_id_cache(element)
    element._ensure_attributes()
    element._attrs[attr.name] = attr
    ns_key = (attr.namespaceURI, attr.localName)
    element._attrsNS[ns_key] = attr

    # The back-reference below forms a cycle; Element.unlink() breaks it
    # because the references to the attribute dictionaries are tossed.
    attr.ownerElement = element
950
class Childless:
    """Mixin for node types that can never have children.

    It supplies fixed "no children" answers and rejecting mutators, so
    such nodes avoid the complexity of the child-handling Node methods.
    """
    __slots__ = ()

    attributes = None
    firstChild = None
    lastChild = None
    childNodes = EmptyNodeList()

    def _get_firstChild(self):
        """Always None: there is no first child."""
        return None

    def _get_lastChild(self):
        """Always None: there is no last child."""
        return None

    def hasChildNodes(self):
        """Always False."""
        return False

    def normalize(self):
        """Do nothing; a childless node has nothing to normalize."""
        pass

    def appendChild(self, node):
        """Reject the request with HierarchyRequestErr."""
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        """Reject the request with HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Reject the request with NotFoundErr (there is nothing to remove)."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def replaceChild(self, newChild, oldChild):
        """Reject the request with HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
990
991
class ProcessingInstruction(Childless, Node):
    """Processing instruction node holding a *target* and its *data*.

    nodeName mirrors ``target`` and nodeValue mirrors ``data``; both are
    readable and writable through those aliases.
    """
    __slots__ = ('target', 'data')
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        self.target = target
        self.data = data

    # nodeName is an alias for target
    def _get_nodeName(self):
        return self.target

    def _set_nodeName(self, value):
        self.target = value

    nodeName = property(_get_nodeName, _set_nodeName)

    # nodeValue is an alias for data
    def _get_nodeValue(self):
        return self.data

    def _set_nodeValue(self, value):
        self.data = value

    nodeValue = property(_get_nodeValue, _set_nodeValue)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the instruction to *writer*, framed by *indent* and *newl*.

        *addindent* is accepted for signature compatibility and unused.
        """
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
1016
1017
class CharacterData(Childless, Node):
    """Base class for nodes whose content is a single string.

    The string is exposed as both ``data`` and ``nodeValue``.  The
    offset-based editing methods raise xml.dom.IndexSizeErr when the
    offset is negative or greater than the length of the data, or when a
    count is negative.  An offset equal to the length is accepted (it
    addresses the end of the data), matching Text.splitText().
    """
    __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')

    def __init__(self):
        self.ownerDocument = self.parentNode = None
        self.previousSibling = self.nextSibling = None
        self._data = ''
        Node.__init__(self)

    def _get_length(self):
        """Return the number of characters in the data."""
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self._data
    def _set_data(self, data):
        self._data = data

    data = nodeValue = property(_get_data, _set_data)

    def __repr__(self):
        data = self.data
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_offset(self, offset):
        """Raise IndexSizeErr unless 0 <= offset <= len(self.data)."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")

    def substringData(self, offset, count):
        """Return up to *count* characters starting at *offset*."""
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append *arg* to the end of the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert *arg* at *offset*; offset == length appends."""
        self._check_offset(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Remove up to *count* characters starting at *offset*."""
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to *count* characters at *offset* with *arg*.

        With a count of zero this inserts *arg* at *offset*.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        # Only rebuild the string when something actually changes; a
        # zero count with a non-empty arg is a pure insertion.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])
1088
# Register the "length" property on CharacterData via defproperty
# (star-imported from xml.dom.minicompat).
defproperty(CharacterData, "length", doc="Length of the string data.")
1090
1091
class Text(CharacterData):
    """Text node (nodeType TEXT_NODE, nodeName "#text")."""
    __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split this node at *offset* and return the new trailing node.

        The new node receives the data from *offset* onwards and, when
        this node is attached to a parent, is inserted directly after it.
        Raises xml.dom.IndexSizeErr if *offset* is negative or greater
        than the length of the data.
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = self.__class__()
        newText.data = self.data[offset:]
        newText.ownerDocument = self.ownerDocument
        next = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if next is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, next)
        self.data = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write indent + data + newl to *writer* through _write_data."""
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Return the data of this node joined with adjacent text nodes.

        Adjacent means the unbroken run of TEXT_NODE / CDATA_SECTION_NODE
        siblings on either side of this node, in document order.
        """
        L = [self.data]
        n = self.previousSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                L.insert(0, n.data)
                n = n.previousSibling
            else:
                break
        n = self.nextSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                L.append(n.data)
                n = n.nextSibling
            else:
                break
        return ''.join(L)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text siblings with *content*.

        All adjacent text/CDATA siblings are removed from the parent.  If
        *content* is non-empty it becomes this node's data and the node
        is returned; otherwise this node is removed too (when it has a
        parent) and None is returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        parent = self.parentNode
        n = self.previousSibling
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                next = n.previousSibling
                parent.removeChild(n)
                n = next
            else:
                break
        # Capture the following sibling before this node may be detached.
        n = self.nextSibling
        # A node without a parent has nothing to be removed from; without
        # this guard an empty *content* raised AttributeError on None.
        if not content and parent is not None:
            parent.removeChild(self)
        while n is not None:
            if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                next = n.nextSibling
                parent.removeChild(n)
                n = next
            else:
                break
        if content:
            self.data = content
            return self
        else:
            return None

    def _get_isWhitespaceInElementContent(self):
        """Return True for whitespace-only text inside element content.

        False is returned when the data contains non-whitespace, when
        there is no containing element, or when the owner document has no
        element info for that element.
        """
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        else:
            return info.isElementContent()
1176
# Register the "isWhitespaceInElementContent" and "wholeText" properties
# on Text via defproperty (star-imported from xml.dom.minicompat).
defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1182
1183
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1191
def _get_containing_entref(node):
    """Return the nearest entity-reference ancestor of *node*, or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1199
1200
class Comment(CharacterData):
    """Comment node (nodeType COMMENT_NODE, nodeName "#comment")."""
    nodeName = "#comment"
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        CharacterData.__init__(self)
        self._data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the comment to *writer*, framed by *indent* and *newl*.

        Raises ValueError if the data contains "--"; nothing is written
        in that case.  *addindent* is unused.
        """
        text = self.data
        if "--" in text:
            raise ValueError("'--' is not allowed in a comment node")
        pieces = (indent, text, newl)
        writer.write("%s<!--%s-->%s" % pieces)
1213
1214
class CDATASection(Text):
    """CDATA section node (nodeType CDATA_SECTION_NODE)."""
    __slots__ = ()

    nodeName = "#cdata-section"
    nodeType = Node.CDATA_SECTION_NODE

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the data wrapped in a CDATA section to *writer*.

        Raises ValueError if the data contains the "]]>" terminator.
        The indent, addindent and newl arguments are not used.
        """
        text = self.data
        if "]]>" in text:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % text)
1225
1226
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only named node map backed by a plain sequence of nodes.

    Lookups scan the sequence linearly and return the first match; every
    mutating method raises xml.dom.NoModificationAllowedErr.
    """
    __slots__ = ('_seq',)

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, else None."""
        return next((n for n in self._seq if n.nodeName == name), None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching both values, else None."""
        return next(
            (n for n in self._seq
             if n.namespaceURI == namespaceURI and n.localName == localName),
            None)

    def __getitem__(self, name_or_tuple):
        """Look up by name, or by a (namespaceURI, localName) tuple.

        Raises KeyError when nothing matches.
        """
        if isinstance(name_or_tuple, tuple):
            found = self.getNamedItemNS(*name_or_tuple)
        else:
            found = self.getNamedItem(name_or_tuple)
        if found is not None:
            return found
        raise KeyError(name_or_tuple)

    def item(self, index):
        """Return the node at *index*, or None if negative or out of range."""
        if not index < 0:
            try:
                return self._seq[index]
            except IndexError:
                pass
        return None

    def removeNamedItem(self, name):
        message = "NamedNodeMap instance is read-only"
        raise xml.dom.NoModificationAllowedErr(message)

    def removeNamedItemNS(self, namespaceURI, localName):
        message = "NamedNodeMap instance is read-only"
        raise xml.dom.NoModificationAllowedErr(message)

    def setNamedItem(self, node):
        message = "NamedNodeMap instance is read-only"
        raise xml.dom.NoModificationAllowedErr(message)

    def setNamedItemNS(self, node):
        message = "NamedNodeMap instance is read-only"
        raise xml.dom.NoModificationAllowedErr(message)

    def __getstate__(self):
        # The state is a one-element list holding the sequence.
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
1288
# Register the "length" property on ReadOnlySequentialNamedNodeMap via
# defproperty (star-imported from xml.dom.minicompat).
defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1291
1292
class Identified:
    """Mix-in that stores and exposes publicId and systemId."""

    __slots__ = ('publicId', 'systemId')

    def _identified_mixin_init(self, publicId, systemId):
        """Record both identifiers on the instance."""
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        return self.publicId

    def _get_systemId(self):
        return self.systemId
1307
class DocumentType(Identified, Childless, Node):
    """Document type node (nodeType DOCUMENT_TYPE_NODE).

    ``entities`` and ``notations`` are read-only named node maps;
    ``publicId``, ``systemId`` and ``internalSubset`` default to None.
    """
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            # Only the local part of the qualified name is kept as name.
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this doctype, or None if it belongs to a document.

        Cloning is only performed while ownerDocument is None.  The copy
        carries the name, publicId and systemId; with *deep* true the
        notations and entities are copied as well.  User-data handlers
        are notified with NODE_CLONED for every node copied.
        """
        if self.ownerDocument is None:
            # it's ok
            clone = DocumentType(None)
            clone.name = self.name
            clone.nodeName = self.name
            # The identifiers are part of the node's own state; without
            # copying them the clone silently lost its PUBLIC/SYSTEM ids.
            clone.publicId = self.publicId
            clone.systemId = self.systemId
            operation = xml.dom.UserDataHandler.NODE_CLONED
            if deep:
                clone.entities._seq = []
                clone.notations._seq = []
                for n in self.notations._seq:
                    notation = Notation(n.nodeName, n.publicId, n.systemId)
                    clone.notations._seq.append(notation)
                    n._call_user_data_handler(operation, n, notation)
                for e in self.entities._seq:
                    entity = Entity(e.nodeName, e.publicId, e.systemId,
                                    e.notationName)
                    entity.actualEncoding = e.actualEncoding
                    entity.encoding = e.encoding
                    entity.version = e.version
                    clone.entities._seq.append(entity)
                    e._call_user_data_handler(operation, e, entity)
            self._call_user_data_handler(operation, self, clone)
            return clone
        else:
            return None

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the <!DOCTYPE ...> declaration to *writer*.

        A PUBLIC clause is written when publicId is set, otherwise a
        SYSTEM clause when systemId is set; the internal subset follows
        in brackets when it is not None.  *indent* and *addindent* are
        not used.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            writer.write("%s  PUBLIC '%s'%s  '%s'"
                         % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1367
class Entity(Identified, Node):
    """Entity node (nodeType ENTITY_NODE).

    The child list is created empty and every child-mutating method
    raises xml.dom.HierarchyRequestErr.
    """
    nodeType = Node.ENTITY_NODE
    nodeValue = None
    attributes = None

    version = None
    encoding = None
    actualEncoding = None

    def __init__(self, name, publicId, systemId, notation):
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1407
class Notation(Identified, Childless, Node):
    """Notation node (nodeType NOTATION_NODE) with public/system ids."""
    nodeValue = None
    nodeType = Node.NOTATION_NODE

    def __init__(self, name, publicId, systemId):
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
1415
1416
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and doctypes, plus feature queries."""

    # (feature, version) pairs reported as supported.  A version of None
    # is what hasFeature() looks up when asked with an empty version.
    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return True if (*feature*, *version*) is a supported pair.

        The feature name is matched case-insensitively and an empty
        version string is treated as None.
        """
        wanted_version = None if version == "" else version
        return (feature.lower(), wanted_version) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a root element and doctype.

        When all three arguments are None an empty document is returned.
        Otherwise a root element named *qualifiedName* is created, after
        *doctype* if one is given.

        Raises xml.dom.WrongDocumentErr if *doctype* already has a
        parent, xml.dom.InvalidCharacterErr if a root element is needed
        but *qualifiedName* is empty, and xml.dom.NamespaceErr for an
        illegal prefix/namespace combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        wants_root = (namespaceURI is not None
                      or qualifiedName is not None
                      or doctype is not None)

        if wants_root:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces
                # raises InvalidCharacterErr, and since SyntaxErr is not
                # listed for createDocument, that seems to be the better
                # choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the
                # return value of this function.  If namespaceURI, qName
                # and DocType are Null the document is returned without a
                # document element.  Otherwise if doctype or namespaceURI
                # are not None then we go back to the above problem.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            xml_prefix_misused = (
                prefix == "xml"
                and namespaceURI != "http://www.w3.org/XML/1998/namespace")
            if xml_prefix_misused:
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Create a DocumentType carrying the given public/system ids."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return this object if *feature* is supported, else None."""
        return self if self.hasFeature(feature, None) else None

    # internal
    def _create_document(self):
        return Document()
1495
class ElementInfo(object):
    """Default content-model information for one element type.

    Every query answers with "nothing is known".  This class is not
    expected to be used in practice; DOM builders should supply their
    own implementations based on the information available to them.
    """

    __slots__ = ('tagName',)

    def __init__(self, name):
        self.tagName = name

    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state

    def getAttributeType(self, aname):
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        return _no_type

    def isElementContent(self):
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model; this implementation always answers False."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID;
        this implementation always answers False."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID;
        this implementation always answers False."""
        return False
1537
def _clear_id_cache(node):
    """Invalidate the ID lookup cache of the document that contains *node*.

    If *node* is itself a document its own cache is reset; otherwise the
    owner document's cache is reset, but only when _in_document() reports
    that the node is attached to a document.  Nothing happens for a
    detached node.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        owner = node
    elif _in_document(node):
        owner = node.ownerDocument
    else:
        return
    owner._id_cache.clear()
    owner._id_search_stack = None
1545
class Document(Node, DocumentLS):
    """Document node (nodeType DOCUMENT_NODE, nodeName "#document")."""
    __slots__ = ('_elem_info', 'doctype',
                 '_id_search_stack', 'childNodes', '_id_cache')
    # Node types that appendChild() accepts as direct children.
    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    # Class-level default; DOMImplementation.createDocument() rebinds it
    # on the instances it creates.
    implementation = DOMImplementation()
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    parentNode = None
    previousSibling = nextSibling = None


    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    # Incremented by Element.setIdAttributeNode(); getElementById() skips
    # the tree search while this and _elem_info are both empty/zero.
    _magic_id_count = 0
1572
1573    def __init__(self):
1574        self.doctype = None
1575        self.childNodes = NodeList()
1576        # mapping of (namespaceURI, localName) -> ElementInfo
1577        #        and tagName -> ElementInfo
1578        self._elem_info = {}
1579        self._id_cache = {}
1580        self._id_search_stack = None
1581
1582    def _get_elem_info(self, element):
1583        if element.namespaceURI:
1584            key = element.namespaceURI, element.localName
1585        else:
1586            key = element.tagName
1587        return self._elem_info.get(key)
1588
    def _get_actualEncoding(self):
        """Return the actualEncoding attribute."""
        return self.actualEncoding

    def _get_doctype(self):
        """Return the doctype attribute."""
        return self.doctype

    def _get_documentURI(self):
        """Return the documentURI attribute."""
        return self.documentURI

    def _get_encoding(self):
        """Return the encoding attribute."""
        return self.encoding

    def _get_errorHandler(self):
        """Return the errorHandler attribute."""
        return self.errorHandler

    def _get_standalone(self):
        """Return the standalone attribute."""
        return self.standalone

    def _get_strictErrorChecking(self):
        """Return the strictErrorChecking attribute."""
        return self.strictErrorChecking

    def _get_version(self):
        """Return the version attribute."""
        return self.version
1612
1613    def appendChild(self, node):
1614        if node.nodeType not in self._child_node_types:
1615            raise xml.dom.HierarchyRequestErr(
1616                "%s cannot be child of %s" % (repr(node), repr(self)))
1617        if node.parentNode is not None:
1618            # This needs to be done before the next test since this
1619            # may *be* the document element, in which case it should
1620            # end up re-ordered to the end.
1621            node.parentNode.removeChild(node)
1622
1623        if node.nodeType == Node.ELEMENT_NODE \
1624           and self._get_documentElement():
1625            raise xml.dom.HierarchyRequestErr(
1626                "two document elements disallowed")
1627        return Node.appendChild(self, node)
1628
1629    def removeChild(self, oldChild):
1630        try:
1631            self.childNodes.remove(oldChild)
1632        except ValueError:
1633            raise xml.dom.NotFoundErr()
1634        oldChild.nextSibling = oldChild.previousSibling = None
1635        oldChild.parentNode = None
1636        if self.documentElement is oldChild:
1637            self.documentElement = None
1638
1639        return oldChild
1640
1641    def _get_documentElement(self):
1642        for node in self.childNodes:
1643            if node.nodeType == Node.ELEMENT_NODE:
1644                return node
1645
1646    def unlink(self):
1647        if self.doctype is not None:
1648            self.doctype.unlink()
1649            self.doctype = None
1650        Node.unlink(self)
1651
    def cloneNode(self, deep):
        """Return a deep copy of this document.

        Only deep cloning is implemented: when *deep* is false the
        method returns None.  The copy is a fresh, empty document from
        the same implementation that receives this document's encoding,
        standalone and version values plus a clone of every child.
        User-data handlers are notified with NODE_CLONED.
        """
        if not deep:
            return None
        # All-None arguments yield a document without a root element.
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            # Children are appended to the list directly rather than
            # through appendChild(); parentNode is set by hand below.
            clone.childNodes.append(childclone)
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                # Only one doctype may be recorded on the clone.
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone
1672
1673    def createDocumentFragment(self):
1674        d = DocumentFragment()
1675        d.ownerDocument = self
1676        return d
1677
1678    def createElement(self, tagName):
1679        e = Element(tagName)
1680        e.ownerDocument = self
1681        return e
1682
1683    def createTextNode(self, data):
1684        if not isinstance(data, str):
1685            raise TypeError("node contents must be a string")
1686        t = Text()
1687        t.data = data
1688        t.ownerDocument = self
1689        return t
1690
1691    def createCDATASection(self, data):
1692        if not isinstance(data, str):
1693            raise TypeError("node contents must be a string")
1694        c = CDATASection()
1695        c.data = data
1696        c.ownerDocument = self
1697        return c
1698
1699    def createComment(self, data):
1700        c = Comment(data)
1701        c.ownerDocument = self
1702        return c
1703
1704    def createProcessingInstruction(self, target, data):
1705        p = ProcessingInstruction(target, data)
1706        p.ownerDocument = self
1707        return p
1708
1709    def createAttribute(self, qName):
1710        a = Attr(qName)
1711        a.ownerDocument = self
1712        a.value = ""
1713        return a
1714
1715    def createElementNS(self, namespaceURI, qualifiedName):
1716        prefix, localName = _nssplit(qualifiedName)
1717        e = Element(qualifiedName, namespaceURI, prefix)
1718        e.ownerDocument = self
1719        return e
1720
1721    def createAttributeNS(self, namespaceURI, qualifiedName):
1722        prefix, localName = _nssplit(qualifiedName)
1723        a = Attr(qualifiedName, namespaceURI, localName, prefix)
1724        a.ownerDocument = self
1725        a.value = ""
1726        return a
1727
1728    # A couple of implementation-specific helpers to create node types
1729    # not supported by the W3C DOM specs:
1730
1731    def _create_entity(self, name, publicId, systemId, notationName):
1732        e = Entity(name, publicId, systemId, notationName)
1733        e.ownerDocument = self
1734        return e
1735
1736    def _create_notation(self, name, publicId, systemId):
1737        n = Notation(name, publicId, systemId)
1738        n.ownerDocument = self
1739        return n
1740
1741    def getElementById(self, id):
1742        if id in self._id_cache:
1743            return self._id_cache[id]
1744        if not (self._elem_info or self._magic_id_count):
1745            return None
1746
1747        stack = self._id_search_stack
1748        if stack is None:
1749            # we never searched before, or the cache has been cleared
1750            stack = [self.documentElement]
1751            self._id_search_stack = stack
1752        elif not stack:
1753            # Previous search was completed and cache is still valid;
1754            # no matching node.
1755            return None
1756
1757        result = None
1758        while stack:
1759            node = stack.pop()
1760            # add child elements to stack for continued searching
1761            stack.extend([child for child in node.childNodes
1762                          if child.nodeType in _nodeTypes_with_children])
1763            # check this node
1764            info = self._get_elem_info(node)
1765            if info:
1766                # We have to process all ID attributes before
1767                # returning in order to get all the attributes set to
1768                # be IDs using Element.setIdAttribute*().
1769                for attr in node.attributes.values():
1770                    if attr.namespaceURI:
1771                        if info.isIdNS(attr.namespaceURI, attr.localName):
1772                            self._id_cache[attr.value] = node
1773                            if attr.value == id:
1774                                result = node
1775                            elif not node._magic_id_nodes:
1776                                break
1777                    elif info.isId(attr.name):
1778                        self._id_cache[attr.value] = node
1779                        if attr.value == id:
1780                            result = node
1781                        elif not node._magic_id_nodes:
1782                            break
1783                    elif attr._is_id:
1784                        self._id_cache[attr.value] = node
1785                        if attr.value == id:
1786                            result = node
1787                        elif node._magic_id_nodes == 1:
1788                            break
1789            elif node._magic_id_nodes:
1790                for attr in node.attributes.values():
1791                    if attr._is_id:
1792                        self._id_cache[attr.value] = node
1793                        if attr.value == id:
1794                            result = node
1795            if result is not None:
1796                break
1797        return result
1798
1799    def getElementsByTagName(self, name):
1800        return _get_elements_by_tagName_helper(self, name, NodeList())
1801
1802    def getElementsByTagNameNS(self, namespaceURI, localName):
1803        return _get_elements_by_tagName_ns_helper(
1804            self, namespaceURI, localName, NodeList())
1805
1806    def isSupported(self, feature, version):
1807        return self.implementation.hasFeature(feature, version)
1808
1809    def importNode(self, node, deep):
1810        if node.nodeType == Node.DOCUMENT_NODE:
1811            raise xml.dom.NotSupportedErr("cannot import document nodes")
1812        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1813            raise xml.dom.NotSupportedErr("cannot import document type nodes")
1814        return _clone_node(node, deep, self)
1815
1816    def writexml(self, writer, indent="", addindent="", newl="", encoding=None,
1817                 standalone=None):
1818        declarations = []
1819
1820        if encoding:
1821            declarations.append(f'encoding="{encoding}"')
1822        if standalone is not None:
1823            declarations.append(f'standalone="{"yes" if standalone else "no"}"')
1824
1825        writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}')
1826
1827        for node in self.childNodes:
1828            node.writexml(writer, indent, addindent, newl)
1829
1830    # DOM Level 3 (WD 9 April 2002)
1831
1832    def renameNode(self, n, namespaceURI, name):
1833        if n.ownerDocument is not self:
1834            raise xml.dom.WrongDocumentErr(
1835                "cannot rename nodes from other documents;\n"
1836                "expected %s,\nfound %s" % (self, n.ownerDocument))
1837        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
1838            raise xml.dom.NotSupportedErr(
1839                "renameNode() only applies to element and attribute nodes")
1840        if namespaceURI != EMPTY_NAMESPACE:
1841            if ':' in name:
1842                prefix, localName = name.split(':', 1)
1843                if (  prefix == "xmlns"
1844                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
1845                    raise xml.dom.NamespaceErr(
1846                        "illegal use of 'xmlns' prefix")
1847            else:
1848                if (  name == "xmlns"
1849                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
1850                      and n.nodeType == Node.ATTRIBUTE_NODE):
1851                    raise xml.dom.NamespaceErr(
1852                        "illegal use of the 'xmlns' attribute")
1853                prefix = None
1854                localName = name
1855        else:
1856            prefix = None
1857            localName = None
1858        if n.nodeType == Node.ATTRIBUTE_NODE:
1859            element = n.ownerElement
1860            if element is not None:
1861                is_id = n._is_id
1862                element.removeAttributeNode(n)
1863        else:
1864            element = None
1865        n.prefix = prefix
1866        n._localName = localName
1867        n.namespaceURI = namespaceURI
1868        n.nodeName = name
1869        if n.nodeType == Node.ELEMENT_NODE:
1870            n.tagName = name
1871        else:
1872            # attribute node
1873            n.name = name
1874            if element is not None:
1875                element.setAttributeNode(n)
1876                if is_id:
1877                    element.setIdAttributeNode(n)
1878        # It's not clear from a semantic perspective whether we should
1879        # call the user data handlers for the NODE_RENAMED event since
1880        # we're re-using the existing node.  The draft spec has been
1881        # interpreted as meaning "no, don't call the handler unless a
1882        # new node is created."
1883        return n
1884
# Register "documentElement" as a property on Document through the
# defproperty() helper, attaching the documentation string below.
defproperty(
    Document,
    "documentElement",
    doc="Top-level element of this document.",
)
1887
1888
1889def _clone_node(node, deep, newOwnerDocument):
1890    """
1891    Clone a node and give it the new owner document.
1892    Called by Node.cloneNode and Document.importNode
1893    """
1894    if node.ownerDocument.isSameNode(newOwnerDocument):
1895        operation = xml.dom.UserDataHandler.NODE_CLONED
1896    else:
1897        operation = xml.dom.UserDataHandler.NODE_IMPORTED
1898    if node.nodeType == Node.ELEMENT_NODE:
1899        clone = newOwnerDocument.createElementNS(node.namespaceURI,
1900                                                 node.nodeName)
1901        for attr in node.attributes.values():
1902            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
1903            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
1904            a.specified = attr.specified
1905
1906        if deep:
1907            for child in node.childNodes:
1908                c = _clone_node(child, deep, newOwnerDocument)
1909                clone.appendChild(c)
1910
1911    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
1912        clone = newOwnerDocument.createDocumentFragment()
1913        if deep:
1914            for child in node.childNodes:
1915                c = _clone_node(child, deep, newOwnerDocument)
1916                clone.appendChild(c)
1917
1918    elif node.nodeType == Node.TEXT_NODE:
1919        clone = newOwnerDocument.createTextNode(node.data)
1920    elif node.nodeType == Node.CDATA_SECTION_NODE:
1921        clone = newOwnerDocument.createCDATASection(node.data)
1922    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
1923        clone = newOwnerDocument.createProcessingInstruction(node.target,
1924                                                             node.data)
1925    elif node.nodeType == Node.COMMENT_NODE:
1926        clone = newOwnerDocument.createComment(node.data)
1927    elif node.nodeType == Node.ATTRIBUTE_NODE:
1928        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
1929                                                   node.nodeName)
1930        clone.specified = True
1931        clone.value = node.value
1932    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1933        assert node.ownerDocument is not newOwnerDocument
1934        operation = xml.dom.UserDataHandler.NODE_IMPORTED
1935        clone = newOwnerDocument.implementation.createDocumentType(
1936            node.name, node.publicId, node.systemId)
1937        clone.ownerDocument = newOwnerDocument
1938        if deep:
1939            clone.entities._seq = []
1940            clone.notations._seq = []
1941            for n in node.notations._seq:
1942                notation = Notation(n.nodeName, n.publicId, n.systemId)
1943                notation.ownerDocument = newOwnerDocument
1944                clone.notations._seq.append(notation)
1945                if hasattr(n, '_call_user_data_handler'):
1946                    n._call_user_data_handler(operation, n, notation)
1947            for e in node.entities._seq:
1948                entity = Entity(e.nodeName, e.publicId, e.systemId,
1949                                e.notationName)
1950                entity.actualEncoding = e.actualEncoding
1951                entity.encoding = e.encoding
1952                entity.version = e.version
1953                entity.ownerDocument = newOwnerDocument
1954                clone.entities._seq.append(entity)
1955                if hasattr(e, '_call_user_data_handler'):
1956                    e._call_user_data_handler(operation, e, entity)
1957    else:
1958        # Note the cloning of Document and DocumentType nodes is
1959        # implementation specific.  minidom handles those cases
1960        # directly in the cloneNode() methods.
1961        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
1962
1963    # Check for _call_user_data_handler() since this could conceivably
1964    # used with other DOM implementations (one of the FourThought
1965    # DOMs, perhaps?).
1966    if hasattr(node, '_call_user_data_handler'):
1967        node._call_user_data_handler(operation, node, clone)
1968    return clone
1969
1970
1971def _nssplit(qualifiedName):
1972    fields = qualifiedName.split(':', 1)
1973    if len(fields) == 2:
1974        return fields
1975    else:
1976        return (None, fields[0])
1977
1978
1979def _do_pulldom_parse(func, args, kwargs):
1980    events = func(*args, **kwargs)
1981    toktype, rootNode = events.getEvent()
1982    events.expandNode(rootNode)
1983    events.clear()
1984    return rootNode
1985
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object."""
    # An explicit parser or a buffer size selects the pulldom-based
    # path; otherwise the expat builder parses the file directly.
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)

    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1995
def parseString(string, parser=None):
    """Parse a file into a DOM from a string."""
    # An explicit parser selects the pulldom-based path; otherwise the
    # expat builder parses the string directly.
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)

    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
2005
def getDOMImplementation(features=None):
    """Return Document.implementation if it supports every requested feature.

    *features* may be omitted, a feature string (parsed with
    domreg._parse_feature_string()), or an iterable of
    (feature, version) pairs.  None is returned as soon as one
    requested feature is not supported.
    """
    implementation = Document.implementation
    if features:
        if isinstance(features, str):
            features = domreg._parse_feature_string(features)
        if not all(implementation.hasFeature(feature, version)
                   for feature, version in features):
            return None
    return implementation
2014