1"""Simple implementation of the Level 1 DOM. 2 3Namespaces and other minor Level 2 features are also supported. 4 5parse("foo.xml") 6 7parseString("<foo><bar/></foo>") 8 9Todo: 10===== 11 * convenience methods for getting elements and text. 12 * more testing 13 * bring some of the writer and linearizer code into conformance with this 14 interface 15 * SAX 2 namespaces 16""" 17 18import io 19import xml.dom 20 21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg 22from xml.dom.minicompat import * 23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS 24 25# This is used by the ID-cache invalidation checks; the list isn't 26# actually complete, since the nodes being checked will never be the 27# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is 28# the node being added or removed, not the node being modified.) 29# 30_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, 31 xml.dom.Node.ENTITY_REFERENCE_NODE) 32 33 34class Node(xml.dom.Node): 35 namespaceURI = None # this is non-null only for elements and attributes 36 parentNode = None 37 ownerDocument = None 38 nextSibling = None 39 previousSibling = None 40 41 prefix = EMPTY_PREFIX # non-null only for NS elements and attributes 42 43 def __bool__(self): 44 return True 45 46 def toxml(self, encoding=None, standalone=None): 47 return self.toprettyxml("", "", encoding, standalone) 48 49 def toprettyxml(self, indent="\t", newl="\n", encoding=None, 50 standalone=None): 51 if encoding is None: 52 writer = io.StringIO() 53 else: 54 writer = io.TextIOWrapper(io.BytesIO(), 55 encoding=encoding, 56 errors="xmlcharrefreplace", 57 newline='\n') 58 if self.nodeType == Node.DOCUMENT_NODE: 59 # Can pass encoding only to document, to put it into XML header 60 self.writexml(writer, "", indent, newl, encoding, standalone) 61 else: 62 self.writexml(writer, "", indent, newl) 63 if encoding is None: 64 return writer.getvalue() 65 else: 66 return writer.detach().getvalue() 67 68 def hasChildNodes(self): 69 return bool(self.childNodes) 70 71 def _get_childNodes(self): 72 return self.childNodes 73 74 def _get_firstChild(self): 75 if self.childNodes: 76 return self.childNodes[0] 77 78 def _get_lastChild(self): 79 if self.childNodes: 80 return self.childNodes[-1] 81 82 def insertBefore(self, newChild, refChild): 83 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 84 for c in tuple(newChild.childNodes): 85 self.insertBefore(c, refChild) 86 ### The DOM does not clearly specify what to return in this case 87 return newChild 88 if newChild.nodeType not in self._child_node_types: 89 raise xml.dom.HierarchyRequestErr( 90 "%s cannot be child of %s" % (repr(newChild), repr(self))) 91 if newChild.parentNode is not None: 92 newChild.parentNode.removeChild(newChild) 93 if refChild is None: 94 self.appendChild(newChild) 95 else: 96 try: 97 index = self.childNodes.index(refChild) 98 except ValueError: 99 raise xml.dom.NotFoundErr() 100 if newChild.nodeType in _nodeTypes_with_children: 101 _clear_id_cache(self) 102 self.childNodes.insert(index, newChild) 103 newChild.nextSibling = refChild 104 refChild.previousSibling = newChild 105 if index: 106 node = self.childNodes[index-1] 107 node.nextSibling = newChild 108 newChild.previousSibling = node 109 else: 110 newChild.previousSibling = None 111 newChild.parentNode = self 112 return newChild 113 114 def appendChild(self, node): 115 if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: 116 for c in tuple(node.childNodes): 117 self.appendChild(c) 118 ### The DOM does not clearly specify what to return in this case 119 return node 120 if node.nodeType not in self._child_node_types: 121 raise xml.dom.HierarchyRequestErr( 122 "%s cannot be child of %s" % (repr(node), repr(self))) 123 elif node.nodeType in _nodeTypes_with_children: 124 _clear_id_cache(self) 125 if node.parentNode is not None: 126 node.parentNode.removeChild(node) 127 _append_child(self, node) 128 node.nextSibling = None 129 return node 130 131 def replaceChild(self, newChild, oldChild): 132 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 133 refChild = oldChild.nextSibling 134 self.removeChild(oldChild) 135 return self.insertBefore(newChild, refChild) 136 if newChild.nodeType not in self._child_node_types: 137 raise xml.dom.HierarchyRequestErr( 138 "%s cannot be child of %s" % (repr(newChild), repr(self))) 139 if newChild is oldChild: 140 return 141 if newChild.parentNode is not None: 142 newChild.parentNode.removeChild(newChild) 143 try: 144 index = self.childNodes.index(oldChild) 145 except ValueError: 146 raise xml.dom.NotFoundErr() 147 self.childNodes[index] = newChild 148 newChild.parentNode = self 149 oldChild.parentNode = None 150 if (newChild.nodeType in _nodeTypes_with_children 151 or oldChild.nodeType in _nodeTypes_with_children): 152 _clear_id_cache(self) 153 newChild.nextSibling = oldChild.nextSibling 154 newChild.previousSibling = oldChild.previousSibling 155 oldChild.nextSibling = None 156 oldChild.previousSibling = None 157 if newChild.previousSibling: 158 newChild.previousSibling.nextSibling = newChild 159 if newChild.nextSibling: 160 newChild.nextSibling.previousSibling = newChild 161 return oldChild 162 163 def removeChild(self, oldChild): 164 try: 165 self.childNodes.remove(oldChild) 166 except ValueError: 167 raise xml.dom.NotFoundErr() 168 if oldChild.nextSibling is not None: 169 oldChild.nextSibling.previousSibling = oldChild.previousSibling 170 if oldChild.previousSibling is not None: 171 oldChild.previousSibling.nextSibling = oldChild.nextSibling 172 oldChild.nextSibling = oldChild.previousSibling = None 173 if oldChild.nodeType in _nodeTypes_with_children: 174 _clear_id_cache(self) 175 176 oldChild.parentNode = None 177 return oldChild 178 179 def normalize(self): 180 L = [] 181 for child in self.childNodes: 182 if child.nodeType == Node.TEXT_NODE: 183 if not child.data: 184 # empty text node; discard 185 if L: 186 L[-1].nextSibling = child.nextSibling 187 if child.nextSibling: 188 child.nextSibling.previousSibling = child.previousSibling 189 child.unlink() 190 elif L and L[-1].nodeType == child.nodeType: 191 # collapse text node 192 node = L[-1] 193 node.data = node.data + child.data 194 node.nextSibling = child.nextSibling 195 if child.nextSibling: 196 child.nextSibling.previousSibling = node 197 child.unlink() 198 else: 199 L.append(child) 200 else: 201 L.append(child) 202 if child.nodeType == Node.ELEMENT_NODE: 203 child.normalize() 204 self.childNodes[:] = L 205 206 def cloneNode(self, deep): 207 return _clone_node(self, deep, self.ownerDocument or self) 208 209 def isSupported(self, feature, version): 210 return self.ownerDocument.implementation.hasFeature(feature, version) 211 212 def _get_localName(self): 213 # Overridden in Element and Attr where localName can be Non-Null 214 return None 215 216 # Node interfaces from Level 3 (WD 9 April 2002) 217 218 def isSameNode(self, other): 219 return self is other 220 221 def getInterface(self, feature): 222 if self.isSupported(feature, None): 223 return self 224 else: 225 return None 226 227 # The "user data" functions use a dictionary that is only present 228 # if some user data has been set, so be careful not to assume it 229 # exists. 230 231 def getUserData(self, key): 232 try: 233 return self._user_data[key][0] 234 except (AttributeError, KeyError): 235 return None 236 237 def setUserData(self, key, data, handler): 238 old = None 239 try: 240 d = self._user_data 241 except AttributeError: 242 d = {} 243 self._user_data = d 244 if key in d: 245 old = d[key][0] 246 if data is None: 247 # ignore handlers passed for None 248 handler = None 249 if old is not None: 250 del d[key] 251 else: 252 d[key] = (data, handler) 253 return old 254 255 def _call_user_data_handler(self, operation, src, dst): 256 if hasattr(self, "_user_data"): 257 for key, (data, handler) in list(self._user_data.items()): 258 if handler is not None: 259 handler.handle(operation, key, data, src, dst) 260 261 # minidom-specific API: 262 263 def unlink(self): 264 self.parentNode = self.ownerDocument = None 265 if self.childNodes: 266 for child in self.childNodes: 267 child.unlink() 268 self.childNodes = NodeList() 269 self.previousSibling = None 270 self.nextSibling = None 271 272 # A Node is its own context manager, to ensure that an unlink() call occurs. 273 # This is similar to how a file object works. 274 def __enter__(self): 275 return self 276 277 def __exit__(self, et, ev, tb): 278 self.unlink() 279 280defproperty(Node, "firstChild", doc="First child node, or None.") 281defproperty(Node, "lastChild", doc="Last child node, or None.") 282defproperty(Node, "localName", doc="Namespace-local name of this node.") 283 284 285def _append_child(self, node): 286 # fast path with less checks; usable by DOM builders if careful 287 childNodes = self.childNodes 288 if childNodes: 289 last = childNodes[-1] 290 node.previousSibling = last 291 last.nextSibling = node 292 childNodes.append(node) 293 node.parentNode = self 294 295def _in_document(node): 296 # return True iff node is part of a document tree 297 while node is not None: 298 if node.nodeType == Node.DOCUMENT_NODE: 299 return True 300 node = node.parentNode 301 return False 302 303def _write_data(writer, data): 304 "Writes datachars to writer." 305 if data: 306 data = data.replace("&", "&").replace("<", "<"). \ 307 replace("\"", """).replace(">", ">") 308 writer.write(data) 309 310def _get_elements_by_tagName_helper(parent, name, rc): 311 for node in parent.childNodes: 312 if node.nodeType == Node.ELEMENT_NODE and \ 313 (name == "*" or node.tagName == name): 314 rc.append(node) 315 _get_elements_by_tagName_helper(node, name, rc) 316 return rc 317 318def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): 319 for node in parent.childNodes: 320 if node.nodeType == Node.ELEMENT_NODE: 321 if ((localName == "*" or node.localName == localName) and 322 (nsURI == "*" or node.namespaceURI == nsURI)): 323 rc.append(node) 324 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) 325 return rc 326 327class DocumentFragment(Node): 328 nodeType = Node.DOCUMENT_FRAGMENT_NODE 329 nodeName = "#document-fragment" 330 nodeValue = None 331 attributes = None 332 parentNode = None 333 _child_node_types = (Node.ELEMENT_NODE, 334 Node.TEXT_NODE, 335 Node.CDATA_SECTION_NODE, 336 Node.ENTITY_REFERENCE_NODE, 337 Node.PROCESSING_INSTRUCTION_NODE, 338 Node.COMMENT_NODE, 339 Node.NOTATION_NODE) 340 341 def __init__(self): 342 self.childNodes = NodeList() 343 344 345class Attr(Node): 346 __slots__=('_name', '_value', 'namespaceURI', 347 '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement') 348 nodeType = Node.ATTRIBUTE_NODE 349 attributes = None 350 specified = False 351 _is_id = False 352 353 _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) 354 355 def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, 356 prefix=None): 357 self.ownerElement = None 358 self._name = qName 359 self.namespaceURI = namespaceURI 360 self._prefix = prefix 361 if localName is not None: 362 self._localName = localName 363 self.childNodes = NodeList() 364 365 # Add the single child node that represents the value of the attr 366 self.childNodes.append(Text()) 367 368 # nodeValue and value are set elsewhere 369 370 def _get_localName(self): 371 try: 372 return self._localName 373 except AttributeError: 374 return self.nodeName.split(":", 1)[-1] 375 376 def _get_specified(self): 377 return self.specified 378 379 def _get_name(self): 380 return self._name 381 382 def _set_name(self, value): 383 self._name = value 384 if self.ownerElement is not None: 385 _clear_id_cache(self.ownerElement) 386 387 nodeName = name = property(_get_name, _set_name) 388 389 def _get_value(self): 390 return self._value 391 392 def _set_value(self, value): 393 self._value = value 394 self.childNodes[0].data = value 395 if self.ownerElement is not None: 396 _clear_id_cache(self.ownerElement) 397 self.childNodes[0].data = value 398 399 nodeValue = value = property(_get_value, _set_value) 400 401 def _get_prefix(self): 402 return self._prefix 403 404 def _set_prefix(self, prefix): 405 nsuri = self.namespaceURI 406 if prefix == "xmlns": 407 if nsuri and nsuri != XMLNS_NAMESPACE: 408 raise xml.dom.NamespaceErr( 409 "illegal use of 'xmlns' prefix for the wrong namespace") 410 self._prefix = prefix 411 if prefix is None: 412 newName = self.localName 413 else: 414 newName = "%s:%s" % (prefix, self.localName) 415 if self.ownerElement: 416 _clear_id_cache(self.ownerElement) 417 self.name = newName 418 419 prefix = property(_get_prefix, _set_prefix) 420 421 def unlink(self): 422 # This implementation does not call the base implementation 423 # since most of that is not needed, and the expense of the 424 # method call is not warranted. We duplicate the removal of 425 # children, but that's all we needed from the base class. 426 elem = self.ownerElement 427 if elem is not None: 428 del elem._attrs[self.nodeName] 429 del elem._attrsNS[(self.namespaceURI, self.localName)] 430 if self._is_id: 431 self._is_id = False 432 elem._magic_id_nodes -= 1 433 self.ownerDocument._magic_id_count -= 1 434 for child in self.childNodes: 435 child.unlink() 436 del self.childNodes[:] 437 438 def _get_isId(self): 439 if self._is_id: 440 return True 441 doc = self.ownerDocument 442 elem = self.ownerElement 443 if doc is None or elem is None: 444 return False 445 446 info = doc._get_elem_info(elem) 447 if info is None: 448 return False 449 if self.namespaceURI: 450 return info.isIdNS(self.namespaceURI, self.localName) 451 else: 452 return info.isId(self.nodeName) 453 454 def _get_schemaType(self): 455 doc = self.ownerDocument 456 elem = self.ownerElement 457 if doc is None or elem is None: 458 return _no_type 459 460 info = doc._get_elem_info(elem) 461 if info is None: 462 return _no_type 463 if self.namespaceURI: 464 return info.getAttributeTypeNS(self.namespaceURI, self.localName) 465 else: 466 return info.getAttributeType(self.nodeName) 467 468defproperty(Attr, "isId", doc="True if this attribute is an ID.") 469defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") 470defproperty(Attr, "schemaType", doc="Schema type for this attribute.") 471 472 473class NamedNodeMap(object): 474 """The attribute list is a transient interface to the underlying 475 dictionaries. Mutations here will change the underlying element's 476 dictionary. 477 478 Ordering is imposed artificially and does not reflect the order of 479 attributes as found in an input document. 480 """ 481 482 __slots__ = ('_attrs', '_attrsNS', '_ownerElement') 483 484 def __init__(self, attrs, attrsNS, ownerElement): 485 self._attrs = attrs 486 self._attrsNS = attrsNS 487 self._ownerElement = ownerElement 488 489 def _get_length(self): 490 return len(self._attrs) 491 492 def item(self, index): 493 try: 494 return self[list(self._attrs.keys())[index]] 495 except IndexError: 496 return None 497 498 def items(self): 499 L = [] 500 for node in self._attrs.values(): 501 L.append((node.nodeName, node.value)) 502 return L 503 504 def itemsNS(self): 505 L = [] 506 for node in self._attrs.values(): 507 L.append(((node.namespaceURI, node.localName), node.value)) 508 return L 509 510 def __contains__(self, key): 511 if isinstance(key, str): 512 return key in self._attrs 513 else: 514 return key in self._attrsNS 515 516 def keys(self): 517 return self._attrs.keys() 518 519 def keysNS(self): 520 return self._attrsNS.keys() 521 522 def values(self): 523 return self._attrs.values() 524 525 def get(self, name, value=None): 526 return self._attrs.get(name, value) 527 528 __len__ = _get_length 529 530 def _cmp(self, other): 531 if self._attrs is getattr(other, "_attrs", None): 532 return 0 533 else: 534 return (id(self) > id(other)) - (id(self) < id(other)) 535 536 def __eq__(self, other): 537 return self._cmp(other) == 0 538 539 def __ge__(self, other): 540 return self._cmp(other) >= 0 541 542 def __gt__(self, other): 543 return self._cmp(other) > 0 544 545 def __le__(self, other): 546 return self._cmp(other) <= 0 547 548 def __lt__(self, other): 549 return self._cmp(other) < 0 550 551 def __getitem__(self, attname_or_tuple): 552 if isinstance(attname_or_tuple, tuple): 553 return self._attrsNS[attname_or_tuple] 554 else: 555 return self._attrs[attname_or_tuple] 556 557 # same as set 558 def __setitem__(self, attname, value): 559 if isinstance(value, str): 560 try: 561 node = self._attrs[attname] 562 except KeyError: 563 node = Attr(attname) 564 node.ownerDocument = self._ownerElement.ownerDocument 565 self.setNamedItem(node) 566 node.value = value 567 else: 568 if not isinstance(value, Attr): 569 raise TypeError("value must be a string or Attr object") 570 node = value 571 self.setNamedItem(node) 572 573 def getNamedItem(self, name): 574 try: 575 return self._attrs[name] 576 except KeyError: 577 return None 578 579 def getNamedItemNS(self, namespaceURI, localName): 580 try: 581 return self._attrsNS[(namespaceURI, localName)] 582 except KeyError: 583 return None 584 585 def removeNamedItem(self, name): 586 n = self.getNamedItem(name) 587 if n is not None: 588 _clear_id_cache(self._ownerElement) 589 del self._attrs[n.nodeName] 590 del self._attrsNS[(n.namespaceURI, n.localName)] 591 if hasattr(n, 'ownerElement'): 592 n.ownerElement = None 593 return n 594 else: 595 raise xml.dom.NotFoundErr() 596 597 def removeNamedItemNS(self, namespaceURI, localName): 598 n = self.getNamedItemNS(namespaceURI, localName) 599 if n is not None: 600 _clear_id_cache(self._ownerElement) 601 del self._attrsNS[(n.namespaceURI, n.localName)] 602 del self._attrs[n.nodeName] 603 if hasattr(n, 'ownerElement'): 604 n.ownerElement = None 605 return n 606 else: 607 raise xml.dom.NotFoundErr() 608 609 def setNamedItem(self, node): 610 if not isinstance(node, Attr): 611 raise xml.dom.HierarchyRequestErr( 612 "%s cannot be child of %s" % (repr(node), repr(self))) 613 old = self._attrs.get(node.name) 614 if old: 615 old.unlink() 616 self._attrs[node.name] = node 617 self._attrsNS[(node.namespaceURI, node.localName)] = node 618 node.ownerElement = self._ownerElement 619 _clear_id_cache(node.ownerElement) 620 return old 621 622 def setNamedItemNS(self, node): 623 return self.setNamedItem(node) 624 625 def __delitem__(self, attname_or_tuple): 626 node = self[attname_or_tuple] 627 _clear_id_cache(node.ownerElement) 628 node.unlink() 629 630 def __getstate__(self): 631 return self._attrs, self._attrsNS, self._ownerElement 632 633 def __setstate__(self, state): 634 self._attrs, self._attrsNS, self._ownerElement = state 635 636defproperty(NamedNodeMap, "length", 637 doc="Number of nodes in the NamedNodeMap.") 638 639AttributeList = NamedNodeMap 640 641 642class TypeInfo(object): 643 __slots__ = 'namespace', 'name' 644 645 def __init__(self, namespace, name): 646 self.namespace = namespace 647 self.name = name 648 649 def __repr__(self): 650 if self.namespace: 651 return "<%s %r (from %r)>" % (self.__class__.__name__, self.name, 652 self.namespace) 653 else: 654 return "<%s %r>" % (self.__class__.__name__, self.name) 655 656 def _get_name(self): 657 return self.name 658 659 def _get_namespace(self): 660 return self.namespace 661 662_no_type = TypeInfo(None, None) 663 664class Element(Node): 665 __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix', 666 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS', 667 'nextSibling', 'previousSibling') 668 nodeType = Node.ELEMENT_NODE 669 nodeValue = None 670 schemaType = _no_type 671 672 _magic_id_nodes = 0 673 674 _child_node_types = (Node.ELEMENT_NODE, 675 Node.PROCESSING_INSTRUCTION_NODE, 676 Node.COMMENT_NODE, 677 Node.TEXT_NODE, 678 Node.CDATA_SECTION_NODE, 679 Node.ENTITY_REFERENCE_NODE) 680 681 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, 682 localName=None): 683 self.parentNode = None 684 self.tagName = self.nodeName = tagName 685 self.prefix = prefix 686 self.namespaceURI = namespaceURI 687 self.childNodes = NodeList() 688 self.nextSibling = self.previousSibling = None 689 690 # Attribute dictionaries are lazily created 691 # attributes are double-indexed: 692 # tagName -> Attribute 693 # URI,localName -> Attribute 694 # in the future: consider lazy generation 695 # of attribute objects this is too tricky 696 # for now because of headaches with 697 # namespaces. 698 self._attrs = None 699 self._attrsNS = None 700 701 def _ensure_attributes(self): 702 if self._attrs is None: 703 self._attrs = {} 704 self._attrsNS = {} 705 706 def _get_localName(self): 707 try: 708 return self._localName 709 except AttributeError: 710 return self.tagName.split(":", 1)[-1] 711 712 def _get_tagName(self): 713 return self.tagName 714 715 def unlink(self): 716 if self._attrs is not None: 717 for attr in list(self._attrs.values()): 718 attr.unlink() 719 self._attrs = None 720 self._attrsNS = None 721 Node.unlink(self) 722 723 def getAttribute(self, attname): 724 """Returns the value of the specified attribute. 725 726 Returns the value of the element's attribute named attname as 727 a string. An empty string is returned if the element does not 728 have such an attribute. Note that an empty string may also be 729 returned as an explicitly given attribute value, use the 730 hasAttribute method to distinguish these two cases. 731 """ 732 if self._attrs is None: 733 return "" 734 try: 735 return self._attrs[attname].value 736 except KeyError: 737 return "" 738 739 def getAttributeNS(self, namespaceURI, localName): 740 if self._attrsNS is None: 741 return "" 742 try: 743 return self._attrsNS[(namespaceURI, localName)].value 744 except KeyError: 745 return "" 746 747 def setAttribute(self, attname, value): 748 attr = self.getAttributeNode(attname) 749 if attr is None: 750 attr = Attr(attname) 751 attr.value = value # also sets nodeValue 752 attr.ownerDocument = self.ownerDocument 753 self.setAttributeNode(attr) 754 elif value != attr.value: 755 attr.value = value 756 if attr.isId: 757 _clear_id_cache(self) 758 759 def setAttributeNS(self, namespaceURI, qualifiedName, value): 760 prefix, localname = _nssplit(qualifiedName) 761 attr = self.getAttributeNodeNS(namespaceURI, localname) 762 if attr is None: 763 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 764 attr.value = value 765 attr.ownerDocument = self.ownerDocument 766 self.setAttributeNode(attr) 767 else: 768 if value != attr.value: 769 attr.value = value 770 if attr.isId: 771 _clear_id_cache(self) 772 if attr.prefix != prefix: 773 attr.prefix = prefix 774 attr.nodeName = qualifiedName 775 776 def getAttributeNode(self, attrname): 777 if self._attrs is None: 778 return None 779 return self._attrs.get(attrname) 780 781 def getAttributeNodeNS(self, namespaceURI, localName): 782 if self._attrsNS is None: 783 return None 784 return self._attrsNS.get((namespaceURI, localName)) 785 786 def setAttributeNode(self, attr): 787 if attr.ownerElement not in (None, self): 788 raise xml.dom.InuseAttributeErr("attribute node already owned") 789 self._ensure_attributes() 790 old1 = self._attrs.get(attr.name, None) 791 if old1 is not None: 792 self.removeAttributeNode(old1) 793 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 794 if old2 is not None and old2 is not old1: 795 self.removeAttributeNode(old2) 796 _set_attribute_node(self, attr) 797 798 if old1 is not attr: 799 # It might have already been part of this node, in which case 800 # it doesn't represent a change, and should not be returned. 801 return old1 802 if old2 is not attr: 803 return old2 804 805 setAttributeNodeNS = setAttributeNode 806 807 def removeAttribute(self, name): 808 if self._attrsNS is None: 809 raise xml.dom.NotFoundErr() 810 try: 811 attr = self._attrs[name] 812 except KeyError: 813 raise xml.dom.NotFoundErr() 814 self.removeAttributeNode(attr) 815 816 def removeAttributeNS(self, namespaceURI, localName): 817 if self._attrsNS is None: 818 raise xml.dom.NotFoundErr() 819 try: 820 attr = self._attrsNS[(namespaceURI, localName)] 821 except KeyError: 822 raise xml.dom.NotFoundErr() 823 self.removeAttributeNode(attr) 824 825 def removeAttributeNode(self, node): 826 if node is None: 827 raise xml.dom.NotFoundErr() 828 try: 829 self._attrs[node.name] 830 except KeyError: 831 raise xml.dom.NotFoundErr() 832 _clear_id_cache(self) 833 node.unlink() 834 # Restore this since the node is still useful and otherwise 835 # unlinked 836 node.ownerDocument = self.ownerDocument 837 return node 838 839 removeAttributeNodeNS = removeAttributeNode 840 841 def hasAttribute(self, name): 842 """Checks whether the element has an attribute with the specified name. 843 844 Returns True if the element has an attribute with the specified name. 845 Otherwise, returns False. 846 """ 847 if self._attrs is None: 848 return False 849 return name in self._attrs 850 851 def hasAttributeNS(self, namespaceURI, localName): 852 if self._attrsNS is None: 853 return False 854 return (namespaceURI, localName) in self._attrsNS 855 856 def getElementsByTagName(self, name): 857 """Returns all descendant elements with the given tag name. 858 859 Returns the list of all descendant elements (not direct children 860 only) with the specified tag name. 861 """ 862 return _get_elements_by_tagName_helper(self, name, NodeList()) 863 864 def getElementsByTagNameNS(self, namespaceURI, localName): 865 return _get_elements_by_tagName_ns_helper( 866 self, namespaceURI, localName, NodeList()) 867 868 def __repr__(self): 869 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 870 871 def writexml(self, writer, indent="", addindent="", newl=""): 872 """Write an XML element to a file-like object 873 874 Write the element to the writer object that must provide 875 a write method (e.g. a file or StringIO object). 876 """ 877 # indent = current indentation 878 # addindent = indentation to add to higher levels 879 # newl = newline string 880 writer.write(indent+"<" + self.tagName) 881 882 attrs = self._get_attributes() 883 884 for a_name in attrs.keys(): 885 writer.write(" %s=\"" % a_name) 886 _write_data(writer, attrs[a_name].value) 887 writer.write("\"") 888 if self.childNodes: 889 writer.write(">") 890 if (len(self.childNodes) == 1 and 891 self.childNodes[0].nodeType in ( 892 Node.TEXT_NODE, Node.CDATA_SECTION_NODE)): 893 self.childNodes[0].writexml(writer, '', '', '') 894 else: 895 writer.write(newl) 896 for node in self.childNodes: 897 node.writexml(writer, indent+addindent, addindent, newl) 898 writer.write(indent) 899 writer.write("</%s>%s" % (self.tagName, newl)) 900 else: 901 writer.write("/>%s"%(newl)) 902 903 def _get_attributes(self): 904 self._ensure_attributes() 905 return NamedNodeMap(self._attrs, self._attrsNS, self) 906 907 def hasAttributes(self): 908 if self._attrs: 909 return True 910 else: 911 return False 912 913 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 914 915 def setIdAttribute(self, name): 916 idAttr = self.getAttributeNode(name) 917 self.setIdAttributeNode(idAttr) 918 919 def setIdAttributeNS(self, namespaceURI, localName): 920 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 921 self.setIdAttributeNode(idAttr) 922 923 def setIdAttributeNode(self, idAttr): 924 if idAttr is None or not self.isSameNode(idAttr.ownerElement): 925 raise xml.dom.NotFoundErr() 926 if _get_containing_entref(self) is not None: 927 raise xml.dom.NoModificationAllowedErr() 928 if not idAttr._is_id: 929 idAttr._is_id = True 930 self._magic_id_nodes += 1 931 self.ownerDocument._magic_id_count += 1 932 _clear_id_cache(self) 933 934defproperty(Element, "attributes", 935 doc="NamedNodeMap of attributes on the element.") 936defproperty(Element, "localName", 937 doc="Namespace-local name of this element.") 938 939 940def _set_attribute_node(element, attr): 941 _clear_id_cache(element) 942 element._ensure_attributes() 943 element._attrs[attr.name] = attr 944 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 945 946 # This creates a circular reference, but Element.unlink() 947 # breaks the cycle since the references to the attribute 948 # dictionaries are tossed. 949 attr.ownerElement = element 950 951class Childless: 952 """Mixin that makes childless-ness easy to implement and avoids 953 the complexity of the Node methods that deal with children. 954 """ 955 __slots__ = () 956 957 attributes = None 958 childNodes = EmptyNodeList() 959 firstChild = None 960 lastChild = None 961 962 def _get_firstChild(self): 963 return None 964 965 def _get_lastChild(self): 966 return None 967 968 def appendChild(self, node): 969 raise xml.dom.HierarchyRequestErr( 970 self.nodeName + " nodes cannot have children") 971 972 def hasChildNodes(self): 973 return False 974 975 def insertBefore(self, newChild, refChild): 976 raise xml.dom.HierarchyRequestErr( 977 self.nodeName + " nodes do not have children") 978 979 def removeChild(self, oldChild): 980 raise xml.dom.NotFoundErr( 981 self.nodeName + " nodes do not have children") 982 983 def normalize(self): 984 # For childless nodes, normalize() has nothing to do. 985 pass 986 987 def replaceChild(self, newChild, oldChild): 988 raise xml.dom.HierarchyRequestErr( 989 self.nodeName + " nodes do not have children") 990 991 992class ProcessingInstruction(Childless, Node): 993 nodeType = Node.PROCESSING_INSTRUCTION_NODE 994 __slots__ = ('target', 'data') 995 996 def __init__(self, target, data): 997 self.target = target 998 self.data = data 999 1000 # nodeValue is an alias for data 1001 def _get_nodeValue(self): 1002 return self.data 1003 def _set_nodeValue(self, value): 1004 self.data = value 1005 nodeValue = property(_get_nodeValue, _set_nodeValue) 1006 1007 # nodeName is an alias for target 1008 def _get_nodeName(self): 1009 return self.target 1010 def _set_nodeName(self, value): 1011 self.target = value 1012 nodeName = property(_get_nodeName, _set_nodeName) 1013 1014 def writexml(self, writer, indent="", addindent="", newl=""): 1015 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 1016 1017 1018class CharacterData(Childless, Node): 1019 __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling') 1020 1021 def __init__(self): 1022 self.ownerDocument = self.parentNode = None 1023 self.previousSibling = self.nextSibling = None 1024 self._data = '' 1025 Node.__init__(self) 1026 1027 def _get_length(self): 1028 return len(self.data) 1029 __len__ = _get_length 1030 1031 def _get_data(self): 1032 return self._data 1033 def _set_data(self, data): 1034 self._data = data 1035 1036 data = nodeValue = property(_get_data, _set_data) 1037 1038 def __repr__(self): 1039 data = self.data 1040 if len(data) > 10: 1041 dotdotdot = "..." 1042 else: 1043 dotdotdot = "" 1044 return '<DOM %s node "%r%s">' % ( 1045 self.__class__.__name__, data[0:10], dotdotdot) 1046 1047 def substringData(self, offset, count): 1048 if offset < 0: 1049 raise xml.dom.IndexSizeErr("offset cannot be negative") 1050 if offset >= len(self.data): 1051 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1052 if count < 0: 1053 raise xml.dom.IndexSizeErr("count cannot be negative") 1054 return self.data[offset:offset+count] 1055 1056 def appendData(self, arg): 1057 self.data = self.data + arg 1058 1059 def insertData(self, offset, arg): 1060 if offset < 0: 1061 raise xml.dom.IndexSizeErr("offset cannot be negative") 1062 if offset >= len(self.data): 1063 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1064 if arg: 1065 self.data = "%s%s%s" % ( 1066 self.data[:offset], arg, self.data[offset:]) 1067 1068 def deleteData(self, offset, count): 1069 if offset < 0: 1070 raise xml.dom.IndexSizeErr("offset cannot be negative") 1071 if offset >= len(self.data): 1072 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1073 if count < 0: 1074 raise xml.dom.IndexSizeErr("count cannot be negative") 1075 if count: 1076 self.data = self.data[:offset] + self.data[offset+count:] 1077 1078 def replaceData(self, offset, count, arg): 1079 if offset < 0: 1080 raise xml.dom.IndexSizeErr("offset cannot be negative") 1081 if offset >= len(self.data): 1082 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1083 if count < 0: 1084 raise xml.dom.IndexSizeErr("count cannot be negative") 1085 if count: 1086 self.data = "%s%s%s" % ( 1087 self.data[:offset], arg, self.data[offset+count:]) 1088 1089defproperty(CharacterData, "length", doc="Length of the string data.") 1090 1091 1092class Text(CharacterData): 1093 __slots__ = () 1094 1095 nodeType = Node.TEXT_NODE 1096 nodeName = "#text" 1097 attributes = None 1098 1099 def splitText(self, offset): 1100 if offset < 0 or offset > len(self.data): 1101 raise xml.dom.IndexSizeErr("illegal offset value") 1102 newText = self.__class__() 1103 newText.data = self.data[offset:] 1104 newText.ownerDocument = self.ownerDocument 1105 next = self.nextSibling 1106 if self.parentNode and self in self.parentNode.childNodes: 1107 if next is None: 1108 self.parentNode.appendChild(newText) 1109 else: 1110 self.parentNode.insertBefore(newText, next) 1111 self.data = self.data[:offset] 1112 return newText 1113 1114 def writexml(self, writer, indent="", addindent="", newl=""): 1115 _write_data(writer, "%s%s%s" % (indent, self.data, newl)) 1116 1117 # DOM Level 3 (WD 9 April 2002) 1118 1119 def _get_wholeText(self): 1120 L = [self.data] 1121 n = self.previousSibling 1122 while n is not None: 1123 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1124 L.insert(0, n.data) 1125 n = n.previousSibling 1126 else: 1127 break 1128 n = self.nextSibling 1129 while n is not None: 1130 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1131 L.append(n.data) 1132 n = n.nextSibling 1133 else: 1134 break 1135 return ''.join(L) 1136 1137 def replaceWholeText(self, content): 1138 # XXX This needs to be seriously changed if minidom ever 1139 # supports EntityReference nodes. 1140 parent = self.parentNode 1141 n = self.previousSibling 1142 while n is not None: 1143 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1144 next = n.previousSibling 1145 parent.removeChild(n) 1146 n = next 1147 else: 1148 break 1149 n = self.nextSibling 1150 if not content: 1151 parent.removeChild(self) 1152 while n is not None: 1153 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1154 next = n.nextSibling 1155 parent.removeChild(n) 1156 n = next 1157 else: 1158 break 1159 if content: 1160 self.data = content 1161 return self 1162 else: 1163 return None 1164 1165 def _get_isWhitespaceInElementContent(self): 1166 if self.data.strip(): 1167 return False 1168 elem = _get_containing_element(self) 1169 if elem is None: 1170 return False 1171 info = self.ownerDocument._get_elem_info(elem) 1172 if info is None: 1173 return False 1174 else: 1175 return info.isElementContent() 1176 1177defproperty(Text, "isWhitespaceInElementContent", 1178 doc="True iff this text node contains only whitespace" 1179 " and is in element content.") 1180defproperty(Text, "wholeText", 1181 doc="The text of all logically-adjacent text nodes.") 1182 1183 1184def _get_containing_element(node): 1185 c = node.parentNode 1186 while c is not None: 1187 if c.nodeType == Node.ELEMENT_NODE: 1188 return c 1189 c = c.parentNode 1190 return None 1191 1192def _get_containing_entref(node): 1193 c = node.parentNode 1194 while c is not None: 1195 if c.nodeType == Node.ENTITY_REFERENCE_NODE: 1196 return c 1197 c = c.parentNode 1198 return None 1199 1200 1201class Comment(CharacterData): 1202 nodeType = Node.COMMENT_NODE 1203 nodeName = "#comment" 1204 1205 def __init__(self, data): 1206 CharacterData.__init__(self) 1207 self._data = data 1208 1209 def writexml(self, writer, indent="", addindent="", newl=""): 1210 if "--" in self.data: 1211 raise ValueError("'--' is not allowed in a comment node") 1212 writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) 1213 1214 1215class CDATASection(Text): 1216 __slots__ = () 1217 1218 nodeType = Node.CDATA_SECTION_NODE 1219 nodeName = "#cdata-section" 1220 1221 def writexml(self, writer, indent="", addindent="", newl=""): 1222 if self.data.find("]]>") >= 0: 1223 raise ValueError("']]>' not allowed in a CDATA section") 1224 writer.write("<![CDATA[%s]]>" % self.data) 1225 1226 1227class ReadOnlySequentialNamedNodeMap(object): 1228 __slots__ = '_seq', 1229 1230 def __init__(self, seq=()): 1231 # seq should be a list or tuple 1232 self._seq = seq 1233 1234 def __len__(self): 1235 return len(self._seq) 1236 1237 def _get_length(self): 1238 return len(self._seq) 1239 1240 def getNamedItem(self, name): 1241 for n in self._seq: 1242 if n.nodeName == name: 1243 return n 1244 1245 def getNamedItemNS(self, namespaceURI, localName): 1246 for n in self._seq: 1247 if n.namespaceURI == namespaceURI and n.localName == localName: 1248 return n 1249 1250 def __getitem__(self, name_or_tuple): 1251 if isinstance(name_or_tuple, tuple): 1252 node = self.getNamedItemNS(*name_or_tuple) 1253 else: 1254 node = self.getNamedItem(name_or_tuple) 1255 if node is None: 1256 raise KeyError(name_or_tuple) 1257 return node 1258 1259 def item(self, index): 1260 if index < 0: 1261 return None 1262 try: 1263 return self._seq[index] 1264 except IndexError: 1265 return None 1266 1267 def removeNamedItem(self, name): 1268 raise xml.dom.NoModificationAllowedErr( 1269 "NamedNodeMap instance is read-only") 1270 1271 def removeNamedItemNS(self, namespaceURI, localName): 1272 raise xml.dom.NoModificationAllowedErr( 1273 "NamedNodeMap instance is read-only") 1274 1275 def setNamedItem(self, node): 1276 raise xml.dom.NoModificationAllowedErr( 1277 "NamedNodeMap instance is read-only") 1278 1279 def setNamedItemNS(self, node): 1280 raise xml.dom.NoModificationAllowedErr( 1281 "NamedNodeMap instance is read-only") 1282 1283 def __getstate__(self): 1284 return [self._seq] 1285 1286 def __setstate__(self, state): 1287 self._seq = state[0] 1288 1289defproperty(ReadOnlySequentialNamedNodeMap, "length", 1290 doc="Number of entries in the NamedNodeMap.") 1291 1292 1293class Identified: 1294 """Mix-in class that supports the publicId and systemId attributes.""" 1295 1296 __slots__ = 'publicId', 'systemId' 1297 1298 def _identified_mixin_init(self, publicId, systemId): 1299 self.publicId = publicId 1300 self.systemId = systemId 1301 1302 def _get_publicId(self): 1303 return self.publicId 1304 1305 def _get_systemId(self): 1306 return self.systemId 1307 1308class DocumentType(Identified, Childless, Node): 1309 nodeType = Node.DOCUMENT_TYPE_NODE 1310 nodeValue = None 1311 name = None 1312 publicId = None 1313 systemId = None 1314 internalSubset = None 1315 1316 def __init__(self, qualifiedName): 1317 self.entities = ReadOnlySequentialNamedNodeMap() 1318 self.notations = ReadOnlySequentialNamedNodeMap() 1319 if qualifiedName: 1320 prefix, localname = _nssplit(qualifiedName) 1321 self.name = localname 1322 self.nodeName = self.name 1323 1324 def _get_internalSubset(self): 1325 return self.internalSubset 1326 1327 def cloneNode(self, deep): 1328 if self.ownerDocument is None: 1329 # it's ok 1330 clone = DocumentType(None) 1331 clone.name = self.name 1332 clone.nodeName = self.name 1333 operation = xml.dom.UserDataHandler.NODE_CLONED 1334 if deep: 1335 clone.entities._seq = [] 1336 clone.notations._seq = [] 1337 for n in self.notations._seq: 1338 notation = Notation(n.nodeName, n.publicId, n.systemId) 1339 clone.notations._seq.append(notation) 1340 n._call_user_data_handler(operation, n, notation) 1341 for e in self.entities._seq: 1342 entity = Entity(e.nodeName, e.publicId, e.systemId, 1343 e.notationName) 1344 entity.actualEncoding = e.actualEncoding 1345 entity.encoding = e.encoding 1346 entity.version = e.version 1347 clone.entities._seq.append(entity) 1348 e._call_user_data_handler(operation, e, entity) 1349 self._call_user_data_handler(operation, self, clone) 1350 return clone 1351 else: 1352 return None 1353 1354 def writexml(self, writer, indent="", addindent="", newl=""): 1355 writer.write("<!DOCTYPE ") 1356 writer.write(self.name) 1357 if self.publicId: 1358 writer.write("%s PUBLIC '%s'%s '%s'" 1359 % (newl, self.publicId, newl, self.systemId)) 1360 elif self.systemId: 1361 writer.write("%s SYSTEM '%s'" % (newl, self.systemId)) 1362 if self.internalSubset is not None: 1363 writer.write(" [") 1364 writer.write(self.internalSubset) 1365 writer.write("]") 1366 writer.write(">"+newl) 1367 1368class Entity(Identified, Node): 1369 attributes = None 1370 nodeType = Node.ENTITY_NODE 1371 nodeValue = None 1372 1373 actualEncoding = None 1374 encoding = None 1375 version = None 1376 1377 def __init__(self, name, publicId, systemId, notation): 1378 self.nodeName = name 1379 self.notationName = notation 1380 self.childNodes = NodeList() 1381 self._identified_mixin_init(publicId, systemId) 1382 1383 def _get_actualEncoding(self): 1384 return self.actualEncoding 1385 1386 def _get_encoding(self): 1387 return self.encoding 1388 1389 def _get_version(self): 1390 return self.version 1391 1392 def appendChild(self, newChild): 1393 raise xml.dom.HierarchyRequestErr( 1394 "cannot append children to an entity node") 1395 1396 def insertBefore(self, newChild, refChild): 1397 raise xml.dom.HierarchyRequestErr( 1398 "cannot insert children below an entity node") 1399 1400 def removeChild(self, oldChild): 1401 raise xml.dom.HierarchyRequestErr( 1402 "cannot remove children from an entity node") 1403 1404 def replaceChild(self, newChild, oldChild): 1405 raise xml.dom.HierarchyRequestErr( 1406 "cannot replace children of an entity node") 1407 1408class Notation(Identified, Childless, Node): 1409 nodeType = Node.NOTATION_NODE 1410 nodeValue = None 1411 1412 def __init__(self, name, publicId, systemId): 1413 self.nodeName = name 1414 self._identified_mixin_init(publicId, systemId) 1415 1416 1417class DOMImplementation(DOMImplementationLS): 1418 _features = [("core", "1.0"), 1419 ("core", "2.0"), 1420 ("core", None), 1421 ("xml", "1.0"), 1422 ("xml", "2.0"), 1423 ("xml", None), 1424 ("ls-load", "3.0"), 1425 ("ls-load", None), 1426 ] 1427 1428 def hasFeature(self, feature, version): 1429 if version == "": 1430 version = None 1431 return (feature.lower(), version) in self._features 1432 1433 def createDocument(self, namespaceURI, qualifiedName, doctype): 1434 if doctype and doctype.parentNode is not None: 1435 raise xml.dom.WrongDocumentErr( 1436 "doctype object owned by another DOM tree") 1437 doc = self._create_document() 1438 1439 add_root_element = not (namespaceURI is None 1440 and qualifiedName is None 1441 and doctype is None) 1442 1443 if not qualifiedName and add_root_element: 1444 # The spec is unclear what to raise here; SyntaxErr 1445 # would be the other obvious candidate. Since Xerces raises 1446 # InvalidCharacterErr, and since SyntaxErr is not listed 1447 # for createDocument, that seems to be the better choice. 1448 # XXX: need to check for illegal characters here and in 1449 # createElement. 1450 1451 # DOM Level III clears this up when talking about the return value 1452 # of this function. If namespaceURI, qName and DocType are 1453 # Null the document is returned without a document element 1454 # Otherwise if doctype or namespaceURI are not None 1455 # Then we go back to the above problem 1456 raise xml.dom.InvalidCharacterErr("Element with no name") 1457 1458 if add_root_element: 1459 prefix, localname = _nssplit(qualifiedName) 1460 if prefix == "xml" \ 1461 and namespaceURI != "http://www.w3.org/XML/1998/namespace": 1462 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") 1463 if prefix and not namespaceURI: 1464 raise xml.dom.NamespaceErr( 1465 "illegal use of prefix without namespaces") 1466 element = doc.createElementNS(namespaceURI, qualifiedName) 1467 if doctype: 1468 doc.appendChild(doctype) 1469 doc.appendChild(element) 1470 1471 if doctype: 1472 doctype.parentNode = doctype.ownerDocument = doc 1473 1474 doc.doctype = doctype 1475 doc.implementation = self 1476 return doc 1477 1478 def createDocumentType(self, qualifiedName, publicId, systemId): 1479 doctype = DocumentType(qualifiedName) 1480 doctype.publicId = publicId 1481 doctype.systemId = systemId 1482 return doctype 1483 1484 # DOM Level 3 (WD 9 April 2002) 1485 1486 def getInterface(self, feature): 1487 if self.hasFeature(feature, None): 1488 return self 1489 else: 1490 return None 1491 1492 # internal 1493 def _create_document(self): 1494 return Document() 1495 1496class ElementInfo(object): 1497 """Object that represents content-model information for an element. 1498 1499 This implementation is not expected to be used in practice; DOM 1500 builders should provide implementations which do the right thing 1501 using information available to it. 1502 1503 """ 1504 1505 __slots__ = 'tagName', 1506 1507 def __init__(self, name): 1508 self.tagName = name 1509 1510 def getAttributeType(self, aname): 1511 return _no_type 1512 1513 def getAttributeTypeNS(self, namespaceURI, localName): 1514 return _no_type 1515 1516 def isElementContent(self): 1517 return False 1518 1519 def isEmpty(self): 1520 """Returns true iff this element is declared to have an EMPTY 1521 content model.""" 1522 return False 1523 1524 def isId(self, aname): 1525 """Returns true iff the named attribute is a DTD-style ID.""" 1526 return False 1527 1528 def isIdNS(self, namespaceURI, localName): 1529 """Returns true iff the identified attribute is a DTD-style ID.""" 1530 return False 1531 1532 def __getstate__(self): 1533 return self.tagName 1534 1535 def __setstate__(self, state): 1536 self.tagName = state 1537 1538def _clear_id_cache(node): 1539 if node.nodeType == Node.DOCUMENT_NODE: 1540 node._id_cache.clear() 1541 node._id_search_stack = None 1542 elif _in_document(node): 1543 node.ownerDocument._id_cache.clear() 1544 node.ownerDocument._id_search_stack= None 1545 1546class Document(Node, DocumentLS): 1547 __slots__ = ('_elem_info', 'doctype', 1548 '_id_search_stack', 'childNodes', '_id_cache') 1549 _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, 1550 Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) 1551 1552 implementation = DOMImplementation() 1553 nodeType = Node.DOCUMENT_NODE 1554 nodeName = "#document" 1555 nodeValue = None 1556 attributes = None 1557 parentNode = None 1558 previousSibling = nextSibling = None 1559 1560 1561 # Document attributes from Level 3 (WD 9 April 2002) 1562 1563 actualEncoding = None 1564 encoding = None 1565 standalone = None 1566 version = None 1567 strictErrorChecking = False 1568 errorHandler = None 1569 documentURI = None 1570 1571 _magic_id_count = 0 1572 1573 def __init__(self): 1574 self.doctype = None 1575 self.childNodes = NodeList() 1576 # mapping of (namespaceURI, localName) -> ElementInfo 1577 # and tagName -> ElementInfo 1578 self._elem_info = {} 1579 self._id_cache = {} 1580 self._id_search_stack = None 1581 1582 def _get_elem_info(self, element): 1583 if element.namespaceURI: 1584 key = element.namespaceURI, element.localName 1585 else: 1586 key = element.tagName 1587 return self._elem_info.get(key) 1588 1589 def _get_actualEncoding(self): 1590 return self.actualEncoding 1591 1592 def _get_doctype(self): 1593 return self.doctype 1594 1595 def _get_documentURI(self): 1596 return self.documentURI 1597 1598 def _get_encoding(self): 1599 return self.encoding 1600 1601 def _get_errorHandler(self): 1602 return self.errorHandler 1603 1604 def _get_standalone(self): 1605 return self.standalone 1606 1607 def _get_strictErrorChecking(self): 1608 return self.strictErrorChecking 1609 1610 def _get_version(self): 1611 return self.version 1612 1613 def appendChild(self, node): 1614 if node.nodeType not in self._child_node_types: 1615 raise xml.dom.HierarchyRequestErr( 1616 "%s cannot be child of %s" % (repr(node), repr(self))) 1617 if node.parentNode is not None: 1618 # This needs to be done before the next test since this 1619 # may *be* the document element, in which case it should 1620 # end up re-ordered to the end. 1621 node.parentNode.removeChild(node) 1622 1623 if node.nodeType == Node.ELEMENT_NODE \ 1624 and self._get_documentElement(): 1625 raise xml.dom.HierarchyRequestErr( 1626 "two document elements disallowed") 1627 return Node.appendChild(self, node) 1628 1629 def removeChild(self, oldChild): 1630 try: 1631 self.childNodes.remove(oldChild) 1632 except ValueError: 1633 raise xml.dom.NotFoundErr() 1634 oldChild.nextSibling = oldChild.previousSibling = None 1635 oldChild.parentNode = None 1636 if self.documentElement is oldChild: 1637 self.documentElement = None 1638 1639 return oldChild 1640 1641 def _get_documentElement(self): 1642 for node in self.childNodes: 1643 if node.nodeType == Node.ELEMENT_NODE: 1644 return node 1645 1646 def unlink(self): 1647 if self.doctype is not None: 1648 self.doctype.unlink() 1649 self.doctype = None 1650 Node.unlink(self) 1651 1652 def cloneNode(self, deep): 1653 if not deep: 1654 return None 1655 clone = self.implementation.createDocument(None, None, None) 1656 clone.encoding = self.encoding 1657 clone.standalone = self.standalone 1658 clone.version = self.version 1659 for n in self.childNodes: 1660 childclone = _clone_node(n, deep, clone) 1661 assert childclone.ownerDocument.isSameNode(clone) 1662 clone.childNodes.append(childclone) 1663 if childclone.nodeType == Node.DOCUMENT_NODE: 1664 assert clone.documentElement is None 1665 elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: 1666 assert clone.doctype is None 1667 clone.doctype = childclone 1668 childclone.parentNode = clone 1669 self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, 1670 self, clone) 1671 return clone 1672 1673 def createDocumentFragment(self): 1674 d = DocumentFragment() 1675 d.ownerDocument = self 1676 return d 1677 1678 def createElement(self, tagName): 1679 e = Element(tagName) 1680 e.ownerDocument = self 1681 return e 1682 1683 def createTextNode(self, data): 1684 if not isinstance(data, str): 1685 raise TypeError("node contents must be a string") 1686 t = Text() 1687 t.data = data 1688 t.ownerDocument = self 1689 return t 1690 1691 def createCDATASection(self, data): 1692 if not isinstance(data, str): 1693 raise TypeError("node contents must be a string") 1694 c = CDATASection() 1695 c.data = data 1696 c.ownerDocument = self 1697 return c 1698 1699 def createComment(self, data): 1700 c = Comment(data) 1701 c.ownerDocument = self 1702 return c 1703 1704 def createProcessingInstruction(self, target, data): 1705 p = ProcessingInstruction(target, data) 1706 p.ownerDocument = self 1707 return p 1708 1709 def createAttribute(self, qName): 1710 a = Attr(qName) 1711 a.ownerDocument = self 1712 a.value = "" 1713 return a 1714 1715 def createElementNS(self, namespaceURI, qualifiedName): 1716 prefix, localName = _nssplit(qualifiedName) 1717 e = Element(qualifiedName, namespaceURI, prefix) 1718 e.ownerDocument = self 1719 return e 1720 1721 def createAttributeNS(self, namespaceURI, qualifiedName): 1722 prefix, localName = _nssplit(qualifiedName) 1723 a = Attr(qualifiedName, namespaceURI, localName, prefix) 1724 a.ownerDocument = self 1725 a.value = "" 1726 return a 1727 1728 # A couple of implementation-specific helpers to create node types 1729 # not supported by the W3C DOM specs: 1730 1731 def _create_entity(self, name, publicId, systemId, notationName): 1732 e = Entity(name, publicId, systemId, notationName) 1733 e.ownerDocument = self 1734 return e 1735 1736 def _create_notation(self, name, publicId, systemId): 1737 n = Notation(name, publicId, systemId) 1738 n.ownerDocument = self 1739 return n 1740 1741 def getElementById(self, id): 1742 if id in self._id_cache: 1743 return self._id_cache[id] 1744 if not (self._elem_info or self._magic_id_count): 1745 return None 1746 1747 stack = self._id_search_stack 1748 if stack is None: 1749 # we never searched before, or the cache has been cleared 1750 stack = [self.documentElement] 1751 self._id_search_stack = stack 1752 elif not stack: 1753 # Previous search was completed and cache is still valid; 1754 # no matching node. 1755 return None 1756 1757 result = None 1758 while stack: 1759 node = stack.pop() 1760 # add child elements to stack for continued searching 1761 stack.extend([child for child in node.childNodes 1762 if child.nodeType in _nodeTypes_with_children]) 1763 # check this node 1764 info = self._get_elem_info(node) 1765 if info: 1766 # We have to process all ID attributes before 1767 # returning in order to get all the attributes set to 1768 # be IDs using Element.setIdAttribute*(). 1769 for attr in node.attributes.values(): 1770 if attr.namespaceURI: 1771 if info.isIdNS(attr.namespaceURI, attr.localName): 1772 self._id_cache[attr.value] = node 1773 if attr.value == id: 1774 result = node 1775 elif not node._magic_id_nodes: 1776 break 1777 elif info.isId(attr.name): 1778 self._id_cache[attr.value] = node 1779 if attr.value == id: 1780 result = node 1781 elif not node._magic_id_nodes: 1782 break 1783 elif attr._is_id: 1784 self._id_cache[attr.value] = node 1785 if attr.value == id: 1786 result = node 1787 elif node._magic_id_nodes == 1: 1788 break 1789 elif node._magic_id_nodes: 1790 for attr in node.attributes.values(): 1791 if attr._is_id: 1792 self._id_cache[attr.value] = node 1793 if attr.value == id: 1794 result = node 1795 if result is not None: 1796 break 1797 return result 1798 1799 def getElementsByTagName(self, name): 1800 return _get_elements_by_tagName_helper(self, name, NodeList()) 1801 1802 def getElementsByTagNameNS(self, namespaceURI, localName): 1803 return _get_elements_by_tagName_ns_helper( 1804 self, namespaceURI, localName, NodeList()) 1805 1806 def isSupported(self, feature, version): 1807 return self.implementation.hasFeature(feature, version) 1808 1809 def importNode(self, node, deep): 1810 if node.nodeType == Node.DOCUMENT_NODE: 1811 raise xml.dom.NotSupportedErr("cannot import document nodes") 1812 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1813 raise xml.dom.NotSupportedErr("cannot import document type nodes") 1814 return _clone_node(node, deep, self) 1815 1816 def writexml(self, writer, indent="", addindent="", newl="", encoding=None, 1817 standalone=None): 1818 declarations = [] 1819 1820 if encoding: 1821 declarations.append(f'encoding="{encoding}"') 1822 if standalone is not None: 1823 declarations.append(f'standalone="{"yes" if standalone else "no"}"') 1824 1825 writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}') 1826 1827 for node in self.childNodes: 1828 node.writexml(writer, indent, addindent, newl) 1829 1830 # DOM Level 3 (WD 9 April 2002) 1831 1832 def renameNode(self, n, namespaceURI, name): 1833 if n.ownerDocument is not self: 1834 raise xml.dom.WrongDocumentErr( 1835 "cannot rename nodes from other documents;\n" 1836 "expected %s,\nfound %s" % (self, n.ownerDocument)) 1837 if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): 1838 raise xml.dom.NotSupportedErr( 1839 "renameNode() only applies to element and attribute nodes") 1840 if namespaceURI != EMPTY_NAMESPACE: 1841 if ':' in name: 1842 prefix, localName = name.split(':', 1) 1843 if ( prefix == "xmlns" 1844 and namespaceURI != xml.dom.XMLNS_NAMESPACE): 1845 raise xml.dom.NamespaceErr( 1846 "illegal use of 'xmlns' prefix") 1847 else: 1848 if ( name == "xmlns" 1849 and namespaceURI != xml.dom.XMLNS_NAMESPACE 1850 and n.nodeType == Node.ATTRIBUTE_NODE): 1851 raise xml.dom.NamespaceErr( 1852 "illegal use of the 'xmlns' attribute") 1853 prefix = None 1854 localName = name 1855 else: 1856 prefix = None 1857 localName = None 1858 if n.nodeType == Node.ATTRIBUTE_NODE: 1859 element = n.ownerElement 1860 if element is not None: 1861 is_id = n._is_id 1862 element.removeAttributeNode(n) 1863 else: 1864 element = None 1865 n.prefix = prefix 1866 n._localName = localName 1867 n.namespaceURI = namespaceURI 1868 n.nodeName = name 1869 if n.nodeType == Node.ELEMENT_NODE: 1870 n.tagName = name 1871 else: 1872 # attribute node 1873 n.name = name 1874 if element is not None: 1875 element.setAttributeNode(n) 1876 if is_id: 1877 element.setIdAttributeNode(n) 1878 # It's not clear from a semantic perspective whether we should 1879 # call the user data handlers for the NODE_RENAMED event since 1880 # we're re-using the existing node. The draft spec has been 1881 # interpreted as meaning "no, don't call the handler unless a 1882 # new node is created." 1883 return n 1884 1885defproperty(Document, "documentElement", 1886 doc="Top-level element of this document.") 1887 1888 1889def _clone_node(node, deep, newOwnerDocument): 1890 """ 1891 Clone a node and give it the new owner document. 1892 Called by Node.cloneNode and Document.importNode 1893 """ 1894 if node.ownerDocument.isSameNode(newOwnerDocument): 1895 operation = xml.dom.UserDataHandler.NODE_CLONED 1896 else: 1897 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1898 if node.nodeType == Node.ELEMENT_NODE: 1899 clone = newOwnerDocument.createElementNS(node.namespaceURI, 1900 node.nodeName) 1901 for attr in node.attributes.values(): 1902 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) 1903 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) 1904 a.specified = attr.specified 1905 1906 if deep: 1907 for child in node.childNodes: 1908 c = _clone_node(child, deep, newOwnerDocument) 1909 clone.appendChild(c) 1910 1911 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: 1912 clone = newOwnerDocument.createDocumentFragment() 1913 if deep: 1914 for child in node.childNodes: 1915 c = _clone_node(child, deep, newOwnerDocument) 1916 clone.appendChild(c) 1917 1918 elif node.nodeType == Node.TEXT_NODE: 1919 clone = newOwnerDocument.createTextNode(node.data) 1920 elif node.nodeType == Node.CDATA_SECTION_NODE: 1921 clone = newOwnerDocument.createCDATASection(node.data) 1922 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: 1923 clone = newOwnerDocument.createProcessingInstruction(node.target, 1924 node.data) 1925 elif node.nodeType == Node.COMMENT_NODE: 1926 clone = newOwnerDocument.createComment(node.data) 1927 elif node.nodeType == Node.ATTRIBUTE_NODE: 1928 clone = newOwnerDocument.createAttributeNS(node.namespaceURI, 1929 node.nodeName) 1930 clone.specified = True 1931 clone.value = node.value 1932 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1933 assert node.ownerDocument is not newOwnerDocument 1934 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1935 clone = newOwnerDocument.implementation.createDocumentType( 1936 node.name, node.publicId, node.systemId) 1937 clone.ownerDocument = newOwnerDocument 1938 if deep: 1939 clone.entities._seq = [] 1940 clone.notations._seq = [] 1941 for n in node.notations._seq: 1942 notation = Notation(n.nodeName, n.publicId, n.systemId) 1943 notation.ownerDocument = newOwnerDocument 1944 clone.notations._seq.append(notation) 1945 if hasattr(n, '_call_user_data_handler'): 1946 n._call_user_data_handler(operation, n, notation) 1947 for e in node.entities._seq: 1948 entity = Entity(e.nodeName, e.publicId, e.systemId, 1949 e.notationName) 1950 entity.actualEncoding = e.actualEncoding 1951 entity.encoding = e.encoding 1952 entity.version = e.version 1953 entity.ownerDocument = newOwnerDocument 1954 clone.entities._seq.append(entity) 1955 if hasattr(e, '_call_user_data_handler'): 1956 e._call_user_data_handler(operation, e, entity) 1957 else: 1958 # Note the cloning of Document and DocumentType nodes is 1959 # implementation specific. minidom handles those cases 1960 # directly in the cloneNode() methods. 1961 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) 1962 1963 # Check for _call_user_data_handler() since this could conceivably 1964 # used with other DOM implementations (one of the FourThought 1965 # DOMs, perhaps?). 1966 if hasattr(node, '_call_user_data_handler'): 1967 node._call_user_data_handler(operation, node, clone) 1968 return clone 1969 1970 1971def _nssplit(qualifiedName): 1972 fields = qualifiedName.split(':', 1) 1973 if len(fields) == 2: 1974 return fields 1975 else: 1976 return (None, fields[0]) 1977 1978 1979def _do_pulldom_parse(func, args, kwargs): 1980 events = func(*args, **kwargs) 1981 toktype, rootNode = events.getEvent() 1982 events.expandNode(rootNode) 1983 events.clear() 1984 return rootNode 1985 1986def parse(file, parser=None, bufsize=None): 1987 """Parse a file into a DOM by filename or file object.""" 1988 if parser is None and not bufsize: 1989 from xml.dom import expatbuilder 1990 return expatbuilder.parse(file) 1991 else: 1992 from xml.dom import pulldom 1993 return _do_pulldom_parse(pulldom.parse, (file,), 1994 {'parser': parser, 'bufsize': bufsize}) 1995 1996def parseString(string, parser=None): 1997 """Parse a file into a DOM from a string.""" 1998 if parser is None: 1999 from xml.dom import expatbuilder 2000 return expatbuilder.parseString(string) 2001 else: 2002 from xml.dom import pulldom 2003 return _do_pulldom_parse(pulldom.parseString, (string,), 2004 {'parser': parser}) 2005 2006def getDOMImplementation(features=None): 2007 if features: 2008 if isinstance(features, str): 2009 features = domreg._parse_feature_string(features) 2010 for f, v in features: 2011 if not Document.implementation.hasFeature(f, v): 2012 return None 2013 return Document.implementation 2014