1 package org.jsoup.nodes; 2 3 import org.jsoup.SerializationException; 4 import org.jsoup.helper.Validate; 5 import org.jsoup.internal.StringUtil; 6 import org.jsoup.select.NodeFilter; 7 import org.jsoup.select.NodeTraversor; 8 import org.jsoup.select.NodeVisitor; 9 import org.jspecify.annotations.Nullable; 10 11 import java.io.IOException; 12 import java.util.ArrayList; 13 import java.util.Arrays; 14 import java.util.Collections; 15 import java.util.Iterator; 16 import java.util.LinkedList; 17 import java.util.List; 18 import java.util.function.Consumer; 19 import java.util.stream.Stream; 20 21 /** 22 The base, abstract Node model. {@link Element}, {@link Document}, {@link Comment}, {@link TextNode}, et al., 23 are instances of Node. 24 25 @author Jonathan Hedley, [email protected] */ 26 public abstract class Node implements Cloneable { 27 static final List<Node> EmptyNodes = Collections.emptyList(); 28 static final String EmptyString = ""; 29 @Nullable Node parentNode; // Nodes don't always have parents 30 int siblingIndex; 31 32 /** 33 * Default constructor. Doesn't set up base uri, children, or attributes; use with caution. 34 */ Node()35 protected Node() { 36 } 37 38 /** 39 Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof). 40 @return node name 41 */ nodeName()42 public abstract String nodeName(); 43 44 /** 45 Get the normalized name of this node. For node types other than Element, this is the same as {@link #nodeName()}. 46 For an Element, will be the lower-cased tag name. 47 @return normalized node name 48 @since 1.15.4. 49 */ normalName()50 public String normalName() { 51 return nodeName(); 52 } 53 54 /** 55 Test if this node has the specified normalized name, in any namespace. 56 * @param normalName a normalized element name (e.g. {@code div}). 57 * @return true if the element's normal name matches exactly 58 * @since 1.17.2 59 */ nameIs(String normalName)60 public boolean nameIs(String normalName) { 61 return normalName().equals(normalName); 62 } 63 64 /** 65 Test if this node's parent has the specified normalized name. 66 * @param normalName a normalized name (e.g. {@code div}). 67 * @return true if the parent element's normal name matches exactly 68 * @since 1.17.2 69 */ parentNameIs(String normalName)70 public boolean parentNameIs(String normalName) { 71 return parentNode != null && parentNode.normalName().equals(normalName); 72 } 73 74 /** 75 Test if this node's parent is an Element with the specified normalized name and namespace. 76 * @param normalName a normalized element name (e.g. {@code div}). 77 * @param namespace the namespace 78 * @return true if the parent element's normal name matches exactly, and that element is in the specified namespace 79 * @since 1.17.2 80 */ parentElementIs(String normalName, String namespace)81 public boolean parentElementIs(String normalName, String namespace) { 82 return parentNode != null && parentNode instanceof Element 83 && ((Element) parentNode).elementIs(normalName, namespace); 84 } 85 86 /** 87 * Check if this Node has an actual Attributes object. 88 */ hasAttributes()89 protected abstract boolean hasAttributes(); 90 91 /** 92 Checks if this node has a parent. Nodes won't have parents if (e.g.) they are newly created and not added as a child 93 to an existing node, or if they are a {@link #shallowClone()}. In such cases, {@link #parent()} will return {@code null}. 94 @return if this node has a parent. 95 */ hasParent()96 public boolean hasParent() { 97 return parentNode != null; 98 } 99 100 /** 101 * Get an attribute's value by its key. <b>Case insensitive</b> 102 * <p> 103 * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs:</b></code>, 104 * which is a shortcut to the {@link #absUrl} method. 105 * </p> 106 * E.g.: 107 * <blockquote><code>String url = a.attr("abs:href");</code></blockquote> 108 * 109 * @param attributeKey The attribute key. 110 * @return The attribute, or empty string if not present (to avoid nulls). 111 * @see #attributes() 112 * @see #hasAttr(String) 113 * @see #absUrl(String) 114 */ attr(String attributeKey)115 public String attr(String attributeKey) { 116 Validate.notNull(attributeKey); 117 if (!hasAttributes()) 118 return EmptyString; 119 120 String val = attributes().getIgnoreCase(attributeKey); 121 if (val.length() > 0) 122 return val; 123 else if (attributeKey.startsWith("abs:")) 124 return absUrl(attributeKey.substring("abs:".length())); 125 else return ""; 126 } 127 128 /** 129 * Get each of the element's attributes. 130 * @return attributes (which implements iterable, in same order as presented in original HTML). 131 */ attributes()132 public abstract Attributes attributes(); 133 134 /** 135 Get the number of attributes that this Node has. 136 @return the number of attributes 137 @since 1.14.2 138 */ attributesSize()139 public int attributesSize() { 140 // added so that we can test how many attributes exist without implicitly creating the Attributes object 141 return hasAttributes() ? attributes().size() : 0; 142 } 143 144 /** 145 * Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is 146 * <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings. 147 * @param attributeKey The attribute key. 148 * @param attributeValue The attribute value. 149 * @return this (for chaining) 150 */ attr(String attributeKey, String attributeValue)151 public Node attr(String attributeKey, String attributeValue) { 152 attributeKey = NodeUtils.parser(this).settings().normalizeAttribute(attributeKey); 153 attributes().putIgnoreCase(attributeKey, attributeValue); 154 return this; 155 } 156 157 /** 158 * Test if this Node has an attribute. <b>Case insensitive</b>. 159 * @param attributeKey The attribute key to check. 160 * @return true if the attribute exists, false if not. 161 */ hasAttr(String attributeKey)162 public boolean hasAttr(String attributeKey) { 163 Validate.notNull(attributeKey); 164 if (!hasAttributes()) 165 return false; 166 167 if (attributeKey.startsWith("abs:")) { 168 String key = attributeKey.substring("abs:".length()); 169 if (attributes().hasKeyIgnoreCase(key) && !absUrl(key).isEmpty()) 170 return true; 171 } 172 return attributes().hasKeyIgnoreCase(attributeKey); 173 } 174 175 /** 176 * Remove an attribute from this node. 177 * @param attributeKey The attribute to remove. 178 * @return this (for chaining) 179 */ removeAttr(String attributeKey)180 public Node removeAttr(String attributeKey) { 181 Validate.notNull(attributeKey); 182 if (hasAttributes()) 183 attributes().removeIgnoreCase(attributeKey); 184 return this; 185 } 186 187 /** 188 * Clear (remove) each of the attributes in this node. 189 * @return this, for chaining 190 */ clearAttributes()191 public Node clearAttributes() { 192 if (hasAttributes()) { 193 Iterator<Attribute> it = attributes().iterator(); 194 while (it.hasNext()) { 195 it.next(); 196 it.remove(); 197 } 198 } 199 return this; 200 } 201 202 /** 203 Get the base URI that applies to this node. Will return an empty string if not defined. Used to make relative links 204 absolute. 205 206 @return base URI 207 @see #absUrl 208 */ baseUri()209 public abstract String baseUri(); 210 211 /** 212 * Set the baseUri for just this node (not its descendants), if this Node tracks base URIs. 213 * @param baseUri new URI 214 */ doSetBaseUri(String baseUri)215 protected abstract void doSetBaseUri(String baseUri); 216 217 /** 218 Update the base URI of this node and all of its descendants. 219 @param baseUri base URI to set 220 */ setBaseUri(final String baseUri)221 public void setBaseUri(final String baseUri) { 222 Validate.notNull(baseUri); 223 doSetBaseUri(baseUri); 224 } 225 226 /** 227 * Get an absolute URL from a URL attribute that may be relative (such as an <code><a href></code> or 228 * <code><img src></code>). 229 * <p> 230 * E.g.: <code>String absUrl = linkEl.absUrl("href");</code> 231 * </p> 232 * <p> 233 * If the attribute value is already absolute (i.e. it starts with a protocol, like 234 * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is 235 * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made 236 * absolute using that. 237 * </p> 238 * <p> 239 * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.: 240 * <code>String absUrl = linkEl.attr("abs:href");</code> 241 * </p> 242 * 243 * @param attributeKey The attribute key 244 * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or 245 * could not be made successfully into a URL. 246 * @see #attr 247 * @see java.net.URL#URL(java.net.URL, String) 248 */ absUrl(String attributeKey)249 public String absUrl(String attributeKey) { 250 Validate.notEmpty(attributeKey); 251 if (!(hasAttributes() && attributes().hasKeyIgnoreCase(attributeKey))) // not using hasAttr, so that we don't recurse down hasAttr->absUrl 252 return ""; 253 254 return StringUtil.resolve(baseUri(), attributes().getIgnoreCase(attributeKey)); 255 } 256 ensureChildNodes()257 protected abstract List<Node> ensureChildNodes(); 258 259 /** 260 Get a child node by its 0-based index. 261 @param index index of child node 262 @return the child node at this index. 263 @throws IndexOutOfBoundsException if the index is out of bounds. 264 */ childNode(int index)265 public Node childNode(int index) { 266 return ensureChildNodes().get(index); 267 } 268 269 /** 270 Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes 271 themselves can be manipulated. 272 @return list of children. If no children, returns an empty list. 273 */ childNodes()274 public List<Node> childNodes() { 275 if (childNodeSize() == 0) 276 return EmptyNodes; 277 278 List<Node> children = ensureChildNodes(); 279 List<Node> rewrap = new ArrayList<>(children.size()); // wrapped so that looping and moving will not throw a CME as the source changes 280 rewrap.addAll(children); 281 return Collections.unmodifiableList(rewrap); 282 } 283 284 /** 285 * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original 286 * nodes 287 * @return a deep copy of this node's children 288 */ childNodesCopy()289 public List<Node> childNodesCopy() { 290 final List<Node> nodes = ensureChildNodes(); 291 final ArrayList<Node> children = new ArrayList<>(nodes.size()); 292 for (Node node : nodes) { 293 children.add(node.clone()); 294 } 295 return children; 296 } 297 298 /** 299 * Get the number of child nodes that this node holds. 300 * @return the number of child nodes that this node holds. 301 */ childNodeSize()302 public abstract int childNodeSize(); 303 childNodesAsArray()304 protected Node[] childNodesAsArray() { 305 return ensureChildNodes().toArray(new Node[0]); 306 } 307 308 /** 309 * Delete all this node's children. 310 * @return this node, for chaining 311 */ empty()312 public abstract Node empty(); 313 314 315 /** 316 Gets this node's parent node. 317 @return parent node; or null if no parent. 318 @see #hasParent() 319 */ parent()320 public @Nullable Node parent() { 321 return parentNode; 322 } 323 324 /** 325 Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type. 326 @return parent node; or null if no parent. 327 */ parentNode()328 public @Nullable final Node parentNode() { 329 return parentNode; 330 } 331 332 /** 333 * Get this node's root node; that is, its topmost ancestor. If this node is the top ancestor, returns {@code this}. 334 * @return topmost ancestor. 335 */ root()336 public Node root() { 337 Node node = this; 338 while (node.parentNode != null) 339 node = node.parentNode; 340 return node; 341 } 342 343 /** 344 * Gets the Document associated with this Node. 345 * @return the Document associated with this Node, or null if there is no such Document. 346 */ ownerDocument()347 public @Nullable Document ownerDocument() { 348 Node root = root(); 349 return (root instanceof Document) ? (Document) root : null; 350 } 351 352 /** 353 * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. If this node is 354 * an orphan, nothing happens. 355 */ remove()356 public void remove() { 357 if (parentNode != null) 358 parentNode.removeChild(this); 359 } 360 361 /** 362 * Insert the specified HTML into the DOM before this node (as a preceding sibling). 363 * @param html HTML to add before this node 364 * @return this node, for chaining 365 * @see #after(String) 366 */ before(String html)367 public Node before(String html) { 368 addSiblingHtml(siblingIndex, html); 369 return this; 370 } 371 372 /** 373 * Insert the specified node into the DOM before this node (as a preceding sibling). 374 * @param node to add before this node 375 * @return this node, for chaining 376 * @see #after(Node) 377 */ before(Node node)378 public Node before(Node node) { 379 Validate.notNull(node); 380 Validate.notNull(parentNode); 381 382 // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add 383 if (node.parentNode == parentNode) node.remove(); 384 385 parentNode.addChildren(siblingIndex, node); 386 return this; 387 } 388 389 /** 390 * Insert the specified HTML into the DOM after this node (as a following sibling). 391 * @param html HTML to add after this node 392 * @return this node, for chaining 393 * @see #before(String) 394 */ after(String html)395 public Node after(String html) { 396 addSiblingHtml(siblingIndex + 1, html); 397 return this; 398 } 399 400 /** 401 * Insert the specified node into the DOM after this node (as a following sibling). 402 * @param node to add after this node 403 * @return this node, for chaining 404 * @see #before(Node) 405 */ after(Node node)406 public Node after(Node node) { 407 Validate.notNull(node); 408 Validate.notNull(parentNode); 409 410 // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add 411 if (node.parentNode == parentNode) node.remove(); 412 413 parentNode.addChildren(siblingIndex + 1, node); 414 return this; 415 } 416 addSiblingHtml(int index, String html)417 private void addSiblingHtml(int index, String html) { 418 Validate.notNull(html); 419 Validate.notNull(parentNode); 420 421 Element context = parent() instanceof Element ? (Element) parent() : null; 422 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri()); 423 parentNode.addChildren(index, nodes.toArray(new Node[0])); 424 } 425 426 /** 427 Wrap the supplied HTML around this node. 428 429 @param html HTML to wrap around this node, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. If 430 the input HTML does not parse to a result starting with an Element, this will be a no-op. 431 @return this node, for chaining. 432 */ wrap(String html)433 public Node wrap(String html) { 434 Validate.notEmpty(html); 435 436 // Parse context - parent (because wrapping), this, or null 437 Element context = 438 parentNode != null && parentNode instanceof Element ? (Element) parentNode : 439 this instanceof Element ? (Element) this : 440 null; 441 List<Node> wrapChildren = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri()); 442 Node wrapNode = wrapChildren.get(0); 443 if (!(wrapNode instanceof Element)) // nothing to wrap with; noop 444 return this; 445 446 Element wrap = (Element) wrapNode; 447 Element deepest = getDeepChild(wrap); 448 if (parentNode != null) 449 parentNode.replaceChild(this, wrap); 450 deepest.addChildren(this); // side effect of tricking wrapChildren to lose first 451 452 // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder 453 if (wrapChildren.size() > 0) { 454 //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here) 455 for (int i = 0; i < wrapChildren.size(); i++) { 456 Node remainder = wrapChildren.get(i); 457 // if no parent, this could be the wrap node, so skip 458 if (wrap == remainder) 459 continue; 460 461 if (remainder.parentNode != null) 462 remainder.parentNode.removeChild(remainder); 463 wrap.after(remainder); 464 } 465 } 466 return this; 467 } 468 469 /** 470 * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping 471 * the node but keeping its children. 472 * <p> 473 * For example, with the input html: 474 * </p> 475 * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p> 476 * Calling {@code element.unwrap()} on the {@code span} element will result in the html: 477 * <p>{@code <div>One Two <b>Three</b></div>}</p> 478 * and the {@code "Two "} {@link TextNode} being returned. 479 * 480 * @return the first child of this node, after the node has been unwrapped. @{code Null} if the node had no children. 481 * @see #remove() 482 * @see #wrap(String) 483 */ unwrap()484 public @Nullable Node unwrap() { 485 Validate.notNull(parentNode); 486 Node firstChild = firstChild(); 487 parentNode.addChildren(siblingIndex, this.childNodesAsArray()); 488 this.remove(); 489 490 return firstChild; 491 } 492 getDeepChild(Element el)493 private Element getDeepChild(Element el) { 494 Element child = el.firstElementChild(); 495 while (child != null) { 496 el = child; 497 child = child.firstElementChild(); 498 } 499 return el; 500 } 501 nodelistChanged()502 void nodelistChanged() { 503 // Element overrides this to clear its shadow children elements 504 } 505 506 /** 507 * Replace this node in the DOM with the supplied node. 508 * @param in the node that will replace the existing node. 509 */ replaceWith(Node in)510 public void replaceWith(Node in) { 511 Validate.notNull(in); 512 Validate.notNull(parentNode); 513 parentNode.replaceChild(this, in); 514 } 515 setParentNode(Node parentNode)516 protected void setParentNode(Node parentNode) { 517 Validate.notNull(parentNode); 518 if (this.parentNode != null) 519 this.parentNode.removeChild(this); 520 this.parentNode = parentNode; 521 } 522 replaceChild(Node out, Node in)523 protected void replaceChild(Node out, Node in) { 524 Validate.isTrue(out.parentNode == this); 525 Validate.notNull(in); 526 if (out == in) return; // no-op self replacement 527 528 if (in.parentNode != null) 529 in.parentNode.removeChild(in); 530 531 final int index = out.siblingIndex; 532 ensureChildNodes().set(index, in); 533 in.parentNode = this; 534 in.setSiblingIndex(index); 535 out.parentNode = null; 536 } 537 removeChild(Node out)538 protected void removeChild(Node out) { 539 Validate.isTrue(out.parentNode == this); 540 final int index = out.siblingIndex; 541 ensureChildNodes().remove(index); 542 reindexChildren(index); 543 out.parentNode = null; 544 } 545 addChildren(Node... children)546 protected void addChildren(Node... children) { 547 //most used. short circuit addChildren(int), which hits reindex children and array copy 548 final List<Node> nodes = ensureChildNodes(); 549 550 for (Node child: children) { 551 reparentChild(child); 552 nodes.add(child); 553 child.setSiblingIndex(nodes.size()-1); 554 } 555 } 556 addChildren(int index, Node... children)557 protected void addChildren(int index, Node... children) { 558 Validate.notNull(children); 559 if (children.length == 0) { 560 return; 561 } 562 final List<Node> nodes = ensureChildNodes(); 563 564 // fast path - if used as a wrap (index=0, children = child[0].parent.children - do inplace 565 final Node firstParent = children[0].parent(); 566 if (firstParent != null && firstParent.childNodeSize() == children.length) { 567 boolean sameList = true; 568 final List<Node> firstParentNodes = firstParent.ensureChildNodes(); 569 // identity check contents to see if same 570 int i = children.length; 571 while (i-- > 0) { 572 if (children[i] != firstParentNodes.get(i)) { 573 sameList = false; 574 break; 575 } 576 } 577 if (sameList) { // moving, so OK to empty firstParent and short-circuit 578 boolean wasEmpty = childNodeSize() == 0; 579 firstParent.empty(); 580 nodes.addAll(index, Arrays.asList(children)); 581 i = children.length; 582 while (i-- > 0) { 583 children[i].parentNode = this; 584 } 585 if (!(wasEmpty && children[0].siblingIndex == 0)) // skip reindexing if we just moved 586 reindexChildren(index); 587 return; 588 } 589 } 590 591 Validate.noNullElements(children); 592 for (Node child : children) { 593 reparentChild(child); 594 } 595 nodes.addAll(index, Arrays.asList(children)); 596 reindexChildren(index); 597 } 598 reparentChild(Node child)599 protected void reparentChild(Node child) { 600 child.setParentNode(this); 601 } 602 reindexChildren(int start)603 private void reindexChildren(int start) { 604 final int size = childNodeSize(); 605 if (size == 0) return; 606 final List<Node> childNodes = ensureChildNodes(); 607 for (int i = start; i < size; i++) { 608 childNodes.get(i).setSiblingIndex(i); 609 } 610 } 611 612 /** 613 Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not 614 include this node (a node is not a sibling of itself). 615 @return node siblings. If the node has no parent, returns an empty list. 616 */ siblingNodes()617 public List<Node> siblingNodes() { 618 if (parentNode == null) 619 return Collections.emptyList(); 620 621 List<Node> nodes = parentNode.ensureChildNodes(); 622 List<Node> siblings = new ArrayList<>(nodes.size() - 1); 623 for (Node node: nodes) 624 if (node != this) 625 siblings.add(node); 626 return siblings; 627 } 628 629 /** 630 Get this node's next sibling. 631 @return next sibling, or {@code null} if this is the last sibling 632 */ nextSibling()633 public @Nullable Node nextSibling() { 634 if (parentNode == null) 635 return null; // root 636 637 final List<Node> siblings = parentNode.ensureChildNodes(); 638 final int index = siblingIndex+1; 639 if (siblings.size() > index) 640 return siblings.get(index); 641 else 642 return null; 643 } 644 645 /** 646 Get this node's previous sibling. 647 @return the previous sibling, or @{code null} if this is the first sibling 648 */ previousSibling()649 public @Nullable Node previousSibling() { 650 if (parentNode == null) 651 return null; // root 652 653 if (siblingIndex > 0) 654 return parentNode.ensureChildNodes().get(siblingIndex-1); 655 else 656 return null; 657 } 658 659 /** 660 * Get the list index of this node in its node sibling list. E.g. if this is the first node 661 * sibling, returns 0. 662 * @return position in node sibling list 663 * @see org.jsoup.nodes.Element#elementSiblingIndex() 664 */ siblingIndex()665 public int siblingIndex() { 666 return siblingIndex; 667 } 668 setSiblingIndex(int siblingIndex)669 protected void setSiblingIndex(int siblingIndex) { 670 this.siblingIndex = siblingIndex; 671 } 672 673 /** 674 Gets the first child node of this node, or {@code null} if there is none. This could be any Node type, such as an 675 Element, TextNode, Comment, etc. Use {@link Element#firstElementChild()} to get the first Element child. 676 @return the first child node, or null if there are no children. 677 @see Element#firstElementChild() 678 @see #lastChild() 679 @since 1.15.2 680 */ firstChild()681 public @Nullable Node firstChild() { 682 if (childNodeSize() == 0) return null; 683 return ensureChildNodes().get(0); 684 } 685 686 /** 687 Gets the last child node of this node, or {@code null} if there is none. 688 @return the last child node, or null if there are no children. 689 @see Element#lastElementChild() 690 @see #firstChild() 691 @since 1.15.2 692 */ lastChild()693 public @Nullable Node lastChild() { 694 final int size = childNodeSize(); 695 if (size == 0) return null; 696 List<Node> children = ensureChildNodes(); 697 return children.get(size - 1); 698 } 699 700 /** 701 * Perform a depth-first traversal through this node and its descendants. 702 * @param nodeVisitor the visitor callbacks to perform on each node 703 * @return this node, for chaining 704 */ traverse(NodeVisitor nodeVisitor)705 public Node traverse(NodeVisitor nodeVisitor) { 706 Validate.notNull(nodeVisitor); 707 NodeTraversor.traverse(nodeVisitor, this); 708 return this; 709 } 710 711 /** 712 Perform the supplied action on this Node and each of its descendants, during a depth-first traversal. Nodes may be 713 inspected, changed, added, replaced, or removed. 714 @param action the function to perform on the node 715 @return this Node, for chaining 716 @see Element#forEach(Consumer) 717 */ forEachNode(Consumer<? super Node> action)718 public Node forEachNode(Consumer<? super Node> action) { 719 Validate.notNull(action); 720 nodeStream().forEach(action); 721 return this; 722 } 723 724 /** 725 * Perform a depth-first filtered traversal through this node and its descendants. 726 * @param nodeFilter the filter callbacks to perform on each node 727 * @return this node, for chaining 728 */ filter(NodeFilter nodeFilter)729 public Node filter(NodeFilter nodeFilter) { 730 Validate.notNull(nodeFilter); 731 NodeTraversor.filter(nodeFilter, this); 732 return this; 733 } 734 735 /** 736 Returns a Stream of this Node and all of its descendant Nodes. The stream has document order. 737 @return a stream of all nodes. 738 @see Element#stream() 739 @since 1.17.1 740 */ nodeStream()741 public Stream<Node> nodeStream() { 742 return NodeUtils.stream(this, Node.class); 743 } 744 745 /** 746 Returns a Stream of this and descendant nodes, containing only nodes of the specified type. The stream has document 747 order. 748 @return a stream of nodes filtered by type. 749 @see Element#stream() 750 @since 1.17.1 751 */ nodeStream(Class<T> type)752 public <T extends Node> Stream<T> nodeStream(Class<T> type) { 753 return NodeUtils.stream(this, type); 754 } 755 756 /** 757 Get the outer HTML of this node. For example, on a {@code p} element, may return {@code <p>Para</p>}. 758 @return outer HTML 759 @see Element#html() 760 @see Element#text() 761 */ outerHtml()762 public String outerHtml() { 763 StringBuilder accum = StringUtil.borrowBuilder(); 764 outerHtml(accum); 765 return StringUtil.releaseBuilder(accum); 766 } 767 outerHtml(Appendable accum)768 protected void outerHtml(Appendable accum) { 769 NodeTraversor.traverse(new OuterHtmlVisitor(accum, NodeUtils.outputSettings(this)), this); 770 } 771 772 /** 773 Get the outer HTML of this node. 774 @param accum accumulator to place HTML into 775 @throws IOException if appending to the given accumulator fails. 776 */ outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out)777 abstract void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException; 778 outerHtmlTail(final Appendable accum, int depth, final Document.OutputSettings out)779 abstract void outerHtmlTail(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException; 780 781 /** 782 * Write this node and its children to the given {@link Appendable}. 783 * 784 * @param appendable the {@link Appendable} to write to. 785 * @return the supplied {@link Appendable}, for chaining. 786 */ html(T appendable)787 public <T extends Appendable> T html(T appendable) { 788 outerHtml(appendable); 789 return appendable; 790 } 791 792 /** 793 Get the source range (start and end positions) in the original input source from which this node was parsed. 794 Position tracking must be enabled prior to parsing the content. For an Element, this will be the positions of the 795 start tag. 796 @return the range for the start of the node, or {@code untracked} if its range was not tracked. 797 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 798 @see Range#isImplicit() 799 @see Element#endSourceRange() 800 @see Attributes#sourceRange(String name) 801 @since 1.15.2 802 */ sourceRange()803 public Range sourceRange() { 804 return Range.of(this, true); 805 } 806 807 /** Test if this node is the first child, or first following blank text. */ isEffectivelyFirst()808 final boolean isEffectivelyFirst() { 809 if (siblingIndex == 0) return true; 810 if (siblingIndex == 1) { 811 final Node prev = previousSibling(); 812 return prev instanceof TextNode && (((TextNode) prev).isBlank()); 813 } 814 return false; 815 } 816 817 /** 818 * Gets this node's outer HTML. 819 * @return outer HTML. 820 * @see #outerHtml() 821 */ toString()822 public String toString() { 823 return outerHtml(); 824 } 825 indent(Appendable accum, int depth, Document.OutputSettings out)826 protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException { 827 accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth())); 828 } 829 830 /** 831 * Check if this node is the same instance of another (object identity test). 832 * <p>For an node value equality check, see {@link #hasSameValue(Object)}</p> 833 * @param o other object to compare to 834 * @return true if the content of this node is the same as the other 835 * @see Node#hasSameValue(Object) 836 */ 837 @Override equals(@ullable Object o)838 public boolean equals(@Nullable Object o) { 839 // implemented just so that javadoc is clear this is an identity test 840 return this == o; 841 } 842 843 /** 844 Provides a hashCode for this Node, based on its object identity. Changes to the Node's content will not impact the 845 result. 846 @return an object identity based hashcode for this Node 847 */ 848 @Override hashCode()849 public int hashCode() { 850 // implemented so that javadoc and scanners are clear this is an identity test 851 return super.hashCode(); 852 } 853 854 /** 855 * Check if this node has the same content as another node. A node is considered the same if its name, attributes and content match the 856 * other node; particularly its position in the tree does not influence its similarity. 857 * @param o other object to compare to 858 * @return true if the content of this node is the same as the other 859 */ hasSameValue(@ullable Object o)860 public boolean hasSameValue(@Nullable Object o) { 861 if (this == o) return true; 862 if (o == null || getClass() != o.getClass()) return false; 863 864 return this.outerHtml().equals(((Node) o).outerHtml()); 865 } 866 867 /** 868 * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or 869 * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the 870 * original node. 871 * <p> 872 * The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}. 873 * @return a stand-alone cloned node, including clones of any children 874 * @see #shallowClone() 875 */ 876 @SuppressWarnings("MethodDoesntCallSuperMethod") // because it does call super.clone in doClone - analysis just isn't following 877 @Override clone()878 public Node clone() { 879 Node thisClone = doClone(null); // splits for orphan 880 881 // Queue up nodes that need their children cloned (BFS). 882 final LinkedList<Node> nodesToProcess = new LinkedList<>(); 883 nodesToProcess.add(thisClone); 884 885 while (!nodesToProcess.isEmpty()) { 886 Node currParent = nodesToProcess.remove(); 887 888 final int size = currParent.childNodeSize(); 889 for (int i = 0; i < size; i++) { 890 final List<Node> childNodes = currParent.ensureChildNodes(); 891 Node childClone = childNodes.get(i).doClone(currParent); 892 childNodes.set(i, childClone); 893 nodesToProcess.add(childClone); 894 } 895 } 896 897 return thisClone; 898 } 899 900 /** 901 * Create a stand-alone, shallow copy of this node. None of its children (if any) will be cloned, and it will have 902 * no parent or sibling nodes. 903 * @return a single independent copy of this node 904 * @see #clone() 905 */ shallowClone()906 public Node shallowClone() { 907 return doClone(null); 908 } 909 910 /* 911 * Return a clone of the node using the given parent (which can be null). 912 * Not a deep copy of children. 913 */ doClone(@ullable Node parent)914 protected Node doClone(@Nullable Node parent) { 915 Node clone; 916 917 try { 918 clone = (Node) super.clone(); 919 } catch (CloneNotSupportedException e) { 920 throw new RuntimeException(e); 921 } 922 923 clone.parentNode = parent; // can be null, to create an orphan split 924 clone.siblingIndex = parent == null ? 0 : siblingIndex; 925 // if not keeping the parent, shallowClone the ownerDocument to preserve its settings 926 if (parent == null && !(this instanceof Document)) { 927 Document doc = ownerDocument(); 928 if (doc != null) { 929 Document docClone = doc.shallowClone(); 930 clone.parentNode = docClone; 931 docClone.ensureChildNodes().add(clone); 932 } 933 } 934 935 return clone; 936 } 937 938 private static class OuterHtmlVisitor implements NodeVisitor { 939 private final Appendable accum; 940 private final Document.OutputSettings out; 941 OuterHtmlVisitor(Appendable accum, Document.OutputSettings out)942 OuterHtmlVisitor(Appendable accum, Document.OutputSettings out) { 943 this.accum = accum; 944 this.out = out; 945 out.prepareEncoder(); 946 } 947 head(Node node, int depth)948 public void head(Node node, int depth) { 949 try { 950 node.outerHtmlHead(accum, depth, out); 951 } catch (IOException exception) { 952 throw new SerializationException(exception); 953 } 954 } 955 tail(Node node, int depth)956 public void tail(Node node, int depth) { 957 if (!node.nodeName().equals("#text")) { // saves a void hit. 958 try { 959 node.outerHtmlTail(accum, depth, out); 960 } catch (IOException exception) { 961 throw new SerializationException(exception); 962 } 963 } 964 } 965 } 966 } 967