xref: /aosp_15_r20/external/jsoup/src/main/java/org/jsoup/nodes/Node.java (revision 6da8f8c4bc310ad659121b84dd089062417a2ce2)
1 package org.jsoup.nodes;
2 
3 import org.jsoup.SerializationException;
4 import org.jsoup.helper.Validate;
5 import org.jsoup.internal.StringUtil;
6 import org.jsoup.select.NodeFilter;
7 import org.jsoup.select.NodeTraversor;
8 import org.jsoup.select.NodeVisitor;
9 import org.jspecify.annotations.Nullable;
10 
11 import java.io.IOException;
12 import java.util.ArrayList;
13 import java.util.Arrays;
14 import java.util.Collections;
15 import java.util.Iterator;
16 import java.util.LinkedList;
17 import java.util.List;
18 import java.util.function.Consumer;
19 import java.util.stream.Stream;
20 
21 /**
22  The base, abstract Node model. {@link Element}, {@link Document}, {@link Comment}, {@link TextNode}, et al.,
23  are instances of Node.
24 
25  @author Jonathan Hedley, jonathan@hedley.net */
26 public abstract class Node implements Cloneable {
    /** Shared empty list, returned by {@link #childNodes()} when a node has no children. */
    static final List<Node> EmptyNodes = Collections.emptyList();
    /** Shared empty string, returned by {@link #attr(String)} when a node has no attributes. */
    static final String EmptyString = "";
    @Nullable Node parentNode; // Nodes don't always have parents
    int siblingIndex; // this node's position in its parent's child list; kept current via setSiblingIndex()
31 
32     /**
33      * Default constructor. Doesn't set up base uri, children, or attributes; use with caution.
34      */
Node()35     protected Node() {
36     }
37 
    /**
     Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
     The default {@link #normalName()} implementation delegates to this method.
     @return node name
     */
    public abstract String nodeName();
43 
44     /**
45      Get the normalized name of this node. For node types other than Element, this is the same as {@link #nodeName()}.
46      For an Element, will be the lower-cased tag name.
47      @return normalized node name
48      @since 1.15.4.
49      */
normalName()50     public String normalName() {
51         return nodeName();
52     }
53 
54     /**
55      Test if this node has the specified normalized name, in any namespace.
56      * @param normalName a normalized element name (e.g. {@code div}).
57      * @return true if the element's normal name matches exactly
58      * @since 1.17.2
59      */
nameIs(String normalName)60     public boolean nameIs(String normalName) {
61         return normalName().equals(normalName);
62     }
63 
64     /**
65      Test if this node's parent has the specified normalized name.
66      * @param normalName a normalized name (e.g. {@code div}).
67      * @return true if the parent element's normal name matches exactly
68      * @since 1.17.2
69      */
parentNameIs(String normalName)70     public boolean parentNameIs(String normalName) {
71         return parentNode != null && parentNode.normalName().equals(normalName);
72     }
73 
74     /**
75      Test if this node's parent is an Element with the specified normalized name and namespace.
76      * @param normalName a normalized element name (e.g. {@code div}).
77      * @param namespace the namespace
78      * @return true if the parent element's normal name matches exactly, and that element is in the specified namespace
79      * @since 1.17.2
80      */
parentElementIs(String normalName, String namespace)81     public boolean parentElementIs(String normalName, String namespace) {
82         return parentNode != null && parentNode instanceof Element
83             && ((Element) parentNode).elementIs(normalName, namespace);
84     }
85 
    /**
     * Check if this Node has an actual Attributes object. The attribute accessors in this class call this before
     * {@link #attributes()}, so that attributes can be tested for without implicitly creating the Attributes
     * object.
     * @return true if this node has an Attributes object
     */
    protected abstract boolean hasAttributes();
90 
91     /**
92      Checks if this node has a parent. Nodes won't have parents if (e.g.) they are newly created and not added as a child
93      to an existing node, or if they are a {@link #shallowClone()}. In such cases, {@link #parent()} will return {@code null}.
94      @return if this node has a parent.
95      */
hasParent()96     public boolean hasParent() {
97         return parentNode != null;
98     }
99 
100     /**
101      * Get an attribute's value by its key. <b>Case insensitive</b>
102      * <p>
103      * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs:</b></code>,
104      * which is a shortcut to the {@link #absUrl} method.
105      * </p>
106      * E.g.:
107      * <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
108      *
109      * @param attributeKey The attribute key.
110      * @return The attribute, or empty string if not present (to avoid nulls).
111      * @see #attributes()
112      * @see #hasAttr(String)
113      * @see #absUrl(String)
114      */
attr(String attributeKey)115     public String attr(String attributeKey) {
116         Validate.notNull(attributeKey);
117         if (!hasAttributes())
118             return EmptyString;
119 
120         String val = attributes().getIgnoreCase(attributeKey);
121         if (val.length() > 0)
122             return val;
123         else if (attributeKey.startsWith("abs:"))
124             return absUrl(attributeKey.substring("abs:".length()));
125         else return "";
126     }
127 
    /**
     * Get each of the element's attributes.
     * NOTE(review): the comment in {@link #attributesSize()} indicates that calling this may implicitly create the
     * Attributes object; check {@link #hasAttributes()} first when only testing for presence.
     * @return attributes (which implements iterable, in same order as presented in original HTML).
     */
    public abstract Attributes attributes();
133 
134     /**
135      Get the number of attributes that this Node has.
136      @return the number of attributes
137      @since 1.14.2
138      */
attributesSize()139     public int attributesSize() {
140         // added so that we can test how many attributes exist without implicitly creating the Attributes object
141         return hasAttributes() ? attributes().size() : 0;
142     }
143 
144     /**
145      * Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is
146      * <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings.
147      * @param attributeKey The attribute key.
148      * @param attributeValue The attribute value.
149      * @return this (for chaining)
150      */
attr(String attributeKey, String attributeValue)151     public Node attr(String attributeKey, String attributeValue) {
152         attributeKey = NodeUtils.parser(this).settings().normalizeAttribute(attributeKey);
153         attributes().putIgnoreCase(attributeKey, attributeValue);
154         return this;
155     }
156 
157     /**
158      * Test if this Node has an attribute. <b>Case insensitive</b>.
159      * @param attributeKey The attribute key to check.
160      * @return true if the attribute exists, false if not.
161      */
hasAttr(String attributeKey)162     public boolean hasAttr(String attributeKey) {
163         Validate.notNull(attributeKey);
164         if (!hasAttributes())
165             return false;
166 
167         if (attributeKey.startsWith("abs:")) {
168             String key = attributeKey.substring("abs:".length());
169             if (attributes().hasKeyIgnoreCase(key) && !absUrl(key).isEmpty())
170                 return true;
171         }
172         return attributes().hasKeyIgnoreCase(attributeKey);
173     }
174 
175     /**
176      * Remove an attribute from this node.
177      * @param attributeKey The attribute to remove.
178      * @return this (for chaining)
179      */
removeAttr(String attributeKey)180     public Node removeAttr(String attributeKey) {
181         Validate.notNull(attributeKey);
182         if (hasAttributes())
183             attributes().removeIgnoreCase(attributeKey);
184         return this;
185     }
186 
187     /**
188      * Clear (remove) each of the attributes in this node.
189      * @return this, for chaining
190      */
clearAttributes()191     public Node clearAttributes() {
192         if (hasAttributes()) {
193             Iterator<Attribute> it = attributes().iterator();
194             while (it.hasNext()) {
195                 it.next();
196                 it.remove();
197             }
198         }
199         return this;
200     }
201 
    /**
     Get the base URI that applies to this node. Will return an empty string if not defined. Used to make relative links
     absolute: {@link #absUrl} resolves attribute values against this value.

     @return base URI
     @see #absUrl
     */
    public abstract String baseUri();
210 
    /**
     * Set the baseUri for just this node (not its descendants), if this Node tracks base URIs. Called by
     * {@link #setBaseUri(String)} once it has checked that the URI is not null.
     * @param baseUri new URI
     */
    protected abstract void doSetBaseUri(String baseUri);
216 
217     /**
218      Update the base URI of this node and all of its descendants.
219      @param baseUri base URI to set
220      */
setBaseUri(final String baseUri)221     public void setBaseUri(final String baseUri) {
222         Validate.notNull(baseUri);
223         doSetBaseUri(baseUri);
224     }
225 
226     /**
227      * Get an absolute URL from a URL attribute that may be relative (such as an <code>&lt;a href&gt;</code> or
228      * <code>&lt;img src&gt;</code>).
229      * <p>
230      * E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
231      * </p>
232      * <p>
233      * If the attribute value is already absolute (i.e. it starts with a protocol, like
234      * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
235      * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
236      * absolute using that.
237      * </p>
238      * <p>
239      * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
240      * <code>String absUrl = linkEl.attr("abs:href");</code>
241      * </p>
242      *
243      * @param attributeKey The attribute key
244      * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
245      * could not be made successfully into a URL.
246      * @see #attr
247      * @see java.net.URL#URL(java.net.URL, String)
248      */
absUrl(String attributeKey)249     public String absUrl(String attributeKey) {
250         Validate.notEmpty(attributeKey);
251         if (!(hasAttributes() && attributes().hasKeyIgnoreCase(attributeKey))) // not using hasAttr, so that we don't recurse down hasAttr->absUrl
252             return "";
253 
254         return StringUtil.resolve(baseUri(), attributes().getIgnoreCase(attributeKey));
255     }
256 
    /**
     * Get the list that holds this node's children. Methods in this class read the returned list and also modify
     * it in place ({@code add}, {@code addAll}, {@code set}, {@code remove}).
     * NOTE(review): the name suggests the list may be created on demand — confirm against the subclasses.
     * @return the child node list
     */
    protected abstract List<Node> ensureChildNodes();
258 
259     /**
260      Get a child node by its 0-based index.
261      @param index index of child node
262      @return the child node at this index.
263      @throws IndexOutOfBoundsException if the index is out of bounds.
264      */
childNode(int index)265     public Node childNode(int index) {
266         return ensureChildNodes().get(index);
267     }
268 
269     /**
270      Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
271      themselves can be manipulated.
272      @return list of children. If no children, returns an empty list.
273      */
childNodes()274     public List<Node> childNodes() {
275         if (childNodeSize() == 0)
276             return EmptyNodes;
277 
278         List<Node> children = ensureChildNodes();
279         List<Node> rewrap = new ArrayList<>(children.size()); // wrapped so that looping and moving will not throw a CME as the source changes
280         rewrap.addAll(children);
281         return Collections.unmodifiableList(rewrap);
282     }
283 
284     /**
285      * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original
286      * nodes
287      * @return a deep copy of this node's children
288      */
childNodesCopy()289     public List<Node> childNodesCopy() {
290         final List<Node> nodes = ensureChildNodes();
291         final ArrayList<Node> children = new ArrayList<>(nodes.size());
292         for (Node node : nodes) {
293             children.add(node.clone());
294         }
295         return children;
296     }
297 
    /**
     * Get the number of child nodes that this node holds. Used within this class as the emptiness check made
     * before reading from {@link #ensureChildNodes()}.
     * @return the number of child nodes that this node holds.
     */
    public abstract int childNodeSize();
303 
childNodesAsArray()304     protected Node[] childNodesAsArray() {
305         return ensureChildNodes().toArray(new Node[0]);
306     }
307 
    /**
     * Delete all this node's children. Also called by {@link #addChildren(int, Node...)} on a source parent whose
     * complete child list is being moved to another node.
     * @return this node, for chaining
     */
    public abstract Node empty();
313 
314 
315     /**
316      Gets this node's parent node.
317      @return parent node; or null if no parent.
318      @see #hasParent()
319      */
parent()320     public @Nullable Node parent() {
321         return parentNode;
322     }
323 
324     /**
325      Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type.
326      @return parent node; or null if no parent.
327      */
parentNode()328     public @Nullable final Node parentNode() {
329         return parentNode;
330     }
331 
332     /**
333      * Get this node's root node; that is, its topmost ancestor. If this node is the top ancestor, returns {@code this}.
334      * @return topmost ancestor.
335      */
root()336     public Node root() {
337         Node node = this;
338         while (node.parentNode != null)
339             node = node.parentNode;
340         return node;
341     }
342 
343     /**
344      * Gets the Document associated with this Node.
345      * @return the Document associated with this Node, or null if there is no such Document.
346      */
ownerDocument()347     public @Nullable Document ownerDocument() {
348         Node root = root();
349         return (root instanceof Document) ? (Document) root : null;
350     }
351 
352     /**
353      * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. If this node is
354      * an orphan, nothing happens.
355      */
remove()356     public void remove() {
357         if (parentNode != null)
358             parentNode.removeChild(this);
359     }
360 
361     /**
362      * Insert the specified HTML into the DOM before this node (as a preceding sibling).
363      * @param html HTML to add before this node
364      * @return this node, for chaining
365      * @see #after(String)
366      */
before(String html)367     public Node before(String html) {
368         addSiblingHtml(siblingIndex, html);
369         return this;
370     }
371 
372     /**
373      * Insert the specified node into the DOM before this node (as a preceding sibling).
374      * @param node to add before this node
375      * @return this node, for chaining
376      * @see #after(Node)
377      */
before(Node node)378     public Node before(Node node) {
379         Validate.notNull(node);
380         Validate.notNull(parentNode);
381 
382         // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
383         if (node.parentNode == parentNode) node.remove();
384 
385         parentNode.addChildren(siblingIndex, node);
386         return this;
387     }
388 
389     /**
390      * Insert the specified HTML into the DOM after this node (as a following sibling).
391      * @param html HTML to add after this node
392      * @return this node, for chaining
393      * @see #before(String)
394      */
after(String html)395     public Node after(String html) {
396         addSiblingHtml(siblingIndex + 1, html);
397         return this;
398     }
399 
400     /**
401      * Insert the specified node into the DOM after this node (as a following sibling).
402      * @param node to add after this node
403      * @return this node, for chaining
404      * @see #before(Node)
405      */
after(Node node)406     public Node after(Node node) {
407         Validate.notNull(node);
408         Validate.notNull(parentNode);
409 
410         // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
411         if (node.parentNode == parentNode) node.remove();
412 
413         parentNode.addChildren(siblingIndex + 1, node);
414         return this;
415     }
416 
addSiblingHtml(int index, String html)417     private void addSiblingHtml(int index, String html) {
418         Validate.notNull(html);
419         Validate.notNull(parentNode);
420 
421         Element context = parent() instanceof Element ? (Element) parent() : null;
422         List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
423         parentNode.addChildren(index, nodes.toArray(new Node[0]));
424     }
425 
426     /**
427      Wrap the supplied HTML around this node.
428 
429      @param html HTML to wrap around this node, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. If
430      the input HTML does not parse to a result starting with an Element, this will be a no-op.
431      @return this node, for chaining.
432      */
wrap(String html)433     public Node wrap(String html) {
434         Validate.notEmpty(html);
435 
436         // Parse context - parent (because wrapping), this, or null
437         Element context =
438             parentNode != null && parentNode instanceof Element ? (Element) parentNode :
439                 this instanceof Element ? (Element) this :
440                     null;
441         List<Node> wrapChildren = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
442         Node wrapNode = wrapChildren.get(0);
443         if (!(wrapNode instanceof Element)) // nothing to wrap with; noop
444             return this;
445 
446         Element wrap = (Element) wrapNode;
447         Element deepest = getDeepChild(wrap);
448         if (parentNode != null)
449             parentNode.replaceChild(this, wrap);
450         deepest.addChildren(this); // side effect of tricking wrapChildren to lose first
451 
452         // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
453         if (wrapChildren.size() > 0) {
454             //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here)
455             for (int i = 0; i < wrapChildren.size(); i++) {
456                 Node remainder = wrapChildren.get(i);
457                 // if no parent, this could be the wrap node, so skip
458                 if (wrap == remainder)
459                     continue;
460 
461                 if (remainder.parentNode != null)
462                     remainder.parentNode.removeChild(remainder);
463                 wrap.after(remainder);
464             }
465         }
466         return this;
467     }
468 
469     /**
470      * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
471      * the node but keeping its children.
472      * <p>
473      * For example, with the input html:
474      * </p>
475      * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p>
476      * Calling {@code element.unwrap()} on the {@code span} element will result in the html:
477      * <p>{@code <div>One Two <b>Three</b></div>}</p>
478      * and the {@code "Two "} {@link TextNode} being returned.
479      *
480      * @return the first child of this node, after the node has been unwrapped. @{code Null} if the node had no children.
481      * @see #remove()
482      * @see #wrap(String)
483      */
unwrap()484     public @Nullable Node unwrap() {
485         Validate.notNull(parentNode);
486         Node firstChild = firstChild();
487         parentNode.addChildren(siblingIndex, this.childNodesAsArray());
488         this.remove();
489 
490         return firstChild;
491     }
492 
getDeepChild(Element el)493     private Element getDeepChild(Element el) {
494         Element child = el.firstElementChild();
495         while (child != null) {
496             el = child;
497             child = child.firstElementChild();
498         }
499         return el;
500     }
501 
nodelistChanged()502     void nodelistChanged() {
503         // Element overrides this to clear its shadow children elements
504     }
505 
506     /**
507      * Replace this node in the DOM with the supplied node.
508      * @param in the node that will replace the existing node.
509      */
replaceWith(Node in)510     public void replaceWith(Node in) {
511         Validate.notNull(in);
512         Validate.notNull(parentNode);
513         parentNode.replaceChild(this, in);
514     }
515 
setParentNode(Node parentNode)516     protected void setParentNode(Node parentNode) {
517         Validate.notNull(parentNode);
518         if (this.parentNode != null)
519             this.parentNode.removeChild(this);
520         this.parentNode = parentNode;
521     }
522 
replaceChild(Node out, Node in)523     protected void replaceChild(Node out, Node in) {
524         Validate.isTrue(out.parentNode == this);
525         Validate.notNull(in);
526         if (out == in) return; // no-op self replacement
527 
528         if (in.parentNode != null)
529             in.parentNode.removeChild(in);
530 
531         final int index = out.siblingIndex;
532         ensureChildNodes().set(index, in);
533         in.parentNode = this;
534         in.setSiblingIndex(index);
535         out.parentNode = null;
536     }
537 
removeChild(Node out)538     protected void removeChild(Node out) {
539         Validate.isTrue(out.parentNode == this);
540         final int index = out.siblingIndex;
541         ensureChildNodes().remove(index);
542         reindexChildren(index);
543         out.parentNode = null;
544     }
545 
addChildren(Node... children)546     protected void addChildren(Node... children) {
547         //most used. short circuit addChildren(int), which hits reindex children and array copy
548         final List<Node> nodes = ensureChildNodes();
549 
550         for (Node child: children) {
551             reparentChild(child);
552             nodes.add(child);
553             child.setSiblingIndex(nodes.size()-1);
554         }
555     }
556 
addChildren(int index, Node... children)557     protected void addChildren(int index, Node... children) {
558         Validate.notNull(children);
559         if (children.length == 0) {
560             return;
561         }
562         final List<Node> nodes = ensureChildNodes();
563 
564         // fast path - if used as a wrap (index=0, children = child[0].parent.children - do inplace
565         final Node firstParent = children[0].parent();
566         if (firstParent != null && firstParent.childNodeSize() == children.length) {
567             boolean sameList = true;
568             final List<Node> firstParentNodes = firstParent.ensureChildNodes();
569             // identity check contents to see if same
570             int i = children.length;
571             while (i-- > 0) {
572                 if (children[i] != firstParentNodes.get(i)) {
573                     sameList = false;
574                     break;
575                 }
576             }
577             if (sameList) { // moving, so OK to empty firstParent and short-circuit
578                 boolean wasEmpty = childNodeSize() == 0;
579                 firstParent.empty();
580                 nodes.addAll(index, Arrays.asList(children));
581                 i = children.length;
582                 while (i-- > 0) {
583                     children[i].parentNode = this;
584                 }
585                 if (!(wasEmpty && children[0].siblingIndex == 0)) // skip reindexing if we just moved
586                     reindexChildren(index);
587                 return;
588             }
589         }
590 
591         Validate.noNullElements(children);
592         for (Node child : children) {
593             reparentChild(child);
594         }
595         nodes.addAll(index, Arrays.asList(children));
596         reindexChildren(index);
597     }
598 
reparentChild(Node child)599     protected void reparentChild(Node child) {
600         child.setParentNode(this);
601     }
602 
reindexChildren(int start)603     private void reindexChildren(int start) {
604         final int size = childNodeSize();
605         if (size == 0) return;
606         final List<Node> childNodes = ensureChildNodes();
607         for (int i = start; i < size; i++) {
608             childNodes.get(i).setSiblingIndex(i);
609         }
610     }
611 
612     /**
613      Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
614      include this node (a node is not a sibling of itself).
615      @return node siblings. If the node has no parent, returns an empty list.
616      */
siblingNodes()617     public List<Node> siblingNodes() {
618         if (parentNode == null)
619             return Collections.emptyList();
620 
621         List<Node> nodes = parentNode.ensureChildNodes();
622         List<Node> siblings = new ArrayList<>(nodes.size() - 1);
623         for (Node node: nodes)
624             if (node != this)
625                 siblings.add(node);
626         return siblings;
627     }
628 
629     /**
630      Get this node's next sibling.
631      @return next sibling, or {@code null} if this is the last sibling
632      */
nextSibling()633     public @Nullable Node nextSibling() {
634         if (parentNode == null)
635             return null; // root
636 
637         final List<Node> siblings = parentNode.ensureChildNodes();
638         final int index = siblingIndex+1;
639         if (siblings.size() > index)
640             return siblings.get(index);
641         else
642             return null;
643     }
644 
645     /**
646      Get this node's previous sibling.
647      @return the previous sibling, or @{code null} if this is the first sibling
648      */
previousSibling()649     public @Nullable Node previousSibling() {
650         if (parentNode == null)
651             return null; // root
652 
653         if (siblingIndex > 0)
654             return parentNode.ensureChildNodes().get(siblingIndex-1);
655         else
656             return null;
657     }
658 
659     /**
660      * Get the list index of this node in its node sibling list. E.g. if this is the first node
661      * sibling, returns 0.
662      * @return position in node sibling list
663      * @see org.jsoup.nodes.Element#elementSiblingIndex()
664      */
siblingIndex()665     public int siblingIndex() {
666         return siblingIndex;
667     }
668 
setSiblingIndex(int siblingIndex)669     protected void setSiblingIndex(int siblingIndex) {
670         this.siblingIndex = siblingIndex;
671     }
672 
673     /**
674      Gets the first child node of this node, or {@code null} if there is none. This could be any Node type, such as an
675      Element, TextNode, Comment, etc. Use {@link Element#firstElementChild()} to get the first Element child.
676      @return the first child node, or null if there are no children.
677      @see Element#firstElementChild()
678      @see #lastChild()
679      @since 1.15.2
680      */
firstChild()681     public @Nullable Node firstChild() {
682         if (childNodeSize() == 0) return null;
683         return ensureChildNodes().get(0);
684     }
685 
686     /**
687      Gets the last child node of this node, or {@code null} if there is none.
688      @return the last child node, or null if there are no children.
689      @see Element#lastElementChild()
690      @see #firstChild()
691      @since 1.15.2
692      */
lastChild()693     public @Nullable Node lastChild() {
694         final int size = childNodeSize();
695         if (size == 0) return null;
696         List<Node> children = ensureChildNodes();
697         return children.get(size - 1);
698     }
699 
700     /**
701      * Perform a depth-first traversal through this node and its descendants.
702      * @param nodeVisitor the visitor callbacks to perform on each node
703      * @return this node, for chaining
704      */
traverse(NodeVisitor nodeVisitor)705     public Node traverse(NodeVisitor nodeVisitor) {
706         Validate.notNull(nodeVisitor);
707         NodeTraversor.traverse(nodeVisitor, this);
708         return this;
709     }
710 
711     /**
712      Perform the supplied action on this Node and each of its descendants, during a depth-first traversal. Nodes may be
713      inspected, changed, added, replaced, or removed.
714      @param action the function to perform on the node
715      @return this Node, for chaining
716      @see Element#forEach(Consumer)
717      */
forEachNode(Consumer<? super Node> action)718     public Node forEachNode(Consumer<? super Node> action) {
719         Validate.notNull(action);
720         nodeStream().forEach(action);
721         return this;
722     }
723 
724     /**
725      * Perform a depth-first filtered traversal through this node and its descendants.
726      * @param nodeFilter the filter callbacks to perform on each node
727      * @return this node, for chaining
728      */
filter(NodeFilter nodeFilter)729     public Node filter(NodeFilter nodeFilter) {
730         Validate.notNull(nodeFilter);
731         NodeTraversor.filter(nodeFilter, this);
732         return this;
733     }
734 
735     /**
736      Returns a Stream of this Node and all of its descendant Nodes. The stream has document order.
737      @return a stream of all nodes.
738      @see Element#stream()
739      @since 1.17.1
740      */
nodeStream()741     public Stream<Node> nodeStream() {
742         return NodeUtils.stream(this, Node.class);
743     }
744 
745     /**
746      Returns a Stream of this and descendant nodes, containing only nodes of the specified type. The stream has document
747      order.
748      @return a stream of nodes filtered by type.
749      @see Element#stream()
750      @since 1.17.1
751      */
nodeStream(Class<T> type)752     public <T extends Node> Stream<T> nodeStream(Class<T> type) {
753         return NodeUtils.stream(this, type);
754     }
755 
756     /**
757      Get the outer HTML of this node. For example, on a {@code p} element, may return {@code <p>Para</p>}.
758      @return outer HTML
759      @see Element#html()
760      @see Element#text()
761      */
outerHtml()762     public String outerHtml() {
763         StringBuilder accum = StringUtil.borrowBuilder();
764         outerHtml(accum);
765         return StringUtil.releaseBuilder(accum);
766     }
767 
outerHtml(Appendable accum)768     protected void outerHtml(Appendable accum) {
769         NodeTraversor.traverse(new OuterHtmlVisitor(accum, NodeUtils.outputSettings(this)), this);
770     }
771 
    /**
     Get the outer HTML of this node.
     NOTE(review): paired with {@code outerHtmlTail}; presumably this writes the opening part of the node's HTML
     and the tail writes the closing part — confirm against the subclasses.
     @param accum accumulator to place HTML into
     @param depth NOTE(review): presumably the nesting depth used for indentation (see {@code indent}) — confirm
     @param out the output settings to apply
     @throws IOException if appending to the given accumulator fails.
     */
    abstract void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException;
778 
    /**
     Counterpart of {@code outerHtmlHead}.
     NOTE(review): presumably writes the closing part of this node's outer HTML — confirm against the subclasses.
     @param accum accumulator to place HTML into
     @param depth NOTE(review): presumably the nesting depth used for indentation (see {@code indent}) — confirm
     @param out the output settings to apply
     @throws IOException if appending to the given accumulator fails.
     */
    abstract void outerHtmlTail(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException;
780 
781     /**
782      * Write this node and its children to the given {@link Appendable}.
783      *
784      * @param appendable the {@link Appendable} to write to.
785      * @return the supplied {@link Appendable}, for chaining.
786      */
html(T appendable)787     public <T extends Appendable> T html(T appendable) {
788         outerHtml(appendable);
789         return appendable;
790     }
791 
792     /**
793      Get the source range (start and end positions) in the original input source from which this node was parsed.
794      Position tracking must be enabled prior to parsing the content. For an Element, this will be the positions of the
795      start tag.
796      @return the range for the start of the node, or {@code untracked} if its range was not tracked.
797      @see org.jsoup.parser.Parser#setTrackPosition(boolean)
798      @see Range#isImplicit()
799      @see Element#endSourceRange()
800      @see Attributes#sourceRange(String name)
801      @since 1.15.2
802      */
sourceRange()803     public Range sourceRange() {
804         return Range.of(this, true);
805     }
806 
807     /** Test if this node is the first child, or first following blank text. */
isEffectivelyFirst()808     final boolean isEffectivelyFirst() {
809         if (siblingIndex == 0) return true;
810         if (siblingIndex == 1) {
811             final Node prev = previousSibling();
812             return prev instanceof TextNode && (((TextNode) prev).isBlank());
813         }
814         return false;
815     }
816 
817     /**
818      * Gets this node's outer HTML.
819      * @return outer HTML.
820      * @see #outerHtml()
821      */
toString()822 	public String toString() {
823         return outerHtml();
824     }
825 
indent(Appendable accum, int depth, Document.OutputSettings out)826     protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
827         accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth()));
828     }
829 
830     /**
831      * Check if this node is the same instance of another (object identity test).
832      * <p>For an node value equality check, see {@link #hasSameValue(Object)}</p>
833      * @param o other object to compare to
834      * @return true if the content of this node is the same as the other
835      * @see Node#hasSameValue(Object)
836      */
837     @Override
equals(@ullable Object o)838     public boolean equals(@Nullable Object o) {
839         // implemented just so that javadoc is clear this is an identity test
840         return this == o;
841     }
842 
843     /**
844      Provides a hashCode for this Node, based on its object identity. Changes to the Node's content will not impact the
845      result.
846      @return an object identity based hashcode for this Node
847      */
848     @Override
hashCode()849     public int hashCode() {
850         // implemented so that javadoc and scanners are clear this is an identity test
851         return super.hashCode();
852     }
853 
854     /**
855      * Check if this node has the same content as another node. A node is considered the same if its name, attributes and content match the
856      * other node; particularly its position in the tree does not influence its similarity.
857      * @param o other object to compare to
858      * @return true if the content of this node is the same as the other
859      */
hasSameValue(@ullable Object o)860     public boolean hasSameValue(@Nullable Object o) {
861         if (this == o) return true;
862         if (o == null || getClass() != o.getClass()) return false;
863 
864         return this.outerHtml().equals(((Node) o).outerHtml());
865     }
866 
867     /**
868      * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
869      * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
870      * original node.
871      * <p>
872      * The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
873      * @return a stand-alone cloned node, including clones of any children
874      * @see #shallowClone()
875      */
876     @SuppressWarnings("MethodDoesntCallSuperMethod") // because it does call super.clone in doClone - analysis just isn't following
877     @Override
clone()878     public Node clone() {
879         Node thisClone = doClone(null); // splits for orphan
880 
881         // Queue up nodes that need their children cloned (BFS).
882         final LinkedList<Node> nodesToProcess = new LinkedList<>();
883         nodesToProcess.add(thisClone);
884 
885         while (!nodesToProcess.isEmpty()) {
886             Node currParent = nodesToProcess.remove();
887 
888             final int size = currParent.childNodeSize();
889             for (int i = 0; i < size; i++) {
890                 final List<Node> childNodes = currParent.ensureChildNodes();
891                 Node childClone = childNodes.get(i).doClone(currParent);
892                 childNodes.set(i, childClone);
893                 nodesToProcess.add(childClone);
894             }
895         }
896 
897         return thisClone;
898     }
899 
900     /**
901      * Create a stand-alone, shallow copy of this node. None of its children (if any) will be cloned, and it will have
902      * no parent or sibling nodes.
903      * @return a single independent copy of this node
904      * @see #clone()
905      */
shallowClone()906     public Node shallowClone() {
907         return doClone(null);
908     }
909 
910     /*
911      * Return a clone of the node using the given parent (which can be null).
912      * Not a deep copy of children.
913      */
doClone(@ullable Node parent)914     protected Node doClone(@Nullable Node parent) {
915         Node clone;
916 
917         try {
918             clone = (Node) super.clone();
919         } catch (CloneNotSupportedException e) {
920             throw new RuntimeException(e);
921         }
922 
923         clone.parentNode = parent; // can be null, to create an orphan split
924         clone.siblingIndex = parent == null ? 0 : siblingIndex;
925         // if not keeping the parent, shallowClone the ownerDocument to preserve its settings
926         if (parent == null && !(this instanceof Document)) {
927             Document doc = ownerDocument();
928             if (doc != null) {
929                 Document docClone = doc.shallowClone();
930                 clone.parentNode = docClone;
931                 docClone.ensureChildNodes().add(clone);
932             }
933         }
934 
935         return clone;
936     }
937 
938     private static class OuterHtmlVisitor implements NodeVisitor {
939         private final Appendable accum;
940         private final Document.OutputSettings out;
941 
OuterHtmlVisitor(Appendable accum, Document.OutputSettings out)942         OuterHtmlVisitor(Appendable accum, Document.OutputSettings out) {
943             this.accum = accum;
944             this.out = out;
945             out.prepareEncoder();
946         }
947 
head(Node node, int depth)948         public void head(Node node, int depth) {
949             try {
950 				node.outerHtmlHead(accum, depth, out);
951 			} catch (IOException exception) {
952 				throw new SerializationException(exception);
953 			}
954         }
955 
tail(Node node, int depth)956         public void tail(Node node, int depth) {
957             if (!node.nodeName().equals("#text")) { // saves a void hit.
958 				try {
959 					node.outerHtmlTail(accum, depth, out);
960 				} catch (IOException exception) {
961 					throw new SerializationException(exception);
962 				}
963             }
964         }
965     }
966 }
967