xref: /aosp_15_r20/external/jsoup/src/main/java/org/jsoup/nodes/Element.java (revision 6da8f8c4bc310ad659121b84dd089062417a2ce2)
1 package org.jsoup.nodes;
2 
3 import org.jsoup.helper.ChangeNotifyingArrayList;
4 import org.jsoup.helper.Validate;
5 import org.jsoup.internal.StringUtil;
6 import org.jsoup.parser.ParseSettings;
7 import org.jsoup.parser.Parser;
8 import org.jsoup.parser.Tag;
9 import org.jsoup.select.Collector;
10 import org.jsoup.select.Elements;
11 import org.jsoup.select.Evaluator;
12 import org.jsoup.select.NodeFilter;
13 import org.jsoup.select.NodeTraversor;
14 import org.jsoup.select.NodeVisitor;
15 import org.jsoup.select.QueryParser;
16 import org.jsoup.select.Selector;
17 import org.jspecify.annotations.Nullable;
18 
19 import java.io.IOException;
20 import java.lang.ref.WeakReference;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.Collections;
25 import java.util.LinkedHashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Set;
29 import java.util.concurrent.atomic.AtomicBoolean;
30 import java.util.function.Consumer;
31 import java.util.regex.Pattern;
32 import java.util.regex.PatternSyntaxException;
33 import java.util.stream.Collectors;
34 import java.util.stream.Stream;
35 
36 import static org.jsoup.internal.Normalizer.normalize;
37 import static org.jsoup.nodes.TextNode.lastCharIsWhitespace;
38 import static org.jsoup.parser.Parser.NamespaceHtml;
39 import static org.jsoup.parser.TokenQueue.escapeCssIdentifier;
40 
41 /**
42  An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements).
43  <p>
44  From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
45 */
46 public class Element extends Node {
47     private static final List<Element> EmptyChildren = Collections.emptyList(); // shared result handed out by childElementsList() when there are no child nodes
48     private static final Pattern ClassSplit = Pattern.compile("\\s+"); // matches runs of whitespace; presumably used to split class names (usage not visible here - confirm)
49     private static final String BaseUriKey = Attributes.internalKey("baseUri"); // internal attribute key under which the base URI is stored and looked up
50     private Tag tag; // this element's tag; reassigned when the element is renamed via tagName(String, String)
51     private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children; reset to null in nodelistChanged()
52     List<Node> childNodes; // starts as the shared EmptyNodes list; ensureChildNodes() swaps in a NodeList on demand
53     @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null; attributes() creates it lazily
54 
55     /**
56      * Create a new, standalone element, in the specified namespace.
57      * @param tag tag name
58      * @param namespace namespace for this element
59      */
Element(String tag, String namespace)60     public Element(String tag, String namespace) {
61         this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null);
62     }
63 
64     /**
65      * Create a new, standalone element, in the HTML namespace.
66      * @param tag tag name
67      * @see #Element(String tag, String namespace)
68      */
Element(String tag)69     public Element(String tag) {
70         this(Tag.valueOf(tag, Parser.NamespaceHtml, ParseSettings.preserveCase), "", null);
71     }
72 
73     /**
74      * Create a new, standalone Element. (Standalone in that it has no parent.)
75      *
76      * @param tag tag of this element
77      * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's)
78      * @param attributes initial attributes (optional, may be null)
79      * @see #appendChild(Node)
80      * @see #appendElement(String)
81      */
Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes)82     public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) {
83         Validate.notNull(tag);
84         childNodes = EmptyNodes;
85         this.attributes = attributes;
86         this.tag = tag;
87         if (baseUri != null)
88             this.setBaseUri(baseUri);
89     }
90 
91     /**
92      * Create a new Element from a Tag and a base URI.
93      *
94      * @param tag element tag
95      * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
96      * @see Tag#valueOf(String, ParseSettings)
97      */
Element(Tag tag, @Nullable String baseUri)98     public Element(Tag tag, @Nullable String baseUri) {
99         this(tag, baseUri, null);
100     }
101 
102     /**
103      Internal test to check if a nodelist object has been created.
104      */
hasChildNodes()105     protected boolean hasChildNodes() {
106         return childNodes != EmptyNodes;
107     }
108 
ensureChildNodes()109     protected List<Node> ensureChildNodes() {
110         if (childNodes == EmptyNodes) {
111             childNodes = new NodeList(this, 4);
112         }
113         return childNodes;
114     }
115 
116     @Override
hasAttributes()117     protected boolean hasAttributes() {
118         return attributes != null;
119     }
120 
121     @Override
attributes()122     public Attributes attributes() {
123         if (attributes == null) // not using hasAttributes, as doesn't clear warning
124             attributes = new Attributes();
125         return attributes;
126     }
127 
128     @Override
baseUri()129     public String baseUri() {
130         return searchUpForAttribute(this, BaseUriKey);
131     }
132 
searchUpForAttribute(final Element start, final String key)133     private static String searchUpForAttribute(final Element start, final String key) {
134         Element el = start;
135         while (el != null) {
136             if (el.attributes != null && el.attributes.hasKey(key))
137                 return el.attributes.get(key);
138             el = el.parent();
139         }
140         return "";
141     }
142 
143     @Override
doSetBaseUri(String baseUri)144     protected void doSetBaseUri(String baseUri) {
145         attributes().put(BaseUriKey, baseUri);
146     }
147 
148     @Override
childNodeSize()149     public int childNodeSize() {
150         return childNodes.size();
151     }
152 
153     @Override
nodeName()154     public String nodeName() {
155         return tag.getName();
156     }
157 
158     /**
159      * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase
160      * case preserving parsing}, this will return the source's original case.
161      *
162      * @return the tag name
163      */
tagName()164     public String tagName() {
165         return tag.getName();
166     }
167 
168     /**
169      * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless
170      * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a
171      * normal name of {@code div}.
172      * @return normal name
173      */
174     @Override
normalName()175     public String normalName() {
176         return tag.normalName();
177     }
178 
179     /**
180      Test if this Element has the specified normalized name, and is in the specified namespace.
181      * @param normalName a normalized element name (e.g. {@code div}).
182      * @param namespace the namespace
183      * @return true if the element's normal name matches exactly, and is in the specified namespace
184      * @since 1.17.2
185      */
elementIs(String normalName, String namespace)186     public boolean elementIs(String normalName, String namespace) {
187         return tag.normalName().equals(normalName) && tag.namespace().equals(namespace);
188     }
189 
190     /**
191      * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
192      * {@code el.tagName("div");}.
193      *
194      * @param tagName new tag name for this element
195      * @return this element, for chaining
196      * @see Elements#tagName(String)
197      */
tagName(String tagName)198     public Element tagName(String tagName) {
199         return tagName(tagName, tag.namespace());
200     }
201 
202     /**
203      * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
204      * {@code el.tagName("div");}.
205      *
206      * @param tagName new tag name for this element
207      * @param namespace the new namespace for this element
208      * @return this element, for chaining
209      * @see Elements#tagName(String)
210      */
tagName(String tagName, String namespace)211     public Element tagName(String tagName, String namespace) {
212         Validate.notEmptyParam(tagName, "tagName");
213         Validate.notEmptyParam(namespace, "namespace");
214         tag = Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()); // maintains the case option of the original parse
215         return this;
216     }
217 
218     /**
219      * Get the Tag for this element.
220      *
221      * @return the tag object
222      */
tag()223     public Tag tag() {
224         return tag;
225     }
226 
227     /**
228      * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
229      * {@code <span> == false}).
230      *
231      * @return true if block, false if not (and thus inline)
232      */
isBlock()233     public boolean isBlock() {
234         return tag.isBlock();
235     }
236 
237     /**
238      * Get the {@code id} attribute of this element.
239      *
240      * @return The id attribute, if present, or an empty string if not.
241      */
id()242     public String id() {
243         return attributes != null ? attributes.getIgnoreCase("id") :"";
244     }
245 
246     /**
247      Set the {@code id} attribute of this element.
248      @param id the ID value to use
249      @return this Element, for chaining
250      */
id(String id)251     public Element id(String id) {
252         Validate.notNull(id);
253         attr("id", id);
254         return this;
255     }
256 
257     /**
258      * Set an attribute value on this element. If this element already has an attribute with the
259      * key, its value is updated; otherwise, a new attribute is added.
260      *
261      * @return this element
262      */
attr(String attributeKey, String attributeValue)263     public Element attr(String attributeKey, String attributeValue) {
264         super.attr(attributeKey, attributeValue);
265         return this;
266     }
267 
268     /**
269      * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and
270      * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute
271      * with the same key if it exists.
272      *
273      * @param attributeKey the attribute key
274      * @param attributeValue the attribute value
275      *
276      * @return this element
277      */
attr(String attributeKey, boolean attributeValue)278     public Element attr(String attributeKey, boolean attributeValue) {
279         attributes().put(attributeKey, attributeValue);
280         return this;
281     }
282 
283     /**
284      Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc
285      will cascade back to this Element.
286      @param key the (case-sensitive) attribute key
287      @return the Attribute for this key, or null if not present.
288      @since 1.17.2
289      */
attribute(String key)290     public Attribute attribute(String key) {
291         return hasAttributes() ? attributes().attribute(key) : null;
292     }
293 
294     /**
295      * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
296      * starting with "data-" is included the dataset.
297      * <p>
298      * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
299      * {@code package=jsoup, language=java}.
300      * <p>
301      * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
302      * in the other map.
303      * <p>
304      * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
305      * @return a map of {@code key=value} custom data attributes.
306      */
dataset()307     public Map<String, String> dataset() {
308         return attributes().dataset();
309     }
310 
311     @Override @Nullable
parent()312     public final Element parent() {
313         return (Element) parentNode;
314     }
315 
316     /**
317      * Get this element's parent and ancestors, up to the document root.
318      * @return this element's stack of parents, starting with the closest first.
319      */
parents()320     public Elements parents() {
321         Elements parents = new Elements();
322         Element parent = this.parent();
323         while (parent != null && !parent.nameIs("#root")) {
324             parents.add(parent);
325             parent = parent.parent();
326         }
327         return parents;
328     }
329 
330     /**
331      * Get a child element of this element, by its 0-based index number.
332      * <p>
333      * Note that an element can have both mixed Nodes and Elements as children. This method inspects
334      * a filtered list of children that are elements, and the index is based on that filtered list.
335      * </p>
336      *
337      * @param index the index number of the element to retrieve
338      * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
339      * @see #childNode(int)
340      */
child(int index)341     public Element child(int index) {
342         return childElementsList().get(index);
343     }
344 
345     /**
346      * Get the number of child nodes of this element that are elements.
347      * <p>
348      * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link
349      * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.)
350      * </p>
351      *
352      * @return the number of child nodes that are elements
353      * @see #children()
354      * @see #child(int)
355      */
childrenSize()356     public int childrenSize() {
357         return childElementsList().size();
358     }
359 
360     /**
361      * Get this element's child elements.
362      * <p>
363      * This is effectively a filter on {@link #childNodes()} to get Element nodes.
364      * </p>
365      * @return child elements. If this element has no children, returns an empty list.
366      * @see #childNodes()
367      */
children()368     public Elements children() {
369         return new Elements(childElementsList());
370     }
371 
372     /**
373      * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated.
374      * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around.
375      * @return a list of child elements
376      */
childElementsList()377     List<Element> childElementsList() {
378         if (childNodeSize() == 0)
379             return EmptyChildren; // short circuit creating empty
380 
381         List<Element> children;
382         if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) {
383             final int size = childNodes.size();
384             children = new ArrayList<>(size);
385             //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here)
386             for (int i = 0; i < size; i++) {
387                 final Node node = childNodes.get(i);
388                 if (node instanceof Element)
389                     children.add((Element) node);
390             }
391             shadowChildrenRef = new WeakReference<>(children);
392         }
393         return children;
394     }
395 
396     /**
397      * Clears the cached shadow child elements.
398      */
399     @Override
nodelistChanged()400     void nodelistChanged() {
401         super.nodelistChanged();
402         shadowChildrenRef = null;
403     }
404 
405     /**
406      Returns a Stream of this Element and all of its descendant Elements. The stream has document order.
407      @return a stream of this element and its descendants.
408      @see #nodeStream()
409      @since 1.17.1
410      */
stream()411     public Stream<Element> stream() {
412         return NodeUtils.stream(this, Element.class);
413     }
414 
filterNodes(Class<T> clazz)415     private <T> List<T> filterNodes(Class<T> clazz) {
416         return childNodes.stream()
417                 .filter(clazz::isInstance)
418                 .map(clazz::cast)
419                 .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
420     }
421 
422     /**
423      * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
424      * <p>
425      * This is effectively a filter on {@link #childNodes()} to get Text nodes.
426      * @return child text nodes. If this element has no text nodes, returns an
427      * empty list.
428      * </p>
429      * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
430      * <ul>
431      *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
432      *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
433      *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
434      *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
435      *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
436      * </ul>
437      */
textNodes()438     public List<TextNode> textNodes() {
439         return filterNodes(TextNode.class);
440     }
441 
442     /**
443      * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
444      * <p>
445      * This is effectively a filter on {@link #childNodes()} to get Data nodes.
446      * </p>
447      * @return child data nodes. If this element has no data nodes, returns an
448      * empty list.
449      * @see #data()
450      */
dataNodes()451     public List<DataNode> dataNodes() {
452         return filterNodes(DataNode.class);
453     }
454 
455     /**
456      * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
457      * may include this element, or any of its children.
458      * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
459      * multiple filters can be combined, e.g.:</p>
460      * <ul>
461      * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
462      * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
463      * </ul>
464      * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p>
465      * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p>
466      *
467      * @param cssQuery a {@link Selector} CSS-like query
468      * @return an {@link Elements} list containing elements that match the query (empty if none match)
469      * @see Selector selector query syntax
470      * @see QueryParser#parse(String)
471      * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
472      */
select(String cssQuery)473     public Elements select(String cssQuery) {
474         return Selector.select(cssQuery, this);
475     }
476 
477     /**
478      * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but
479      * may be useful if you are running the same query many times (on many documents) and want to save the overhead of
480      * repeatedly parsing the CSS query.
481      * @param evaluator an element evaluator
482      * @return an {@link Elements} list containing elements that match the query (empty if none match)
483      */
select(Evaluator evaluator)484     public Elements select(Evaluator evaluator) {
485         return Selector.select(evaluator, this);
486     }
487 
488     /**
489      * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
490      * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
491      * execution stops on the first hit.</p>
492      * <p>Also known as {@code querySelector()} in the Web DOM.</p>
493      * @param cssQuery cssQuery a {@link Selector} CSS-like query
494      * @return the first matching element, or <b>{@code null}</b> if there is no match.
495      * @see #expectFirst(String)
496      */
selectFirst(String cssQuery)497     public @Nullable Element selectFirst(String cssQuery) {
498         return Selector.selectFirst(cssQuery, this);
499     }
500 
501     /**
502      * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or
503      * {@code null} if none match.
504      *
505      * @param evaluator an element evaluator
506      * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none
507      * match.
508      */
selectFirst(Evaluator evaluator)509     public @Nullable Element selectFirst(Evaluator evaluator) {
510         return Collector.findFirst(evaluator, this);
511     }
512 
513     /**
514      Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This
515      is useful if you want to simply abort processing on a failed match.
516      @param cssQuery a {@link Selector} CSS-like query
517      @return the first matching element
518      @throws IllegalArgumentException if no match is found
519      @since 1.15.2
520      */
expectFirst(String cssQuery)521     public Element expectFirst(String cssQuery) {
522         return (Element) Validate.ensureNotNull(
523             Selector.selectFirst(cssQuery, this),
524             parent() != null ?
525                 "No elements matched the query '%s' on element '%s'.":
526                 "No elements matched the query '%s' in the document."
527             , cssQuery, this.tagName()
528         );
529     }
530 
531     /**
532      * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web
533      * DOM.
534      *
535      * @param cssQuery a {@link Selector} CSS query
536      * @return if this element matches the query
537      */
is(String cssQuery)538     public boolean is(String cssQuery) {
539         return is(QueryParser.parse(cssQuery));
540     }
541 
542     /**
543      * Check if this element matches the given evaluator.
544      * @param evaluator an element evaluator
545      * @return if this element matches
546      */
is(Evaluator evaluator)547     public boolean is(Evaluator evaluator) {
548         return evaluator.matches(this.root(), this);
549     }
550 
551     /**
552      * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an
553      * ancestor, or {@code null} if there is no such matching element.
554      * @param cssQuery a {@link Selector} CSS query
555      * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
556      * found.
557      */
closest(String cssQuery)558     public @Nullable Element closest(String cssQuery) {
559         return closest(QueryParser.parse(cssQuery));
560     }
561 
562     /**
563      * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an
564      * ancestor, or {@code null} if there is no such matching element.
565      * @param evaluator a query evaluator
566      * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
567      * found.
568      */
closest(Evaluator evaluator)569     public @Nullable Element closest(Evaluator evaluator) {
570         Validate.notNull(evaluator);
571         Element el = this;
572         final Element root = root();
573         do {
574             if (evaluator.matches(root, el))
575                 return el;
576             el = el.parent();
577         } while (el != null);
578         return null;
579     }
580 
581     /**
582      Find Elements that match the supplied {@index XPath} expression.
583      <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be
584      expressed using the element's local name only.</p>
585      <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an
586      alternate XPathFactory implementation:</p>
587      <ol>
588      <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li>
589      <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br>
590      <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code>
591      </li>
592      </ol>
593 
594      @param xpath XPath expression
595      @return matching elements, or an empty list if none match.
596      @see #selectXpath(String, Class)
597      @since 1.14.3
598      */
selectXpath(String xpath)599     public Elements selectXpath(String xpath) {
600         return new Elements(NodeUtils.selectXpath(xpath, this, Element.class));
601     }
602 
603     /**
604      Find Nodes that match the supplied XPath expression.
605      <p>For example, to select TextNodes under {@code p} elements: </p>
606      <pre>List&lt;TextNode&gt; textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre>
607      <p>Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something
608      like:</p>
609      <pre>List&lt;String&gt; hrefs = doc.selectXpath("//a").eachAttr("href");</pre>
610      @param xpath XPath expression
611      @param nodeType the jsoup node type to return
612      @see #selectXpath(String)
613      @return a list of matching nodes
614      @since 1.14.3
615      */
selectXpath(String xpath, Class<T> nodeType)616     public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) {
617         return NodeUtils.selectXpath(xpath, this, nodeType);
618     }
619 
620     /**
621      * Insert a node to the end of this Element's children. The incoming node will be re-parented.
622      *
623      * @param child node to add.
624      * @return this Element, for chaining
625      * @see #prependChild(Node)
626      * @see #insertChildren(int, Collection)
627      */
appendChild(Node child)628     public Element appendChild(Node child) {
629         Validate.notNull(child);
630 
631         // was - Node#addChildren(child). short-circuits an array create and a loop.
632         reparentChild(child);
633         ensureChildNodes();
634         childNodes.add(child);
635         child.setSiblingIndex(childNodes.size() - 1);
636         return this;
637     }
638 
639     /**
640      Insert the given nodes to the end of this Element's children.
641 
642      @param children nodes to add
643      @return this Element, for chaining
644      @see #insertChildren(int, Collection)
645      */
appendChildren(Collection<? extends Node> children)646     public Element appendChildren(Collection<? extends Node> children) {
647         insertChildren(-1, children);
648         return this;
649     }
650 
651     /**
652      * Add this element to the supplied parent element, as its next child.
653      *
654      * @param parent element to which this element will be appended
655      * @return this element, so that you can continue modifying the element
656      */
appendTo(Element parent)657     public Element appendTo(Element parent) {
658         Validate.notNull(parent);
659         parent.appendChild(this);
660         return this;
661     }
662 
663     /**
664      * Add a node to the start of this element's children.
665      *
666      * @param child node to add.
667      * @return this element, so that you can add more child nodes or elements.
668      */
prependChild(Node child)669     public Element prependChild(Node child) {
670         Validate.notNull(child);
671 
672         addChildren(0, child);
673         return this;
674     }
675 
676     /**
677      Insert the given nodes to the start of this Element's children.
678 
679      @param children nodes to add
680      @return this Element, for chaining
681      @see #insertChildren(int, Collection)
682      */
prependChildren(Collection<? extends Node> children)683     public Element prependChildren(Collection<? extends Node> children) {
684         insertChildren(0, children);
685         return this;
686     }
687 
688 
689     /**
690      * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
691      * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
692      *
693      * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
694      * end
695      * @param children child nodes to insert
696      * @return this element, for chaining.
697      */
insertChildren(int index, Collection<? extends Node> children)698     public Element insertChildren(int index, Collection<? extends Node> children) {
699         Validate.notNull(children, "Children collection to be inserted must not be null.");
700         int currentSize = childNodeSize();
701         if (index < 0) index += currentSize +1; // roll around
702         Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
703 
704         ArrayList<Node> nodes = new ArrayList<>(children);
705         Node[] nodeArray = nodes.toArray(new Node[0]);
706         addChildren(index, nodeArray);
707         return this;
708     }
709 
710     /**
711      * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
712      * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
713      *
714      * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
715      * end
716      * @param children child nodes to insert
717      * @return this element, for chaining.
718      */
insertChildren(int index, Node... children)719     public Element insertChildren(int index, Node... children) {
720         Validate.notNull(children, "Children collection to be inserted must not be null.");
721         int currentSize = childNodeSize();
722         if (index < 0) index += currentSize +1; // roll around
723         Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
724 
725         addChildren(index, children);
726         return this;
727     }
728 
729     /**
730      * Create a new element by tag name, and add it as this Element's last child.
731      *
732      * @param tagName the name of the tag (e.g. {@code div}).
733      * @return the new element, to allow you to add content to it, e.g.:
734      *  {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
735      */
appendElement(String tagName)736     public Element appendElement(String tagName) {
737         return appendElement(tagName, tag.namespace());
738     }
739 
740     /**
741      * Create a new element by tag name and namespace, add it as this Element's last child.
742      *
743      * @param tagName the name of the tag (e.g. {@code div}).
744      * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
745      * @return the new element, in the specified namespace
746      */
appendElement(String tagName, String namespace)747     public Element appendElement(String tagName, String namespace) {
748         Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri());
749         appendChild(child);
750         return child;
751     }
752 
753     /**
754      * Create a new element by tag name, and add it as this Element's first child.
755      *
756      * @param tagName the name of the tag (e.g. {@code div}).
757      * @return the new element, to allow you to add content to it, e.g.:
758      *  {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
759      */
prependElement(String tagName)760     public Element prependElement(String tagName) {
761         return prependElement(tagName, tag.namespace());
762     }
763 
764     /**
765      * Create a new element by tag name and namespace, and add it as this Element's first child.
766      *
767      * @param tagName the name of the tag (e.g. {@code div}).
768      * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
769      * @return the new element, in the specified namespace
770      */
prependElement(String tagName, String namespace)771     public Element prependElement(String tagName, String namespace) {
772         Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri());
773         prependChild(child);
774         return child;
775     }
776 
777     /**
778      * Create and append a new TextNode to this element.
779      *
780      * @param text the (un-encoded) text to add
781      * @return this element
782      */
appendText(String text)783     public Element appendText(String text) {
784         Validate.notNull(text);
785         TextNode node = new TextNode(text);
786         appendChild(node);
787         return this;
788     }
789 
790     /**
791      * Create and prepend a new TextNode to this element.
792      *
793      * @param text the decoded text to add
794      * @return this element
795      */
prependText(String text)796     public Element prependText(String text) {
797         Validate.notNull(text);
798         TextNode node = new TextNode(text);
799         prependChild(node);
800         return this;
801     }
802 
803     /**
804      * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
805      * @param html HTML to add inside this element, after the existing HTML
806      * @return this element
807      * @see #html(String)
808      */
append(String html)809     public Element append(String html) {
810         Validate.notNull(html);
811         List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
812         addChildren(nodes.toArray(new Node[0]));
813         return this;
814     }
815 
816     /**
817      * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
818      * @param html HTML to add inside this element, before the existing HTML
819      * @return this element
820      * @see #html(String)
821      */
prepend(String html)822     public Element prepend(String html) {
823         Validate.notNull(html);
824         List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
825         addChildren(0, nodes.toArray(new Node[0]));
826         return this;
827     }
828 
829     /**
830      * Insert the specified HTML into the DOM before this element (as a preceding sibling).
831      *
832      * @param html HTML to add before this element
833      * @return this element, for chaining
834      * @see #after(String)
835      */
836     @Override
before(String html)837     public Element before(String html) {
838         return (Element) super.before(html);
839     }
840 
841     /**
842      * Insert the specified node into the DOM before this node (as a preceding sibling).
843      * @param node to add before this element
844      * @return this Element, for chaining
845      * @see #after(Node)
846      */
847     @Override
before(Node node)848     public Element before(Node node) {
849         return (Element) super.before(node);
850     }
851 
852     /**
853      * Insert the specified HTML into the DOM after this element (as a following sibling).
854      *
855      * @param html HTML to add after this element
856      * @return this element, for chaining
857      * @see #before(String)
858      */
859     @Override
after(String html)860     public Element after(String html) {
861         return (Element) super.after(html);
862     }
863 
864     /**
865      * Insert the specified node into the DOM after this node (as a following sibling).
866      * @param node to add after this element
867      * @return this element, for chaining
868      * @see #before(Node)
869      */
870     @Override
after(Node node)871     public Element after(Node node) {
872         return (Element) super.after(node);
873     }
874 
875     /**
876      * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to
877      * {@code null}.
878      * @return this element
879      */
880     @Override
empty()881     public Element empty() {
882         // Detach each of the children -> parent links:
883         for (Node child : childNodes) {
884             child.parentNode = null;
885         }
886         childNodes.clear();
887         return this;
888     }
889 
890     /**
891      * Wrap the supplied HTML around this element.
892      *
893      * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
894      * @return this element, for chaining.
895      */
896     @Override
wrap(String html)897     public Element wrap(String html) {
898         return (Element) super.wrap(html);
899     }
900 
901     /**
902      * Get a CSS selector that will uniquely select this element.
903      * <p>
904      * If the element has an ID, returns #id;
905      * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'},
906      * followed by a unique selector for the element (tag.class.class:nth-child(n)).
907      * </p>
908      *
909      * @return the CSS Path that can be used to retrieve the element in a selector.
910      */
cssSelector()911     public String cssSelector() {
912         if (id().length() > 0) {
913             // prefer to return the ID - but check that it's actually unique first!
914             String idSel = "#" + escapeCssIdentifier(id());
915             Document doc = ownerDocument();
916             if (doc != null) {
917                 Elements els = doc.select(idSel);
918                 if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl
919                     return idSel;
920             } else {
921                 return idSel; // no ownerdoc, return the ID selector
922             }
923         }
924 
925         StringBuilder selector = StringUtil.borrowBuilder();
926         Element el = this;
927         while (el != null && !(el instanceof Document)) {
928             selector.insert(0, el.cssSelectorComponent());
929             el = el.parent();
930         }
931         return StringUtil.releaseBuilder(selector);
932     }
933 
cssSelectorComponent()934     private String cssSelectorComponent() {
935         // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag
936         String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|");
937         StringBuilder selector = StringUtil.borrowBuilder().append(tagName);
938         // String classes = StringUtil.join(classNames().stream().map(TokenQueue::escapeCssIdentifier).iterator(), ".");
939         // todo - replace with ^^ in 1.16.1 when we enable Android support for stream etc
940         StringUtil.StringJoiner escapedClasses = new StringUtil.StringJoiner(".");
941         for (String name : classNames()) escapedClasses.add(escapeCssIdentifier(name));
942         String classes = escapedClasses.complete();
943         if (classes.length() > 0)
944             selector.append('.').append(classes);
945 
946         if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node
947             return StringUtil.releaseBuilder(selector);
948 
949         selector.insert(0, " > ");
950         if (parent().select(selector.toString()).size() > 1)
951             selector.append(String.format(
952                 ":nth-child(%d)", elementSiblingIndex() + 1));
953 
954         return StringUtil.releaseBuilder(selector);
955     }
956 
957     /**
958      * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
959      * of itself, so will not be included in the returned list.
960      * @return sibling elements
961      */
siblingElements()962     public Elements siblingElements() {
963         if (parentNode == null)
964             return new Elements(0);
965 
966         List<Element> elements = parent().childElementsList();
967         Elements siblings = new Elements(elements.size() - 1);
968         for (Element el: elements)
969             if (el != this)
970                 siblings.add(el);
971         return siblings;
972     }
973 
974     /**
975      * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
976      * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
977      * <p>
978      * This is similar to {@link #nextSibling()}, but specifically finds only Elements
979      * </p>
980      * @return the next element, or null if there is no next element
981      * @see #previousElementSibling()
982      */
nextElementSibling()983     public @Nullable Element nextElementSibling() {
984         Node next = this;
985         while ((next = next.nextSibling()) != null) {
986             if (next instanceof Element) return (Element) next;
987         }
988         return null;
989     }
990 
991     /**
992      * Get each of the sibling elements that come after this element.
993      *
994      * @return each of the element siblings after this element, or an empty list if there are no next sibling elements
995      */
nextElementSiblings()996     public Elements nextElementSiblings() {
997         return nextElementSiblings(true);
998     }
999 
1000     /**
1001      * Gets the previous element sibling of this element.
1002      * @return the previous element, or null if there is no previous element
1003      * @see #nextElementSibling()
1004      */
previousElementSibling()1005     public @Nullable Element previousElementSibling() {
1006         Node prev = this;
1007         while ((prev = prev.previousSibling()) != null) {
1008             if (prev instanceof Element) return (Element) prev;
1009         }
1010         return null;
1011     }
1012 
1013     /**
1014      * Get each of the element siblings before this element.
1015      *
1016      * @return the previous element siblings, or an empty list if there are none.
1017      */
previousElementSiblings()1018     public Elements previousElementSiblings() {
1019         return nextElementSiblings(false);
1020     }
1021 
nextElementSiblings(boolean next)1022     private Elements nextElementSiblings(boolean next) {
1023         Elements els = new Elements();
1024         if (parentNode == null)
1025             return  els;
1026         els.add(this);
1027         return next ?  els.nextAll() : els.prevAll();
1028     }
1029 
1030     /**
1031      * Gets the first Element sibling of this element. That may be this element.
1032      * @return the first sibling that is an element (aka the parent's first element child)
1033      */
firstElementSibling()1034     public Element firstElementSibling() {
1035         if (parent() != null) {
1036             //noinspection DataFlowIssue (not nullable, would be this is no other sibs)
1037             return parent().firstElementChild();
1038         } else
1039             return this; // orphan is its own first sibling
1040     }
1041 
1042     /**
1043      * Get the list index of this element in its element sibling list. I.e. if this is the first element
1044      * sibling, returns 0.
1045      * @return position in element sibling list
1046      */
elementSiblingIndex()1047     public int elementSiblingIndex() {
1048        if (parent() == null) return 0;
1049        return indexInList(this, parent().childElementsList());
1050     }
1051 
1052     /**
1053      * Gets the last element sibling of this element. That may be this element.
1054      * @return the last sibling that is an element (aka the parent's last element child)
1055      */
lastElementSibling()1056     public Element lastElementSibling() {
1057         if (parent() != null) {
1058             //noinspection DataFlowIssue (not nullable, would be this if no other sibs)
1059             return parent().lastElementChild();
1060         } else
1061             return this;
1062     }
1063 
indexInList(Element search, List<E> elements)1064     private static <E extends Element> int indexInList(Element search, List<E> elements) {
1065         final int size = elements.size();
1066         for (int i = 0; i < size; i++) {
1067             if (elements.get(i) == search)
1068                 return i;
1069         }
1070         return 0;
1071     }
1072 
1073     /**
1074      Gets the first child of this Element that is an Element, or {@code null} if there is none.
1075      @return the first Element child node, or null.
1076      @see #firstChild()
1077      @see #lastElementChild()
1078      @since 1.15.2
1079      */
firstElementChild()1080     public @Nullable Element firstElementChild() {
1081         Node child = firstChild();
1082         while (child != null) {
1083             if (child instanceof Element) return (Element) child;
1084             child = child.nextSibling();
1085         }
1086         return null;
1087     }
1088 
1089     /**
1090      Gets the last child of this Element that is an Element, or @{code null} if there is none.
1091      @return the last Element child node, or null.
1092      @see #lastChild()
1093      @see #firstElementChild()
1094      @since 1.15.2
1095      */
lastElementChild()1096     public @Nullable Element lastElementChild() {
1097         Node child = lastChild();
1098         while (child != null) {
1099             if (child instanceof Element) return (Element) child;
1100             child = child.previousSibling();
1101         }
1102         return null;
1103     }
1104 
1105     // DOM type methods
1106 
1107     /**
1108      * Finds elements, including and recursively under this element, with the specified tag name.
1109      * @param tagName The tag name to search for (case insensitively).
1110      * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
1111      */
getElementsByTag(String tagName)1112     public Elements getElementsByTag(String tagName) {
1113         Validate.notEmpty(tagName);
1114         tagName = normalize(tagName);
1115 
1116         return Collector.collect(new Evaluator.Tag(tagName), this);
1117     }
1118 
1119     /**
1120      * Find an element by ID, including or under this element.
1121      * <p>
1122      * Note that this finds the first matching ID, starting with this element. If you search down from a different
1123      * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
1124      * use {@link Document#getElementById(String)}
1125      * @param id The ID to search for.
1126      * @return The first matching element by ID, starting with this element, or null if none found.
1127      */
getElementById(String id)1128     public @Nullable Element getElementById(String id) {
1129         Validate.notEmpty(id);
1130 
1131         Elements elements = Collector.collect(new Evaluator.Id(id), this);
1132         if (elements.size() > 0)
1133             return elements.get(0);
1134         else
1135             return null;
1136     }
1137 
1138     /**
1139      * Find elements that have this class, including or under this element. Case-insensitive.
1140      * <p>
1141      * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
1142      * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
1143      *
1144      * @param className the name of the class to search for.
1145      * @return elements with the supplied class name, empty if none
1146      * @see #hasClass(String)
1147      * @see #classNames()
1148      */
getElementsByClass(String className)1149     public Elements getElementsByClass(String className) {
1150         Validate.notEmpty(className);
1151 
1152         return Collector.collect(new Evaluator.Class(className), this);
1153     }
1154 
1155     /**
1156      * Find elements that have a named attribute set. Case-insensitive.
1157      *
1158      * @param key name of the attribute, e.g. {@code href}
1159      * @return elements that have this attribute, empty if none
1160      */
getElementsByAttribute(String key)1161     public Elements getElementsByAttribute(String key) {
1162         Validate.notEmpty(key);
1163         key = key.trim();
1164 
1165         return Collector.collect(new Evaluator.Attribute(key), this);
1166     }
1167 
1168     /**
1169      * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
1170      * that have HTML5 datasets.
1171      * @param keyPrefix name prefix of the attribute e.g. {@code data-}
1172      * @return elements that have attribute names that start with the prefix, empty if none.
1173      */
getElementsByAttributeStarting(String keyPrefix)1174     public Elements getElementsByAttributeStarting(String keyPrefix) {
1175         Validate.notEmpty(keyPrefix);
1176         keyPrefix = keyPrefix.trim();
1177 
1178         return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
1179     }
1180 
1181     /**
1182      * Find elements that have an attribute with the specific value. Case-insensitive.
1183      *
1184      * @param key name of the attribute
1185      * @param value value of the attribute
1186      * @return elements that have this attribute with this value, empty if none
1187      */
getElementsByAttributeValue(String key, String value)1188     public Elements getElementsByAttributeValue(String key, String value) {
1189         return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
1190     }
1191 
1192     /**
1193      * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive.
1194      *
1195      * @param key name of the attribute
1196      * @param value value of the attribute
1197      * @return elements that do not have a matching attribute
1198      */
getElementsByAttributeValueNot(String key, String value)1199     public Elements getElementsByAttributeValueNot(String key, String value) {
1200         return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
1201     }
1202 
1203     /**
1204      * Find elements that have attributes that start with the value prefix. Case-insensitive.
1205      *
1206      * @param key name of the attribute
1207      * @param valuePrefix start of attribute value
1208      * @return elements that have attributes that start with the value prefix
1209      */
getElementsByAttributeValueStarting(String key, String valuePrefix)1210     public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
1211         return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
1212     }
1213 
1214     /**
1215      * Find elements that have attributes that end with the value suffix. Case-insensitive.
1216      *
1217      * @param key name of the attribute
1218      * @param valueSuffix end of the attribute value
1219      * @return elements that have attributes that end with the value suffix
1220      */
getElementsByAttributeValueEnding(String key, String valueSuffix)1221     public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
1222         return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
1223     }
1224 
1225     /**
1226      * Find elements that have attributes whose value contains the match string. Case-insensitive.
1227      *
1228      * @param key name of the attribute
1229      * @param match substring of value to search for
1230      * @return elements that have attributes containing this text
1231      */
getElementsByAttributeValueContaining(String key, String match)1232     public Elements getElementsByAttributeValueContaining(String key, String match) {
1233         return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
1234     }
1235 
1236     /**
1237      * Find elements that have an attribute whose value matches the supplied regular expression.
1238      * @param key name of the attribute
1239      * @param pattern compiled regular expression to match against attribute values
1240      * @return elements that have attributes matching this regular expression
1241      */
getElementsByAttributeValueMatching(String key, Pattern pattern)1242     public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
1243         return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);
1244 
1245     }
1246 
1247     /**
1248      * Find elements that have attributes whose values match the supplied regular expression.
1249      * @param key name of the attribute
1250      * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
1251      * @return elements that have attributes matching this regular expression
1252      */
getElementsByAttributeValueMatching(String key, String regex)1253     public Elements getElementsByAttributeValueMatching(String key, String regex) {
1254         Pattern pattern;
1255         try {
1256             pattern = Pattern.compile(regex);
1257         } catch (PatternSyntaxException e) {
1258             throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1259         }
1260         return getElementsByAttributeValueMatching(key, pattern);
1261     }
1262 
1263     /**
1264      * Find elements whose sibling index is less than the supplied index.
1265      * @param index 0-based index
1266      * @return elements less than index
1267      */
getElementsByIndexLessThan(int index)1268     public Elements getElementsByIndexLessThan(int index) {
1269         return Collector.collect(new Evaluator.IndexLessThan(index), this);
1270     }
1271 
1272     /**
1273      * Find elements whose sibling index is greater than the supplied index.
1274      * @param index 0-based index
1275      * @return elements greater than index
1276      */
getElementsByIndexGreaterThan(int index)1277     public Elements getElementsByIndexGreaterThan(int index) {
1278         return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
1279     }
1280 
1281     /**
1282      * Find elements whose sibling index is equal to the supplied index.
1283      * @param index 0-based index
1284      * @return elements equal to index
1285      */
getElementsByIndexEquals(int index)1286     public Elements getElementsByIndexEquals(int index) {
1287         return Collector.collect(new Evaluator.IndexEquals(index), this);
1288     }
1289 
1290     /**
1291      * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly
1292      * in the element, or in any of its descendants.
1293      * @param searchText to look for in the element's text
1294      * @return elements that contain the string, case-insensitive.
1295      * @see Element#text()
1296      */
getElementsContainingText(String searchText)1297     public Elements getElementsContainingText(String searchText) {
1298         return Collector.collect(new Evaluator.ContainsText(searchText), this);
1299     }
1300 
1301     /**
1302      * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly
1303      * in the element, not in any of its descendants.
1304      * @param searchText to look for in the element's own text
1305      * @return elements that contain the string, case-insensitive.
1306      * @see Element#ownText()
1307      */
getElementsContainingOwnText(String searchText)1308     public Elements getElementsContainingOwnText(String searchText) {
1309         return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
1310     }
1311 
1312     /**
1313      * Find elements whose text matches the supplied regular expression.
1314      * @param pattern regular expression to match text against
1315      * @return elements matching the supplied regular expression.
1316      * @see Element#text()
1317      */
getElementsMatchingText(Pattern pattern)1318     public Elements getElementsMatchingText(Pattern pattern) {
1319         return Collector.collect(new Evaluator.Matches(pattern), this);
1320     }
1321 
1322     /**
1323      * Find elements whose text matches the supplied regular expression.
1324      * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
1325      * @return elements matching the supplied regular expression.
1326      * @see Element#text()
1327      */
getElementsMatchingText(String regex)1328     public Elements getElementsMatchingText(String regex) {
1329         Pattern pattern;
1330         try {
1331             pattern = Pattern.compile(regex);
1332         } catch (PatternSyntaxException e) {
1333             throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1334         }
1335         return getElementsMatchingText(pattern);
1336     }
1337 
1338     /**
1339      * Find elements whose own text matches the supplied regular expression.
1340      * @param pattern regular expression to match text against
1341      * @return elements matching the supplied regular expression.
1342      * @see Element#ownText()
1343      */
getElementsMatchingOwnText(Pattern pattern)1344     public Elements getElementsMatchingOwnText(Pattern pattern) {
1345         return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
1346     }
1347 
1348     /**
1349      * Find elements whose own text matches the supplied regular expression.
1350      * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
1351      * @return elements matching the supplied regular expression.
1352      * @see Element#ownText()
1353      */
getElementsMatchingOwnText(String regex)1354     public Elements getElementsMatchingOwnText(String regex) {
1355         Pattern pattern;
1356         try {
1357             pattern = Pattern.compile(regex);
1358         } catch (PatternSyntaxException e) {
1359             throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1360         }
1361         return getElementsMatchingOwnText(pattern);
1362     }
1363 
1364     /**
1365      * Find all elements under this element (including self, and children of children).
1366      *
1367      * @return all elements
1368      */
getAllElements()1369     public Elements getAllElements() {
1370         return Collector.collect(new Evaluator.AllElements(), this);
1371     }
1372 
1373     /**
1374      Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and
1375      trimmed.
1376      <p>For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there
1377     now!"}
1378      <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not
1379      children), use {@link #ownText()}
1380      <p>Note that this method returns the textual content that would be presented to a reader. The contents of data
1381      nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve
1382      that content.
1383 
1384      @return decoded, normalized text, or empty string if none.
1385      @see #wholeText()
1386      @see #ownText()
1387      @see #textNodes()
1388      */
text()1389     public String text() {
1390         final StringBuilder accum = StringUtil.borrowBuilder();
1391         NodeTraversor.traverse(new TextAccumulator(accum), this);
1392         return StringUtil.releaseBuilder(accum).trim();
1393     }
1394 
1395     private static class TextAccumulator implements NodeVisitor {
1396         private final StringBuilder accum;
1397 
TextAccumulator(StringBuilder accum)1398         public TextAccumulator(StringBuilder accum) {
1399             this.accum = accum;
1400         }
1401 
head(Node node, int depth)1402         public void head(Node node, int depth) {
1403             if (node instanceof TextNode) {
1404                 TextNode textNode = (TextNode) node;
1405                 appendNormalisedText(accum, textNode);
1406             } else if (node instanceof Element) {
1407                 Element element = (Element) node;
1408                 if (accum.length() > 0 &&
1409                     (element.isBlock() || element.nameIs("br")) &&
1410                     !lastCharIsWhitespace(accum))
1411                     accum.append(' ');
1412             }
1413         }
1414 
tail(Node node, int depth)1415         public void tail(Node node, int depth) {
1416             // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two".
1417             if (node instanceof Element) {
1418                 Element element = (Element) node;
1419                 Node next = node.nextSibling();
1420                 if (element.isBlock() && (next instanceof TextNode || next instanceof Element && !((Element) next).tag.formatAsBlock()) && !lastCharIsWhitespace(accum))
1421                     accum.append(' ');
1422             }
1423 
1424         }
1425     }
1426 
1427     /**
1428      Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces
1429      present in the original source.
1430      @return decoded, non-normalized text
1431      @see #text()
1432      @see #wholeOwnText()
1433      */
wholeText()1434     public String wholeText() {
1435         final StringBuilder accum = StringUtil.borrowBuilder();
1436         nodeStream().forEach(node -> appendWholeText(node, accum));
1437         return StringUtil.releaseBuilder(accum);
1438     }
1439 
appendWholeText(Node node, StringBuilder accum)1440     private static void appendWholeText(Node node, StringBuilder accum) {
1441         if (node instanceof TextNode) {
1442             accum.append(((TextNode) node).getWholeText());
1443         } else if (node.nameIs("br")) {
1444             accum.append("\n");
1445         }
1446     }
1447 
1448     /**
1449      Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any
1450      newlines and spaces present in the original source.
1451      @return decoded, non-normalized text that is a direct child of this Element
1452      @see #text()
1453      @see #wholeText()
1454      @see #ownText()
1455      @since 1.15.1
1456      */
wholeOwnText()1457     public String wholeOwnText() {
1458         final StringBuilder accum = StringUtil.borrowBuilder();
1459         final int size = childNodeSize();
1460         for (int i = 0; i < size; i++) {
1461             Node node = childNodes.get(i);
1462             appendWholeText(node, accum);
1463         }
1464 
1465         return StringUtil.releaseBuilder(accum);
1466     }
1467 
1468     /**
1469      * Gets the (normalized) text owned by this element only; does not get the combined text of all children.
1470      * <p>
1471      * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
1472      * whereas {@code p.text()} returns {@code "Hello there now!"}.
1473      * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
1474      *
1475      * @return decoded text, or empty string if none.
1476      * @see #text()
1477      * @see #textNodes()
1478      */
ownText()1479     public String ownText() {
1480         StringBuilder sb = StringUtil.borrowBuilder();
1481         ownText(sb);
1482         return StringUtil.releaseBuilder(sb).trim();
1483     }
1484 
ownText(StringBuilder accum)1485     private void ownText(StringBuilder accum) {
1486         for (int i = 0; i < childNodeSize(); i++) {
1487             Node child = childNodes.get(i);
1488             if (child instanceof TextNode) {
1489                 TextNode textNode = (TextNode) child;
1490                 appendNormalisedText(accum, textNode);
1491             } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) {
1492                 accum.append(" ");
1493             }
1494         }
1495     }
1496 
appendNormalisedText(StringBuilder accum, TextNode textNode)1497     private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
1498         String text = textNode.getWholeText();
1499         if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode)
1500             accum.append(text);
1501         else
1502             StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum));
1503     }
1504 
preserveWhitespace(@ullable Node node)1505     static boolean preserveWhitespace(@Nullable Node node) {
1506         // looks only at this element and five levels up, to prevent recursion & needless stack searches
1507         if (node instanceof Element) {
1508             Element el = (Element) node;
1509             int i = 0;
1510             do {
1511                 if (el.tag.preserveWhitespace())
1512                     return true;
1513                 el = el.parent();
1514                 i++;
1515             } while (i < 6 && el != null);
1516         }
1517         return false;
1518     }
1519 
1520     /**
1521      * Set the text of this element. Any existing contents (text or elements) will be cleared.
1522      * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data,
1523      * not visible text.</p>
1524      * @param text decoded text
1525      * @return this element
1526      */
text(String text)1527     public Element text(String text) {
1528         Validate.notNull(text);
1529         empty();
1530         // special case for script/style in HTML: should be data node
1531         Document owner = ownerDocument();
1532         // an alternate impl would be to run through the parser
1533         if (owner != null && owner.parser().isContentForTagData(normalName()))
1534             appendChild(new DataNode(text));
1535         else
1536             appendChild(new TextNode(text));
1537 
1538         return this;
1539     }
1540 
1541     /**
1542      Checks if the current element or any of its child elements contain non-whitespace text.
1543      @return {@code true} if the element has non-blank text content, {@code false} otherwise.
1544      */
hasText()1545     public boolean hasText() {
1546         AtomicBoolean hasText = new AtomicBoolean(false);
1547         filter((node, depth) -> {
1548             if (node instanceof TextNode) {
1549                 TextNode textNode = (TextNode) node;
1550                 if (!textNode.isBlank()) {
1551                     hasText.set(true);
1552                     return NodeFilter.FilterResult.STOP;
1553                 }
1554             }
1555             return NodeFilter.FilterResult.CONTINUE;
1556         });
1557         return hasText.get();
1558     }
1559 
1560     /**
1561      * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the
1562      * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()}
1563      * for the contents of scripts, comments, CSS styles, etc.
1564      *
1565      * @return the data, or empty string if none
1566      *
1567      * @see #dataNodes()
1568      */
data()1569     public String data() {
1570         StringBuilder sb = StringUtil.borrowBuilder();
1571         traverse((childNode, depth) -> {
1572             if (childNode instanceof DataNode) {
1573                 DataNode data = (DataNode) childNode;
1574                 sb.append(data.getWholeData());
1575             } else if (childNode instanceof Comment) {
1576                 Comment comment = (Comment) childNode;
1577                 sb.append(comment.getData());
1578             } else if (childNode instanceof CDataNode) {
1579                 // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script.
1580                 // but in case another type gets through.
1581                 CDataNode cDataNode = (CDataNode) childNode;
1582                 sb.append(cDataNode.getWholeText());
1583             }
1584         });
1585         return StringUtil.releaseBuilder(sb);
1586     }
1587 
1588     /**
1589      * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
1590      * separated. (E.g. on <code>&lt;div class="header gray"&gt;</code> returns, "<code>header gray</code>")
1591      * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
1592      */
className()1593     public String className() {
1594         return attr("class").trim();
1595     }
1596 
1597     /**
1598      * Get each of the element's class names. E.g. on element {@code <div class="header gray">},
1599      * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
1600      * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
1601      * @return set of classnames, empty if no class attribute
1602      */
classNames()1603     public Set<String> classNames() {
1604     	String[] names = ClassSplit.split(className());
1605     	Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names));
1606     	classNames.remove(""); // if classNames() was empty, would include an empty class
1607 
1608         return classNames;
1609     }
1610 
1611     /**
1612      Set the element's {@code class} attribute to the supplied class names.
1613      @param classNames set of classes
1614      @return this element, for chaining
1615      */
classNames(Set<String> classNames)1616     public Element classNames(Set<String> classNames) {
1617         Validate.notNull(classNames);
1618         if (classNames.isEmpty()) {
1619             attributes().remove("class");
1620         } else {
1621             attributes().put("class", StringUtil.join(classNames, " "));
1622         }
1623         return this;
1624     }
1625 
    /**
     * Tests if this element has a class. Case-insensitive.
     * <p>
     * Returns {@code false} immediately when this element has no attributes, or when the {@code class} attribute
     * value is empty or shorter than the requested name. Otherwise the attribute value is scanned in place, one
     * whitespace-delimited name at a time.
     * </p>
     * @param className name of class to check for
     * @return true if it does, false if not
     */
    // performance sensitive
    public boolean hasClass(String className) {
        if (attributes == null)
            return false;

        final String classAttr = attributes.getIgnoreCase("class");
        final int len = classAttr.length();
        // note: className is dereferenced here, so it must not be null once the element has attributes
        final int wantLen = className.length();

        // an empty attribute, or one shorter than the wanted name, cannot contain it
        if (len == 0 || len < wantLen) {
            return false;
        }

        // if both lengths are equal, only need compare the className with the attribute
        if (len == wantLen) {
            return className.equalsIgnoreCase(classAttr);
        }

        // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations)
        boolean inClass = false; // true while the scan position is inside a class name
        int start = 0;           // index where the current class name began
        for (int i = 0; i < len; i++) {
            if (Character.isWhitespace(classAttr.charAt(i))) {
                if (inClass) {
                    // white space ends a class name, compare it with the requested one, ignore case
                    if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) {
                        return true;
                    }
                    inClass = false;
                }
            } else {
                if (!inClass) {
                    // we're in a class name : keep the start of the substring
                    inClass = true;
                    start = i;
                }
            }
        }

        // check the last entry: a name running to the end of the attribute has no trailing whitespace to end it
        if (inClass && len - start == wantLen) {
            return classAttr.regionMatches(true, start, className, 0, wantLen);
        }

        return false;
    }
1677 
1678     /**
1679      Add a class name to this element's {@code class} attribute.
1680      @param className class name to add
1681      @return this element
1682      */
addClass(String className)1683     public Element addClass(String className) {
1684         Validate.notNull(className);
1685 
1686         Set<String> classes = classNames();
1687         classes.add(className);
1688         classNames(classes);
1689 
1690         return this;
1691     }
1692 
1693     /**
1694      Remove a class name from this element's {@code class} attribute.
1695      @param className class name to remove
1696      @return this element
1697      */
removeClass(String className)1698     public Element removeClass(String className) {
1699         Validate.notNull(className);
1700 
1701         Set<String> classes = classNames();
1702         classes.remove(className);
1703         classNames(classes);
1704 
1705         return this;
1706     }
1707 
1708     /**
1709      Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
1710      @param className class name to toggle
1711      @return this element
1712      */
toggleClass(String className)1713     public Element toggleClass(String className) {
1714         Validate.notNull(className);
1715 
1716         Set<String> classes = classNames();
1717         if (classes.contains(className))
1718             classes.remove(className);
1719         else
1720             classes.add(className);
1721         classNames(classes);
1722 
1723         return this;
1724     }
1725 
1726     /**
1727      * Get the value of a form element (input, textarea, etc).
1728      * @return the value of the form element, or empty string if not set.
1729      */
val()1730     public String val() {
1731         if (elementIs("textarea", NamespaceHtml))
1732             return text();
1733         else
1734             return attr("value");
1735     }
1736 
1737     /**
1738      * Set the value of a form element (input, textarea, etc).
1739      * @param value value to set
1740      * @return this element (for chaining)
1741      */
val(String value)1742     public Element val(String value) {
1743         if (elementIs("textarea", NamespaceHtml))
1744             text(value);
1745         else
1746             attr("value", value);
1747         return this;
1748     }
1749 
1750     /**
1751      Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be
1752      enabled prior to parsing the content.
1753      @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked.
1754      @see org.jsoup.parser.Parser#setTrackPosition(boolean)
1755      @see Node#sourceRange()
1756      @see Range#isImplicit()
1757      @since 1.15.2
1758      */
endSourceRange()1759     public Range endSourceRange() {
1760         return Range.of(this, false);
1761     }
1762 
shouldIndent(final Document.OutputSettings out)1763     boolean shouldIndent(final Document.OutputSettings out) {
1764         return out.prettyPrint() && isFormatAsBlock(out) && !isInlineable(out) && !preserveWhitespace(parentNode);
1765     }
1766 
1767     @Override
outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out)1768     void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException {
1769         if (shouldIndent(out)) {
1770             if (accum instanceof StringBuilder) {
1771                 if (((StringBuilder) accum).length() > 0)
1772                     indent(accum, depth, out);
1773             } else {
1774                 indent(accum, depth, out);
1775             }
1776         }
1777         accum.append('<').append(tagName());
1778         if (attributes != null) attributes.html(accum, out);
1779 
1780         // selfclosing includes unknown tags, isEmpty defines tags that are always empty
1781         if (childNodes.isEmpty() && tag.isSelfClosing()) {
1782             if (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty())
1783                 accum.append('>');
1784             else
1785                 accum.append(" />"); // <img> in html, <img /> in xml
1786         }
1787         else
1788             accum.append('>');
1789     }
1790 
1791     @Override
outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out)1792     void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
1793         if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
1794             if (out.prettyPrint() && (!childNodes.isEmpty() && (
1795                 (tag.formatAsBlock() && !preserveWhitespace(parentNode)) ||
1796                     (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && (childNodes.get(0) instanceof Element))))
1797             )))
1798                 indent(accum, depth, out);
1799             accum.append("</").append(tagName()).append('>');
1800         }
1801     }
1802 
1803     /**
1804      * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
1805      * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
1806      *
1807      * @return String of HTML.
1808      * @see #outerHtml()
1809      */
html()1810     public String html() {
1811         StringBuilder accum = StringUtil.borrowBuilder();
1812         html(accum);
1813         String html = StringUtil.releaseBuilder(accum);
1814         return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html;
1815     }
1816 
1817     @Override
html(T appendable)1818     public <T extends Appendable> T html(T appendable) {
1819         final int size = childNodes.size();
1820         for (int i = 0; i < size; i++)
1821             childNodes.get(i).outerHtml(appendable);
1822 
1823         return appendable;
1824     }
1825 
    /**
     * Set this element's inner HTML. Clears the existing HTML first, then appends the parsed result of the
     * supplied HTML.
     * @param html HTML to parse and set into this element
     * @return this element
     * @see #append(String)
     */
    public Element html(String html) {
        empty(); // drop the current child nodes
        append(html);
        return this;
    }
1837 
1838     @Override
clone()1839     public Element clone() {
1840         return (Element) super.clone();
1841     }
1842 
1843     @Override
shallowClone()1844     public Element shallowClone() {
1845         // simpler than implementing a clone version with no child copy
1846         String baseUri = baseUri();
1847         if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute
1848         return new Element(tag, baseUri, attributes == null ? null : attributes.clone());
1849     }
1850 
1851     @Override
doClone(@ullable Node parent)1852     protected Element doClone(@Nullable Node parent) {
1853         Element clone = (Element) super.doClone(parent);
1854         clone.attributes = attributes != null ? attributes.clone() : null;
1855         clone.childNodes = new NodeList(clone, childNodes.size());
1856         clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone
1857 
1858         return clone;
1859     }
1860 
1861     // overrides of Node for call chaining
1862     @Override
clearAttributes()1863     public Element clearAttributes() {
1864         if (attributes != null) {
1865             super.clearAttributes(); // keeps internal attributes via iterator
1866             if (attributes.size() == 0)
1867                 attributes = null; // only remove entirely if no internal attributes
1868         }
1869 
1870         return this;
1871     }
1872 
1873     @Override
removeAttr(String attributeKey)1874     public Element removeAttr(String attributeKey) {
1875         return (Element) super.removeAttr(attributeKey);
1876     }
1877 
1878     @Override
root()1879     public Element root() {
1880         return (Element) super.root(); // probably a document, but always at least an element
1881     }
1882 
1883     @Override
traverse(NodeVisitor nodeVisitor)1884     public Element traverse(NodeVisitor nodeVisitor) {
1885         return (Element) super.traverse(nodeVisitor);
1886     }
1887 
1888     @Override
forEachNode(Consumer<? super Node> action)1889     public Element forEachNode(Consumer<? super Node> action) {
1890         return (Element) super.forEachNode(action);
1891     }
1892 
1893     /**
1894      Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal.
1895      Elements may be inspected, changed, added, replaced, or removed.
1896      @param action the function to perform on the element
1897      @return this Element, for chaining
1898      @see Node#forEachNode(Consumer)
1899      @deprecated use {@link #stream()}.{@link Stream#forEach(Consumer) forEach(Consumer)} instead. (Removing this method
1900      so Element can implement Iterable, which this signature conflicts with due to the non-void return.)
1901      */
1902     @Deprecated
forEach(Consumer<? super Element> action)1903     public Element forEach(Consumer<? super Element> action) {
1904         stream().forEach(action);
1905         return this;
1906     }
1907 
1908     @Override
filter(NodeFilter nodeFilter)1909     public Element filter(NodeFilter nodeFilter) {
1910         return  (Element) super.filter(nodeFilter);
1911     }
1912 
    /**
     * Child-node list that reports content changes back to the Element that owns it.
     */
    private static final class NodeList extends ChangeNotifyingArrayList<Node> {
        // the element notified whenever this list's contents change
        private final Element owner;

        /**
         * @param owner element to notify on changes
         * @param initialCapacity initial capacity handed to the backing list
         */
        NodeList(Element owner, int initialCapacity) {
            super(initialCapacity);
            this.owner = owner;
        }

        /** Forwards the change notification to the owning element. */
        public void onContentsChanged() {
            owner.nodelistChanged();
        }
    }
1925 
isFormatAsBlock(Document.OutputSettings out)1926     private boolean isFormatAsBlock(Document.OutputSettings out) {
1927         return tag.isBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline();
1928     }
1929 
isInlineable(Document.OutputSettings out)1930     private boolean isInlineable(Document.OutputSettings out) {
1931         if (!tag.isInline())
1932             return false;
1933         return (parent() == null || parent().isBlock())
1934             && !isEffectivelyFirst()
1935             && !out.outline()
1936             && !nameIs("br");
1937     }
1938 }
1939