1 package org.jsoup.nodes; 2 3 import org.jsoup.helper.ChangeNotifyingArrayList; 4 import org.jsoup.helper.Validate; 5 import org.jsoup.internal.StringUtil; 6 import org.jsoup.parser.ParseSettings; 7 import org.jsoup.parser.Parser; 8 import org.jsoup.parser.Tag; 9 import org.jsoup.select.Collector; 10 import org.jsoup.select.Elements; 11 import org.jsoup.select.Evaluator; 12 import org.jsoup.select.NodeFilter; 13 import org.jsoup.select.NodeTraversor; 14 import org.jsoup.select.NodeVisitor; 15 import org.jsoup.select.QueryParser; 16 import org.jsoup.select.Selector; 17 import org.jspecify.annotations.Nullable; 18 19 import java.io.IOException; 20 import java.lang.ref.WeakReference; 21 import java.util.ArrayList; 22 import java.util.Arrays; 23 import java.util.Collection; 24 import java.util.Collections; 25 import java.util.LinkedHashSet; 26 import java.util.List; 27 import java.util.Map; 28 import java.util.Set; 29 import java.util.concurrent.atomic.AtomicBoolean; 30 import java.util.function.Consumer; 31 import java.util.regex.Pattern; 32 import java.util.regex.PatternSyntaxException; 33 import java.util.stream.Collectors; 34 import java.util.stream.Stream; 35 36 import static org.jsoup.internal.Normalizer.normalize; 37 import static org.jsoup.nodes.TextNode.lastCharIsWhitespace; 38 import static org.jsoup.parser.Parser.NamespaceHtml; 39 import static org.jsoup.parser.TokenQueue.escapeCssIdentifier; 40 41 /** 42 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements). 43 <p> 44 From an Element, you can extract data, traverse the node graph, and manipulate the HTML. 
45 */ 46 public class Element extends Node { 47 private static final List<Element> EmptyChildren = Collections.emptyList(); 48 private static final Pattern ClassSplit = Pattern.compile("\\s+"); 49 private static final String BaseUriKey = Attributes.internalKey("baseUri"); 50 private Tag tag; 51 private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children 52 List<Node> childNodes; 53 @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null 54 55 /** 56 * Create a new, standalone element, in the specified namespace. 57 * @param tag tag name 58 * @param namespace namespace for this element 59 */ Element(String tag, String namespace)60 public Element(String tag, String namespace) { 61 this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null); 62 } 63 64 /** 65 * Create a new, standalone element, in the HTML namespace. 66 * @param tag tag name 67 * @see #Element(String tag, String namespace) 68 */ Element(String tag)69 public Element(String tag) { 70 this(Tag.valueOf(tag, Parser.NamespaceHtml, ParseSettings.preserveCase), "", null); 71 } 72 73 /** 74 * Create a new, standalone Element. (Standalone in that it has no parent.) 75 * 76 * @param tag tag of this element 77 * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's) 78 * @param attributes initial attributes (optional, may be null) 79 * @see #appendChild(Node) 80 * @see #appendElement(String) 81 */ Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes)82 public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) { 83 Validate.notNull(tag); 84 childNodes = EmptyNodes; 85 this.attributes = attributes; 86 this.tag = tag; 87 if (baseUri != null) 88 this.setBaseUri(baseUri); 89 } 90 91 /** 92 * Create a new Element from a Tag and a base URI. 
93 * 94 * @param tag element tag 95 * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any. 96 * @see Tag#valueOf(String, ParseSettings) 97 */ Element(Tag tag, @Nullable String baseUri)98 public Element(Tag tag, @Nullable String baseUri) { 99 this(tag, baseUri, null); 100 } 101 102 /** 103 Internal test to check if a nodelist object has been created. 104 */ hasChildNodes()105 protected boolean hasChildNodes() { 106 return childNodes != EmptyNodes; 107 } 108 ensureChildNodes()109 protected List<Node> ensureChildNodes() { 110 if (childNodes == EmptyNodes) { 111 childNodes = new NodeList(this, 4); 112 } 113 return childNodes; 114 } 115 116 @Override hasAttributes()117 protected boolean hasAttributes() { 118 return attributes != null; 119 } 120 121 @Override attributes()122 public Attributes attributes() { 123 if (attributes == null) // not using hasAttributes, as doesn't clear warning 124 attributes = new Attributes(); 125 return attributes; 126 } 127 128 @Override baseUri()129 public String baseUri() { 130 return searchUpForAttribute(this, BaseUriKey); 131 } 132 searchUpForAttribute(final Element start, final String key)133 private static String searchUpForAttribute(final Element start, final String key) { 134 Element el = start; 135 while (el != null) { 136 if (el.attributes != null && el.attributes.hasKey(key)) 137 return el.attributes.get(key); 138 el = el.parent(); 139 } 140 return ""; 141 } 142 143 @Override doSetBaseUri(String baseUri)144 protected void doSetBaseUri(String baseUri) { 145 attributes().put(BaseUriKey, baseUri); 146 } 147 148 @Override childNodeSize()149 public int childNodeSize() { 150 return childNodes.size(); 151 } 152 153 @Override nodeName()154 public String nodeName() { 155 return tag.getName(); 156 } 157 158 /** 159 * Get the name of the tag for this element. E.g. {@code div}. 
If you are using {@link ParseSettings#preserveCase 160 * case preserving parsing}, this will return the source's original case. 161 * 162 * @return the tag name 163 */ tagName()164 public String tagName() { 165 return tag.getName(); 166 } 167 168 /** 169 * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless 170 * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a 171 * normal name of {@code div}. 172 * @return normal name 173 */ 174 @Override normalName()175 public String normalName() { 176 return tag.normalName(); 177 } 178 179 /** 180 Test if this Element has the specified normalized name, and is in the specified namespace. 181 * @param normalName a normalized element name (e.g. {@code div}). 182 * @param namespace the namespace 183 * @return true if the element's normal name matches exactly, and is in the specified namespace 184 * @since 1.17.2 185 */ elementIs(String normalName, String namespace)186 public boolean elementIs(String normalName, String namespace) { 187 return tag.normalName().equals(normalName) && tag.namespace().equals(namespace); 188 } 189 190 /** 191 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 192 * {@code el.tagName("div");}. 193 * 194 * @param tagName new tag name for this element 195 * @return this element, for chaining 196 * @see Elements#tagName(String) 197 */ tagName(String tagName)198 public Element tagName(String tagName) { 199 return tagName(tagName, tag.namespace()); 200 } 201 202 /** 203 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 204 * {@code el.tagName("div");}. 
205 * 206 * @param tagName new tag name for this element 207 * @param namespace the new namespace for this element 208 * @return this element, for chaining 209 * @see Elements#tagName(String) 210 */ tagName(String tagName, String namespace)211 public Element tagName(String tagName, String namespace) { 212 Validate.notEmptyParam(tagName, "tagName"); 213 Validate.notEmptyParam(namespace, "namespace"); 214 tag = Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()); // maintains the case option of the original parse 215 return this; 216 } 217 218 /** 219 * Get the Tag for this element. 220 * 221 * @return the tag object 222 */ tag()223 public Tag tag() { 224 return tag; 225 } 226 227 /** 228 * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element 229 * {@code <span> == false}). 230 * 231 * @return true if block, false if not (and thus inline) 232 */ isBlock()233 public boolean isBlock() { 234 return tag.isBlock(); 235 } 236 237 /** 238 * Get the {@code id} attribute of this element. 239 * 240 * @return The id attribute, if present, or an empty string if not. 241 */ id()242 public String id() { 243 return attributes != null ? attributes.getIgnoreCase("id") :""; 244 } 245 246 /** 247 Set the {@code id} attribute of this element. 248 @param id the ID value to use 249 @return this Element, for chaining 250 */ id(String id)251 public Element id(String id) { 252 Validate.notNull(id); 253 attr("id", id); 254 return this; 255 } 256 257 /** 258 * Set an attribute value on this element. If this element already has an attribute with the 259 * key, its value is updated; otherwise, a new attribute is added. 260 * 261 * @return this element 262 */ attr(String attributeKey, String attributeValue)263 public Element attr(String attributeKey, String attributeValue) { 264 super.attr(attributeKey, attributeValue); 265 return this; 266 } 267 268 /** 269 * Set a boolean attribute value on this element. 
Setting to <code>true</code> sets the attribute value to "" and 270 * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute 271 * with the same key if it exists. 272 * 273 * @param attributeKey the attribute key 274 * @param attributeValue the attribute value 275 * 276 * @return this element 277 */ attr(String attributeKey, boolean attributeValue)278 public Element attr(String attributeKey, boolean attributeValue) { 279 attributes().put(attributeKey, attributeValue); 280 return this; 281 } 282 283 /** 284 Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc 285 will cascade back to this Element. 286 @param key the (case-sensitive) attribute key 287 @return the Attribute for this key, or null if not present. 288 @since 1.17.2 289 */ attribute(String key)290 public Attribute attribute(String key) { 291 return hasAttributes() ? attributes().attribute(key) : null; 292 } 293 294 /** 295 * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key 296 * starting with "data-" is included the dataset. 297 * <p> 298 * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset 299 * {@code package=jsoup, language=java}. 300 * <p> 301 * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected 302 * in the other map. 303 * <p> 304 * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector. 305 * @return a map of {@code key=value} custom data attributes. 306 */ dataset()307 public Map<String, String> dataset() { 308 return attributes().dataset(); 309 } 310 311 @Override @Nullable parent()312 public final Element parent() { 313 return (Element) parentNode; 314 } 315 316 /** 317 * Get this element's parent and ancestors, up to the document root. 
318 * @return this element's stack of parents, starting with the closest first. 319 */ parents()320 public Elements parents() { 321 Elements parents = new Elements(); 322 Element parent = this.parent(); 323 while (parent != null && !parent.nameIs("#root")) { 324 parents.add(parent); 325 parent = parent.parent(); 326 } 327 return parents; 328 } 329 330 /** 331 * Get a child element of this element, by its 0-based index number. 332 * <p> 333 * Note that an element can have both mixed Nodes and Elements as children. This method inspects 334 * a filtered list of children that are elements, and the index is based on that filtered list. 335 * </p> 336 * 337 * @param index the index number of the element to retrieve 338 * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException} 339 * @see #childNode(int) 340 */ child(int index)341 public Element child(int index) { 342 return childElementsList().get(index); 343 } 344 345 /** 346 * Get the number of child nodes of this element that are elements. 347 * <p> 348 * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link 349 * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.) 350 * </p> 351 * 352 * @return the number of child nodes that are elements 353 * @see #children() 354 * @see #child(int) 355 */ childrenSize()356 public int childrenSize() { 357 return childElementsList().size(); 358 } 359 360 /** 361 * Get this element's child elements. 362 * <p> 363 * This is effectively a filter on {@link #childNodes()} to get Element nodes. 364 * </p> 365 * @return child elements. If this element has no children, returns an empty list. 366 * @see #childNodes() 367 */ children()368 public Elements children() { 369 return new Elements(childElementsList()); 370 } 371 372 /** 373 * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. 
374 * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around. 375 * @return a list of child elements 376 */ childElementsList()377 List<Element> childElementsList() { 378 if (childNodeSize() == 0) 379 return EmptyChildren; // short circuit creating empty 380 381 List<Element> children; 382 if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) { 383 final int size = childNodes.size(); 384 children = new ArrayList<>(size); 385 //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here) 386 for (int i = 0; i < size; i++) { 387 final Node node = childNodes.get(i); 388 if (node instanceof Element) 389 children.add((Element) node); 390 } 391 shadowChildrenRef = new WeakReference<>(children); 392 } 393 return children; 394 } 395 396 /** 397 * Clears the cached shadow child elements. 398 */ 399 @Override nodelistChanged()400 void nodelistChanged() { 401 super.nodelistChanged(); 402 shadowChildrenRef = null; 403 } 404 405 /** 406 Returns a Stream of this Element and all of its descendant Elements. The stream has document order. 407 @return a stream of this element and its descendants. 408 @see #nodeStream() 409 @since 1.17.1 410 */ stream()411 public Stream<Element> stream() { 412 return NodeUtils.stream(this, Element.class); 413 } 414 filterNodes(Class<T> clazz)415 private <T> List<T> filterNodes(Class<T> clazz) { 416 return childNodes.stream() 417 .filter(clazz::isInstance) 418 .map(clazz::cast) 419 .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); 420 } 421 422 /** 423 * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. 424 * <p> 425 * This is effectively a filter on {@link #childNodes()} to get Text nodes. 426 * @return child text nodes. If this element has no text nodes, returns an 427 * empty list. 
428 * </p> 429 * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected: 430 * <ul> 431 * <li>{@code p.text()} = {@code "One Two Three Four"}</li> 432 * <li>{@code p.ownText()} = {@code "One Three Four"}</li> 433 * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li> 434 * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li> 435 * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li> 436 * </ul> 437 */ textNodes()438 public List<TextNode> textNodes() { 439 return filterNodes(TextNode.class); 440 } 441 442 /** 443 * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. 444 * <p> 445 * This is effectively a filter on {@link #childNodes()} to get Data nodes. 446 * </p> 447 * @return child data nodes. If this element has no data nodes, returns an 448 * empty list. 449 * @see #data() 450 */ dataNodes()451 public List<DataNode> dataNodes() { 452 return filterNodes(DataNode.class); 453 } 454 455 /** 456 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements 457 * may include this element, or any of its children. 
458 * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because 459 * multiple filters can be combined, e.g.:</p> 460 * <ul> 461 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes) 462 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely) 463 * </ul> 464 * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p> 465 * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p> 466 * 467 * @param cssQuery a {@link Selector} CSS-like query 468 * @return an {@link Elements} list containing elements that match the query (empty if none match) 469 * @see Selector selector query syntax 470 * @see QueryParser#parse(String) 471 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. 472 */ select(String cssQuery)473 public Elements select(String cssQuery) { 474 return Selector.select(cssQuery, this); 475 } 476 477 /** 478 * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but 479 * may be useful if you are running the same query many times (on many documents) and want to save the overhead of 480 * repeatedly parsing the CSS query. 481 * @param evaluator an element evaluator 482 * @return an {@link Elements} list containing elements that match the query (empty if none match) 483 */ select(Evaluator evaluator)484 public Elements select(Evaluator evaluator) { 485 return Selector.select(evaluator, this); 486 } 487 488 /** 489 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context. 
490 * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query 491 * execution stops on the first hit.</p> 492 * <p>Also known as {@code querySelector()} in the Web DOM.</p> 493 * @param cssQuery cssQuery a {@link Selector} CSS-like query 494 * @return the first matching element, or <b>{@code null}</b> if there is no match. 495 * @see #expectFirst(String) 496 */ selectFirst(String cssQuery)497 public @Nullable Element selectFirst(String cssQuery) { 498 return Selector.selectFirst(cssQuery, this); 499 } 500 501 /** 502 * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or 503 * {@code null} if none match. 504 * 505 * @param evaluator an element evaluator 506 * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none 507 * match. 508 */ selectFirst(Evaluator evaluator)509 public @Nullable Element selectFirst(Evaluator evaluator) { 510 return Collector.findFirst(evaluator, this); 511 } 512 513 /** 514 Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This 515 is useful if you want to simply abort processing on a failed match. 516 @param cssQuery a {@link Selector} CSS-like query 517 @return the first matching element 518 @throws IllegalArgumentException if no match is found 519 @since 1.15.2 520 */ expectFirst(String cssQuery)521 public Element expectFirst(String cssQuery) { 522 return (Element) Validate.ensureNotNull( 523 Selector.selectFirst(cssQuery, this), 524 parent() != null ? 525 "No elements matched the query '%s' on element '%s'.": 526 "No elements matched the query '%s' in the document." 527 , cssQuery, this.tagName() 528 ); 529 } 530 531 /** 532 * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web 533 * DOM. 
534 * 535 * @param cssQuery a {@link Selector} CSS query 536 * @return if this element matches the query 537 */ is(String cssQuery)538 public boolean is(String cssQuery) { 539 return is(QueryParser.parse(cssQuery)); 540 } 541 542 /** 543 * Check if this element matches the given evaluator. 544 * @param evaluator an element evaluator 545 * @return if this element matches 546 */ is(Evaluator evaluator)547 public boolean is(Evaluator evaluator) { 548 return evaluator.matches(this.root(), this); 549 } 550 551 /** 552 * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an 553 * ancestor, or {@code null} if there is no such matching element. 554 * @param cssQuery a {@link Selector} CSS query 555 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 556 * found. 557 */ closest(String cssQuery)558 public @Nullable Element closest(String cssQuery) { 559 return closest(QueryParser.parse(cssQuery)); 560 } 561 562 /** 563 * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an 564 * ancestor, or {@code null} if there is no such matching element. 565 * @param evaluator a query evaluator 566 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 567 * found. 568 */ closest(Evaluator evaluator)569 public @Nullable Element closest(Evaluator evaluator) { 570 Validate.notNull(evaluator); 571 Element el = this; 572 final Element root = root(); 573 do { 574 if (evaluator.matches(root, el)) 575 return el; 576 el = el.parent(); 577 } while (el != null); 578 return null; 579 } 580 581 /** 582 Find Elements that match the supplied {@index XPath} expression. 
583 <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be 584 expressed using the element's local name only.</p> 585 <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an 586 alternate XPathFactory implementation:</p> 587 <ol> 588 <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li> 589 <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br> 590 <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code> 591 </li> 592 </ol> 593 594 @param xpath XPath expression 595 @return matching elements, or an empty list if none match. 596 @see #selectXpath(String, Class) 597 @since 1.14.3 598 */ selectXpath(String xpath)599 public Elements selectXpath(String xpath) { 600 return new Elements(NodeUtils.selectXpath(xpath, this, Element.class)); 601 } 602 603 /** 604 Find Nodes that match the supplied XPath expression. 605 <p>For example, to select TextNodes under {@code p} elements: </p> 606 <pre>List<TextNode> textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre> 607 <p>Note that in the jsoup DOM, Attribute objects are not Nodes. 
To directly select attribute values, do something 608 like:</p> 609 <pre>List<String> hrefs = doc.selectXpath("//a").eachAttr("href");</pre> 610 @param xpath XPath expression 611 @param nodeType the jsoup node type to return 612 @see #selectXpath(String) 613 @return a list of matching nodes 614 @since 1.14.3 615 */ selectXpath(String xpath, Class<T> nodeType)616 public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) { 617 return NodeUtils.selectXpath(xpath, this, nodeType); 618 } 619 620 /** 621 * Insert a node to the end of this Element's children. The incoming node will be re-parented. 622 * 623 * @param child node to add. 624 * @return this Element, for chaining 625 * @see #prependChild(Node) 626 * @see #insertChildren(int, Collection) 627 */ appendChild(Node child)628 public Element appendChild(Node child) { 629 Validate.notNull(child); 630 631 // was - Node#addChildren(child). short-circuits an array create and a loop. 632 reparentChild(child); 633 ensureChildNodes(); 634 childNodes.add(child); 635 child.setSiblingIndex(childNodes.size() - 1); 636 return this; 637 } 638 639 /** 640 Insert the given nodes to the end of this Element's children. 641 642 @param children nodes to add 643 @return this Element, for chaining 644 @see #insertChildren(int, Collection) 645 */ appendChildren(Collection<? extends Node> children)646 public Element appendChildren(Collection<? extends Node> children) { 647 insertChildren(-1, children); 648 return this; 649 } 650 651 /** 652 * Add this element to the supplied parent element, as its next child. 653 * 654 * @param parent element to which this element will be appended 655 * @return this element, so that you can continue modifying the element 656 */ appendTo(Element parent)657 public Element appendTo(Element parent) { 658 Validate.notNull(parent); 659 parent.appendChild(this); 660 return this; 661 } 662 663 /** 664 * Add a node to the start of this element's children. 665 * 666 * @param child node to add. 
667 * @return this element, so that you can add more child nodes or elements. 668 */ prependChild(Node child)669 public Element prependChild(Node child) { 670 Validate.notNull(child); 671 672 addChildren(0, child); 673 return this; 674 } 675 676 /** 677 Insert the given nodes to the start of this Element's children. 678 679 @param children nodes to add 680 @return this Element, for chaining 681 @see #insertChildren(int, Collection) 682 */ prependChildren(Collection<? extends Node> children)683 public Element prependChildren(Collection<? extends Node> children) { 684 insertChildren(0, children); 685 return this; 686 } 687 688 689 /** 690 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 691 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 692 * 693 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 694 * end 695 * @param children child nodes to insert 696 * @return this element, for chaining. 697 */ insertChildren(int index, Collection<? extends Node> children)698 public Element insertChildren(int index, Collection<? extends Node> children) { 699 Validate.notNull(children, "Children collection to be inserted must not be null."); 700 int currentSize = childNodeSize(); 701 if (index < 0) index += currentSize +1; // roll around 702 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 703 704 ArrayList<Node> nodes = new ArrayList<>(children); 705 Node[] nodeArray = nodes.toArray(new Node[0]); 706 addChildren(index, nodeArray); 707 return this; 708 } 709 710 /** 711 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 712 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 713 * 714 * @param index 0-based index to insert children at. 
Specify {@code 0} to insert at the start, {@code -1} at the 715 * end 716 * @param children child nodes to insert 717 * @return this element, for chaining. 718 */ insertChildren(int index, Node... children)719 public Element insertChildren(int index, Node... children) { 720 Validate.notNull(children, "Children collection to be inserted must not be null."); 721 int currentSize = childNodeSize(); 722 if (index < 0) index += currentSize +1; // roll around 723 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 724 725 addChildren(index, children); 726 return this; 727 } 728 729 /** 730 * Create a new element by tag name, and add it as this Element's last child. 731 * 732 * @param tagName the name of the tag (e.g. {@code div}). 733 * @return the new element, to allow you to add content to it, e.g.: 734 * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} 735 */ appendElement(String tagName)736 public Element appendElement(String tagName) { 737 return appendElement(tagName, tag.namespace()); 738 } 739 740 /** 741 * Create a new element by tag name and namespace, add it as this Element's last child. 742 * 743 * @param tagName the name of the tag (e.g. {@code div}). 744 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 745 * @return the new element, in the specified namespace 746 */ appendElement(String tagName, String namespace)747 public Element appendElement(String tagName, String namespace) { 748 Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri()); 749 appendChild(child); 750 return child; 751 } 752 753 /** 754 * Create a new element by tag name, and add it as this Element's first child. 755 * 756 * @param tagName the name of the tag (e.g. {@code div}). 
757 * @return the new element, to allow you to add content to it, e.g.: 758 * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} 759 */ prependElement(String tagName)760 public Element prependElement(String tagName) { 761 return prependElement(tagName, tag.namespace()); 762 } 763 764 /** 765 * Create a new element by tag name and namespace, and add it as this Element's first child. 766 * 767 * @param tagName the name of the tag (e.g. {@code div}). 768 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 769 * @return the new element, in the specified namespace 770 */ prependElement(String tagName, String namespace)771 public Element prependElement(String tagName, String namespace) { 772 Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri()); 773 prependChild(child); 774 return child; 775 } 776 777 /** 778 * Create and append a new TextNode to this element. 779 * 780 * @param text the (un-encoded) text to add 781 * @return this element 782 */ appendText(String text)783 public Element appendText(String text) { 784 Validate.notNull(text); 785 TextNode node = new TextNode(text); 786 appendChild(node); 787 return this; 788 } 789 790 /** 791 * Create and prepend a new TextNode to this element. 792 * 793 * @param text the decoded text to add 794 * @return this element 795 */ prependText(String text)796 public Element prependText(String text) { 797 Validate.notNull(text); 798 TextNode node = new TextNode(text); 799 prependChild(node); 800 return this; 801 } 802 803 /** 804 * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. 
805 * @param html HTML to add inside this element, after the existing HTML 806 * @return this element 807 * @see #html(String) 808 */ append(String html)809 public Element append(String html) { 810 Validate.notNull(html); 811 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 812 addChildren(nodes.toArray(new Node[0])); 813 return this; 814 } 815 816 /** 817 * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. 818 * @param html HTML to add inside this element, before the existing HTML 819 * @return this element 820 * @see #html(String) 821 */ prepend(String html)822 public Element prepend(String html) { 823 Validate.notNull(html); 824 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 825 addChildren(0, nodes.toArray(new Node[0])); 826 return this; 827 } 828 829 /** 830 * Insert the specified HTML into the DOM before this element (as a preceding sibling). 831 * 832 * @param html HTML to add before this element 833 * @return this element, for chaining 834 * @see #after(String) 835 */ 836 @Override before(String html)837 public Element before(String html) { 838 return (Element) super.before(html); 839 } 840 841 /** 842 * Insert the specified node into the DOM before this node (as a preceding sibling). 843 * @param node to add before this element 844 * @return this Element, for chaining 845 * @see #after(Node) 846 */ 847 @Override before(Node node)848 public Element before(Node node) { 849 return (Element) super.before(node); 850 } 851 852 /** 853 * Insert the specified HTML into the DOM after this element (as a following sibling). 
854 * 855 * @param html HTML to add after this element 856 * @return this element, for chaining 857 * @see #before(String) 858 */ 859 @Override after(String html)860 public Element after(String html) { 861 return (Element) super.after(html); 862 } 863 864 /** 865 * Insert the specified node into the DOM after this node (as a following sibling). 866 * @param node to add after this element 867 * @return this element, for chaining 868 * @see #before(Node) 869 */ 870 @Override after(Node node)871 public Element after(Node node) { 872 return (Element) super.after(node); 873 } 874 875 /** 876 * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to 877 * {@code null}. 878 * @return this element 879 */ 880 @Override empty()881 public Element empty() { 882 // Detach each of the children -> parent links: 883 for (Node child : childNodes) { 884 child.parentNode = null; 885 } 886 childNodes.clear(); 887 return this; 888 } 889 890 /** 891 * Wrap the supplied HTML around this element. 892 * 893 * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 894 * @return this element, for chaining. 895 */ 896 @Override wrap(String html)897 public Element wrap(String html) { 898 return (Element) super.wrap(html); 899 } 900 901 /** 902 * Get a CSS selector that will uniquely select this element. 903 * <p> 904 * If the element has an ID, returns #id; 905 * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, 906 * followed by a unique selector for the element (tag.class.class:nth-child(n)). 907 * </p> 908 * 909 * @return the CSS Path that can be used to retrieve the element in a selector. 910 */ cssSelector()911 public String cssSelector() { 912 if (id().length() > 0) { 913 // prefer to return the ID - but check that it's actually unique first! 
914 String idSel = "#" + escapeCssIdentifier(id()); 915 Document doc = ownerDocument(); 916 if (doc != null) { 917 Elements els = doc.select(idSel); 918 if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl 919 return idSel; 920 } else { 921 return idSel; // no ownerdoc, return the ID selector 922 } 923 } 924 925 StringBuilder selector = StringUtil.borrowBuilder(); 926 Element el = this; 927 while (el != null && !(el instanceof Document)) { 928 selector.insert(0, el.cssSelectorComponent()); 929 el = el.parent(); 930 } 931 return StringUtil.releaseBuilder(selector); 932 } 933 cssSelectorComponent()934 private String cssSelectorComponent() { 935 // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag 936 String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|"); 937 StringBuilder selector = StringUtil.borrowBuilder().append(tagName); 938 // String classes = StringUtil.join(classNames().stream().map(TokenQueue::escapeCssIdentifier).iterator(), "."); 939 // todo - replace with ^^ in 1.16.1 when we enable Android support for stream etc 940 StringUtil.StringJoiner escapedClasses = new StringUtil.StringJoiner("."); 941 for (String name : classNames()) escapedClasses.add(escapeCssIdentifier(name)); 942 String classes = escapedClasses.complete(); 943 if (classes.length() > 0) 944 selector.append('.').append(classes); 945 946 if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node 947 return StringUtil.releaseBuilder(selector); 948 949 selector.insert(0, " > "); 950 if (parent().select(selector.toString()).size() > 1) 951 selector.append(String.format( 952 ":nth-child(%d)", elementSiblingIndex() + 1)); 953 954 return StringUtil.releaseBuilder(selector); 955 } 956 957 /** 958 * Get sibling elements. If the element has no sibling elements, returns an empty list. 
An element is not a sibling 959 * of itself, so will not be included in the returned list. 960 * @return sibling elements 961 */ siblingElements()962 public Elements siblingElements() { 963 if (parentNode == null) 964 return new Elements(0); 965 966 List<Element> elements = parent().childElementsList(); 967 Elements siblings = new Elements(elements.size() - 1); 968 for (Element el: elements) 969 if (el != this) 970 siblings.add(el); 971 return siblings; 972 } 973 974 /** 975 * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s, 976 * the {@code nextElementSibling} of the first {@code p} is the second {@code p}. 977 * <p> 978 * This is similar to {@link #nextSibling()}, but specifically finds only Elements 979 * </p> 980 * @return the next element, or null if there is no next element 981 * @see #previousElementSibling() 982 */ nextElementSibling()983 public @Nullable Element nextElementSibling() { 984 Node next = this; 985 while ((next = next.nextSibling()) != null) { 986 if (next instanceof Element) return (Element) next; 987 } 988 return null; 989 } 990 991 /** 992 * Get each of the sibling elements that come after this element. 993 * 994 * @return each of the element siblings after this element, or an empty list if there are no next sibling elements 995 */ nextElementSiblings()996 public Elements nextElementSiblings() { 997 return nextElementSiblings(true); 998 } 999 1000 /** 1001 * Gets the previous element sibling of this element. 1002 * @return the previous element, or null if there is no previous element 1003 * @see #nextElementSibling() 1004 */ previousElementSibling()1005 public @Nullable Element previousElementSibling() { 1006 Node prev = this; 1007 while ((prev = prev.previousSibling()) != null) { 1008 if (prev instanceof Element) return (Element) prev; 1009 } 1010 return null; 1011 } 1012 1013 /** 1014 * Get each of the element siblings before this element. 
1015 * 1016 * @return the previous element siblings, or an empty list if there are none. 1017 */ previousElementSiblings()1018 public Elements previousElementSiblings() { 1019 return nextElementSiblings(false); 1020 } 1021 nextElementSiblings(boolean next)1022 private Elements nextElementSiblings(boolean next) { 1023 Elements els = new Elements(); 1024 if (parentNode == null) 1025 return els; 1026 els.add(this); 1027 return next ? els.nextAll() : els.prevAll(); 1028 } 1029 1030 /** 1031 * Gets the first Element sibling of this element. That may be this element. 1032 * @return the first sibling that is an element (aka the parent's first element child) 1033 */ firstElementSibling()1034 public Element firstElementSibling() { 1035 if (parent() != null) { 1036 //noinspection DataFlowIssue (not nullable, would be this is no other sibs) 1037 return parent().firstElementChild(); 1038 } else 1039 return this; // orphan is its own first sibling 1040 } 1041 1042 /** 1043 * Get the list index of this element in its element sibling list. I.e. if this is the first element 1044 * sibling, returns 0. 1045 * @return position in element sibling list 1046 */ elementSiblingIndex()1047 public int elementSiblingIndex() { 1048 if (parent() == null) return 0; 1049 return indexInList(this, parent().childElementsList()); 1050 } 1051 1052 /** 1053 * Gets the last element sibling of this element. That may be this element. 
1054 * @return the last sibling that is an element (aka the parent's last element child) 1055 */ lastElementSibling()1056 public Element lastElementSibling() { 1057 if (parent() != null) { 1058 //noinspection DataFlowIssue (not nullable, would be this if no other sibs) 1059 return parent().lastElementChild(); 1060 } else 1061 return this; 1062 } 1063 indexInList(Element search, List<E> elements)1064 private static <E extends Element> int indexInList(Element search, List<E> elements) { 1065 final int size = elements.size(); 1066 for (int i = 0; i < size; i++) { 1067 if (elements.get(i) == search) 1068 return i; 1069 } 1070 return 0; 1071 } 1072 1073 /** 1074 Gets the first child of this Element that is an Element, or {@code null} if there is none. 1075 @return the first Element child node, or null. 1076 @see #firstChild() 1077 @see #lastElementChild() 1078 @since 1.15.2 1079 */ firstElementChild()1080 public @Nullable Element firstElementChild() { 1081 Node child = firstChild(); 1082 while (child != null) { 1083 if (child instanceof Element) return (Element) child; 1084 child = child.nextSibling(); 1085 } 1086 return null; 1087 } 1088 1089 /** 1090 Gets the last child of this Element that is an Element, or @{code null} if there is none. 1091 @return the last Element child node, or null. 1092 @see #lastChild() 1093 @see #firstElementChild() 1094 @since 1.15.2 1095 */ lastElementChild()1096 public @Nullable Element lastElementChild() { 1097 Node child = lastChild(); 1098 while (child != null) { 1099 if (child instanceof Element) return (Element) child; 1100 child = child.previousSibling(); 1101 } 1102 return null; 1103 } 1104 1105 // DOM type methods 1106 1107 /** 1108 * Finds elements, including and recursively under this element, with the specified tag name. 1109 * @param tagName The tag name to search for (case insensitively). 1110 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. 
1111 */ getElementsByTag(String tagName)1112 public Elements getElementsByTag(String tagName) { 1113 Validate.notEmpty(tagName); 1114 tagName = normalize(tagName); 1115 1116 return Collector.collect(new Evaluator.Tag(tagName), this); 1117 } 1118 1119 /** 1120 * Find an element by ID, including or under this element. 1121 * <p> 1122 * Note that this finds the first matching ID, starting with this element. If you search down from a different 1123 * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, 1124 * use {@link Document#getElementById(String)} 1125 * @param id The ID to search for. 1126 * @return The first matching element by ID, starting with this element, or null if none found. 1127 */ getElementById(String id)1128 public @Nullable Element getElementById(String id) { 1129 Validate.notEmpty(id); 1130 1131 Elements elements = Collector.collect(new Evaluator.Id(id), this); 1132 if (elements.size() > 0) 1133 return elements.get(0); 1134 else 1135 return null; 1136 } 1137 1138 /** 1139 * Find elements that have this class, including or under this element. Case-insensitive. 1140 * <p> 1141 * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method 1142 * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. 1143 * 1144 * @param className the name of the class to search for. 1145 * @return elements with the supplied class name, empty if none 1146 * @see #hasClass(String) 1147 * @see #classNames() 1148 */ getElementsByClass(String className)1149 public Elements getElementsByClass(String className) { 1150 Validate.notEmpty(className); 1151 1152 return Collector.collect(new Evaluator.Class(className), this); 1153 } 1154 1155 /** 1156 * Find elements that have a named attribute set. Case-insensitive. 1157 * 1158 * @param key name of the attribute, e.g. 
{@code href} 1159 * @return elements that have this attribute, empty if none 1160 */ getElementsByAttribute(String key)1161 public Elements getElementsByAttribute(String key) { 1162 Validate.notEmpty(key); 1163 key = key.trim(); 1164 1165 return Collector.collect(new Evaluator.Attribute(key), this); 1166 } 1167 1168 /** 1169 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements 1170 * that have HTML5 datasets. 1171 * @param keyPrefix name prefix of the attribute e.g. {@code data-} 1172 * @return elements that have attribute names that start with the prefix, empty if none. 1173 */ getElementsByAttributeStarting(String keyPrefix)1174 public Elements getElementsByAttributeStarting(String keyPrefix) { 1175 Validate.notEmpty(keyPrefix); 1176 keyPrefix = keyPrefix.trim(); 1177 1178 return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); 1179 } 1180 1181 /** 1182 * Find elements that have an attribute with the specific value. Case-insensitive. 1183 * 1184 * @param key name of the attribute 1185 * @param value value of the attribute 1186 * @return elements that have this attribute with this value, empty if none 1187 */ getElementsByAttributeValue(String key, String value)1188 public Elements getElementsByAttributeValue(String key, String value) { 1189 return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); 1190 } 1191 1192 /** 1193 * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive. 
1194 * 1195 * @param key name of the attribute 1196 * @param value value of the attribute 1197 * @return elements that do not have a matching attribute 1198 */ getElementsByAttributeValueNot(String key, String value)1199 public Elements getElementsByAttributeValueNot(String key, String value) { 1200 return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); 1201 } 1202 1203 /** 1204 * Find elements that have attributes that start with the value prefix. Case-insensitive. 1205 * 1206 * @param key name of the attribute 1207 * @param valuePrefix start of attribute value 1208 * @return elements that have attributes that start with the value prefix 1209 */ getElementsByAttributeValueStarting(String key, String valuePrefix)1210 public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { 1211 return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); 1212 } 1213 1214 /** 1215 * Find elements that have attributes that end with the value suffix. Case-insensitive. 1216 * 1217 * @param key name of the attribute 1218 * @param valueSuffix end of the attribute value 1219 * @return elements that have attributes that end with the value suffix 1220 */ getElementsByAttributeValueEnding(String key, String valueSuffix)1221 public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { 1222 return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); 1223 } 1224 1225 /** 1226 * Find elements that have attributes whose value contains the match string. Case-insensitive. 
1227 * 1228 * @param key name of the attribute 1229 * @param match substring of value to search for 1230 * @return elements that have attributes containing this text 1231 */ getElementsByAttributeValueContaining(String key, String match)1232 public Elements getElementsByAttributeValueContaining(String key, String match) { 1233 return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); 1234 } 1235 1236 /** 1237 * Find elements that have an attribute whose value matches the supplied regular expression. 1238 * @param key name of the attribute 1239 * @param pattern compiled regular expression to match against attribute values 1240 * @return elements that have attributes matching this regular expression 1241 */ getElementsByAttributeValueMatching(String key, Pattern pattern)1242 public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { 1243 return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); 1244 1245 } 1246 1247 /** 1248 * Find elements that have attributes whose values match the supplied regular expression. 1249 * @param key name of the attribute 1250 * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options. 1251 * @return elements that have attributes matching this regular expression 1252 */ getElementsByAttributeValueMatching(String key, String regex)1253 public Elements getElementsByAttributeValueMatching(String key, String regex) { 1254 Pattern pattern; 1255 try { 1256 pattern = Pattern.compile(regex); 1257 } catch (PatternSyntaxException e) { 1258 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1259 } 1260 return getElementsByAttributeValueMatching(key, pattern); 1261 } 1262 1263 /** 1264 * Find elements whose sibling index is less than the supplied index. 
1265 * @param index 0-based index 1266 * @return elements less than index 1267 */ getElementsByIndexLessThan(int index)1268 public Elements getElementsByIndexLessThan(int index) { 1269 return Collector.collect(new Evaluator.IndexLessThan(index), this); 1270 } 1271 1272 /** 1273 * Find elements whose sibling index is greater than the supplied index. 1274 * @param index 0-based index 1275 * @return elements greater than index 1276 */ getElementsByIndexGreaterThan(int index)1277 public Elements getElementsByIndexGreaterThan(int index) { 1278 return Collector.collect(new Evaluator.IndexGreaterThan(index), this); 1279 } 1280 1281 /** 1282 * Find elements whose sibling index is equal to the supplied index. 1283 * @param index 0-based index 1284 * @return elements equal to index 1285 */ getElementsByIndexEquals(int index)1286 public Elements getElementsByIndexEquals(int index) { 1287 return Collector.collect(new Evaluator.IndexEquals(index), this); 1288 } 1289 1290 /** 1291 * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly 1292 * in the element, or in any of its descendants. 1293 * @param searchText to look for in the element's text 1294 * @return elements that contain the string, case-insensitive. 1295 * @see Element#text() 1296 */ getElementsContainingText(String searchText)1297 public Elements getElementsContainingText(String searchText) { 1298 return Collector.collect(new Evaluator.ContainsText(searchText), this); 1299 } 1300 1301 /** 1302 * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly 1303 * in the element, not in any of its descendants. 1304 * @param searchText to look for in the element's own text 1305 * @return elements that contain the string, case-insensitive. 
1306 * @see Element#ownText() 1307 */ getElementsContainingOwnText(String searchText)1308 public Elements getElementsContainingOwnText(String searchText) { 1309 return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); 1310 } 1311 1312 /** 1313 * Find elements whose text matches the supplied regular expression. 1314 * @param pattern regular expression to match text against 1315 * @return elements matching the supplied regular expression. 1316 * @see Element#text() 1317 */ getElementsMatchingText(Pattern pattern)1318 public Elements getElementsMatchingText(Pattern pattern) { 1319 return Collector.collect(new Evaluator.Matches(pattern), this); 1320 } 1321 1322 /** 1323 * Find elements whose text matches the supplied regular expression. 1324 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options. 1325 * @return elements matching the supplied regular expression. 1326 * @see Element#text() 1327 */ getElementsMatchingText(String regex)1328 public Elements getElementsMatchingText(String regex) { 1329 Pattern pattern; 1330 try { 1331 pattern = Pattern.compile(regex); 1332 } catch (PatternSyntaxException e) { 1333 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1334 } 1335 return getElementsMatchingText(pattern); 1336 } 1337 1338 /** 1339 * Find elements whose own text matches the supplied regular expression. 1340 * @param pattern regular expression to match text against 1341 * @return elements matching the supplied regular expression. 1342 * @see Element#ownText() 1343 */ getElementsMatchingOwnText(Pattern pattern)1344 public Elements getElementsMatchingOwnText(Pattern pattern) { 1345 return Collector.collect(new Evaluator.MatchesOwn(pattern), this); 1346 } 1347 1348 /** 1349 * Find elements whose own text matches the supplied regular expression. 
1350 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options. 1351 * @return elements matching the supplied regular expression. 1352 * @see Element#ownText() 1353 */ getElementsMatchingOwnText(String regex)1354 public Elements getElementsMatchingOwnText(String regex) { 1355 Pattern pattern; 1356 try { 1357 pattern = Pattern.compile(regex); 1358 } catch (PatternSyntaxException e) { 1359 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1360 } 1361 return getElementsMatchingOwnText(pattern); 1362 } 1363 1364 /** 1365 * Find all elements under this element (including self, and children of children). 1366 * 1367 * @return all elements 1368 */ getAllElements()1369 public Elements getAllElements() { 1370 return Collector.collect(new Evaluator.AllElements(), this); 1371 } 1372 1373 /** 1374 Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and 1375 trimmed. 1376 <p>For example, given HTML {@code <p>Hello <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there 1377 now!"} 1378 <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not 1379 children), use {@link #ownText()} 1380 <p>Note that this method returns the textual content that would be presented to a reader. The contents of data 1381 nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve 1382 that content. 1383 1384 @return decoded, normalized text, or empty string if none. 
1385 @see #wholeText() 1386 @see #ownText() 1387 @see #textNodes() 1388 */ text()1389 public String text() { 1390 final StringBuilder accum = StringUtil.borrowBuilder(); 1391 NodeTraversor.traverse(new TextAccumulator(accum), this); 1392 return StringUtil.releaseBuilder(accum).trim(); 1393 } 1394 1395 private static class TextAccumulator implements NodeVisitor { 1396 private final StringBuilder accum; 1397 TextAccumulator(StringBuilder accum)1398 public TextAccumulator(StringBuilder accum) { 1399 this.accum = accum; 1400 } 1401 head(Node node, int depth)1402 public void head(Node node, int depth) { 1403 if (node instanceof TextNode) { 1404 TextNode textNode = (TextNode) node; 1405 appendNormalisedText(accum, textNode); 1406 } else if (node instanceof Element) { 1407 Element element = (Element) node; 1408 if (accum.length() > 0 && 1409 (element.isBlock() || element.nameIs("br")) && 1410 !lastCharIsWhitespace(accum)) 1411 accum.append(' '); 1412 } 1413 } 1414 tail(Node node, int depth)1415 public void tail(Node node, int depth) { 1416 // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two". 1417 if (node instanceof Element) { 1418 Element element = (Element) node; 1419 Node next = node.nextSibling(); 1420 if (element.isBlock() && (next instanceof TextNode || next instanceof Element && !((Element) next).tag.formatAsBlock()) && !lastCharIsWhitespace(accum)) 1421 accum.append(' '); 1422 } 1423 1424 } 1425 } 1426 1427 /** 1428 Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces 1429 present in the original source. 
1430 @return decoded, non-normalized text 1431 @see #text() 1432 @see #wholeOwnText() 1433 */ wholeText()1434 public String wholeText() { 1435 final StringBuilder accum = StringUtil.borrowBuilder(); 1436 nodeStream().forEach(node -> appendWholeText(node, accum)); 1437 return StringUtil.releaseBuilder(accum); 1438 } 1439 appendWholeText(Node node, StringBuilder accum)1440 private static void appendWholeText(Node node, StringBuilder accum) { 1441 if (node instanceof TextNode) { 1442 accum.append(((TextNode) node).getWholeText()); 1443 } else if (node.nameIs("br")) { 1444 accum.append("\n"); 1445 } 1446 } 1447 1448 /** 1449 Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any 1450 newlines and spaces present in the original source. 1451 @return decoded, non-normalized text that is a direct child of this Element 1452 @see #text() 1453 @see #wholeText() 1454 @see #ownText() 1455 @since 1.15.1 1456 */ wholeOwnText()1457 public String wholeOwnText() { 1458 final StringBuilder accum = StringUtil.borrowBuilder(); 1459 final int size = childNodeSize(); 1460 for (int i = 0; i < size; i++) { 1461 Node node = childNodes.get(i); 1462 appendWholeText(node, accum); 1463 } 1464 1465 return StringUtil.releaseBuilder(accum); 1466 } 1467 1468 /** 1469 * Gets the (normalized) text owned by this element only; does not get the combined text of all children. 1470 * <p> 1471 * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"}, 1472 * whereas {@code p.text()} returns {@code "Hello there now!"}. 1473 * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. 1474 * 1475 * @return decoded text, or empty string if none. 
1476 * @see #text() 1477 * @see #textNodes() 1478 */ ownText()1479 public String ownText() { 1480 StringBuilder sb = StringUtil.borrowBuilder(); 1481 ownText(sb); 1482 return StringUtil.releaseBuilder(sb).trim(); 1483 } 1484 ownText(StringBuilder accum)1485 private void ownText(StringBuilder accum) { 1486 for (int i = 0; i < childNodeSize(); i++) { 1487 Node child = childNodes.get(i); 1488 if (child instanceof TextNode) { 1489 TextNode textNode = (TextNode) child; 1490 appendNormalisedText(accum, textNode); 1491 } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) { 1492 accum.append(" "); 1493 } 1494 } 1495 } 1496 appendNormalisedText(StringBuilder accum, TextNode textNode)1497 private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { 1498 String text = textNode.getWholeText(); 1499 if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode) 1500 accum.append(text); 1501 else 1502 StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum)); 1503 } 1504 preserveWhitespace(@ullable Node node)1505 static boolean preserveWhitespace(@Nullable Node node) { 1506 // looks only at this element and five levels up, to prevent recursion & needless stack searches 1507 if (node instanceof Element) { 1508 Element el = (Element) node; 1509 int i = 0; 1510 do { 1511 if (el.tag.preserveWhitespace()) 1512 return true; 1513 el = el.parent(); 1514 i++; 1515 } while (i < 6 && el != null); 1516 } 1517 return false; 1518 } 1519 1520 /** 1521 * Set the text of this element. Any existing contents (text or elements) will be cleared. 
1522 * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data, 1523 * not visible text.</p> 1524 * @param text decoded text 1525 * @return this element 1526 */ text(String text)1527 public Element text(String text) { 1528 Validate.notNull(text); 1529 empty(); 1530 // special case for script/style in HTML: should be data node 1531 Document owner = ownerDocument(); 1532 // an alternate impl would be to run through the parser 1533 if (owner != null && owner.parser().isContentForTagData(normalName())) 1534 appendChild(new DataNode(text)); 1535 else 1536 appendChild(new TextNode(text)); 1537 1538 return this; 1539 } 1540 1541 /** 1542 Checks if the current element or any of its child elements contain non-whitespace text. 1543 @return {@code true} if the element has non-blank text content, {@code false} otherwise. 1544 */ hasText()1545 public boolean hasText() { 1546 AtomicBoolean hasText = new AtomicBoolean(false); 1547 filter((node, depth) -> { 1548 if (node instanceof TextNode) { 1549 TextNode textNode = (TextNode) node; 1550 if (!textNode.isBlank()) { 1551 hasText.set(true); 1552 return NodeFilter.FilterResult.STOP; 1553 } 1554 } 1555 return NodeFilter.FilterResult.CONTINUE; 1556 }); 1557 return hasText.get(); 1558 } 1559 1560 /** 1561 * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the 1562 * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()} 1563 * for the contents of scripts, comments, CSS styles, etc. 
1564 * 1565 * @return the data, or empty string if none 1566 * 1567 * @see #dataNodes() 1568 */ data()1569 public String data() { 1570 StringBuilder sb = StringUtil.borrowBuilder(); 1571 traverse((childNode, depth) -> { 1572 if (childNode instanceof DataNode) { 1573 DataNode data = (DataNode) childNode; 1574 sb.append(data.getWholeData()); 1575 } else if (childNode instanceof Comment) { 1576 Comment comment = (Comment) childNode; 1577 sb.append(comment.getData()); 1578 } else if (childNode instanceof CDataNode) { 1579 // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script. 1580 // but in case another type gets through. 1581 CDataNode cDataNode = (CDataNode) childNode; 1582 sb.append(cDataNode.getWholeText()); 1583 } 1584 }); 1585 return StringUtil.releaseBuilder(sb); 1586 } 1587 1588 /** 1589 * Gets the literal value of this element's "class" attribute, which may include multiple class names, space 1590 * separated. (E.g. on <code><div class="header gray"></code> returns, "<code>header gray</code>") 1591 * @return The literal class attribute, or <b>empty string</b> if no class attribute set. 1592 */ className()1593 public String className() { 1594 return attr("class").trim(); 1595 } 1596 1597 /** 1598 * Get each of the element's class names. E.g. on element {@code <div class="header gray">}, 1599 * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to 1600 * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. 
1601 * @return set of classnames, empty if no class attribute 1602 */ classNames()1603 public Set<String> classNames() { 1604 String[] names = ClassSplit.split(className()); 1605 Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names)); 1606 classNames.remove(""); // if classNames() was empty, would include an empty class 1607 1608 return classNames; 1609 } 1610 1611 /** 1612 Set the element's {@code class} attribute to the supplied class names. 1613 @param classNames set of classes 1614 @return this element, for chaining 1615 */ classNames(Set<String> classNames)1616 public Element classNames(Set<String> classNames) { 1617 Validate.notNull(classNames); 1618 if (classNames.isEmpty()) { 1619 attributes().remove("class"); 1620 } else { 1621 attributes().put("class", StringUtil.join(classNames, " ")); 1622 } 1623 return this; 1624 } 1625 1626 /** 1627 * Tests if this element has a class. Case-insensitive. 1628 * @param className name of class to check for 1629 * @return true if it does, false if not 1630 */ 1631 // performance sensitive hasClass(String className)1632 public boolean hasClass(String className) { 1633 if (attributes == null) 1634 return false; 1635 1636 final String classAttr = attributes.getIgnoreCase("class"); 1637 final int len = classAttr.length(); 1638 final int wantLen = className.length(); 1639 1640 if (len == 0 || len < wantLen) { 1641 return false; 1642 } 1643 1644 // if both lengths are equal, only need compare the className with the attribute 1645 if (len == wantLen) { 1646 return className.equalsIgnoreCase(classAttr); 1647 } 1648 1649 // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations) 1650 boolean inClass = false; 1651 int start = 0; 1652 for (int i = 0; i < len; i++) { 1653 if (Character.isWhitespace(classAttr.charAt(i))) { 1654 if (inClass) { 1655 // white space ends a class name, compare it with the requested one, ignore case 1656 if (i - start == wantLen && 
classAttr.regionMatches(true, start, className, 0, wantLen)) { 1657 return true; 1658 } 1659 inClass = false; 1660 } 1661 } else { 1662 if (!inClass) { 1663 // we're in a class name : keep the start of the substring 1664 inClass = true; 1665 start = i; 1666 } 1667 } 1668 } 1669 1670 // check the last entry 1671 if (inClass && len - start == wantLen) { 1672 return classAttr.regionMatches(true, start, className, 0, wantLen); 1673 } 1674 1675 return false; 1676 } 1677 1678 /** 1679 Add a class name to this element's {@code class} attribute. 1680 @param className class name to add 1681 @return this element 1682 */ addClass(String className)1683 public Element addClass(String className) { 1684 Validate.notNull(className); 1685 1686 Set<String> classes = classNames(); 1687 classes.add(className); 1688 classNames(classes); 1689 1690 return this; 1691 } 1692 1693 /** 1694 Remove a class name from this element's {@code class} attribute. 1695 @param className class name to remove 1696 @return this element 1697 */ removeClass(String className)1698 public Element removeClass(String className) { 1699 Validate.notNull(className); 1700 1701 Set<String> classes = classNames(); 1702 classes.remove(className); 1703 classNames(classes); 1704 1705 return this; 1706 } 1707 1708 /** 1709 Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. 1710 @param className class name to toggle 1711 @return this element 1712 */ toggleClass(String className)1713 public Element toggleClass(String className) { 1714 Validate.notNull(className); 1715 1716 Set<String> classes = classNames(); 1717 if (classes.contains(className)) 1718 classes.remove(className); 1719 else 1720 classes.add(className); 1721 classNames(classes); 1722 1723 return this; 1724 } 1725 1726 /** 1727 * Get the value of a form element (input, textarea, etc). 1728 * @return the value of the form element, or empty string if not set. 
1729 */ val()1730 public String val() { 1731 if (elementIs("textarea", NamespaceHtml)) 1732 return text(); 1733 else 1734 return attr("value"); 1735 } 1736 1737 /** 1738 * Set the value of a form element (input, textarea, etc). 1739 * @param value value to set 1740 * @return this element (for chaining) 1741 */ val(String value)1742 public Element val(String value) { 1743 if (elementIs("textarea", NamespaceHtml)) 1744 text(value); 1745 else 1746 attr("value", value); 1747 return this; 1748 } 1749 1750 /** 1751 Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be 1752 enabled prior to parsing the content. 1753 @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked. 1754 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 1755 @see Node#sourceRange() 1756 @see Range#isImplicit() 1757 @since 1.15.2 1758 */ endSourceRange()1759 public Range endSourceRange() { 1760 return Range.of(this, false); 1761 } 1762 shouldIndent(final Document.OutputSettings out)1763 boolean shouldIndent(final Document.OutputSettings out) { 1764 return out.prettyPrint() && isFormatAsBlock(out) && !isInlineable(out) && !preserveWhitespace(parentNode); 1765 } 1766 1767 @Override outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out)1768 void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException { 1769 if (shouldIndent(out)) { 1770 if (accum instanceof StringBuilder) { 1771 if (((StringBuilder) accum).length() > 0) 1772 indent(accum, depth, out); 1773 } else { 1774 indent(accum, depth, out); 1775 } 1776 } 1777 accum.append('<').append(tagName()); 1778 if (attributes != null) attributes.html(accum, out); 1779 1780 // selfclosing includes unknown tags, isEmpty defines tags that are always empty 1781 if (childNodes.isEmpty() && tag.isSelfClosing()) { 1782 if (out.syntax() == Document.OutputSettings.Syntax.html && 
tag.isEmpty()) 1783 accum.append('>'); 1784 else 1785 accum.append(" />"); // <img> in html, <img /> in xml 1786 } 1787 else 1788 accum.append('>'); 1789 } 1790 1791 @Override outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out)1792 void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException { 1793 if (!(childNodes.isEmpty() && tag.isSelfClosing())) { 1794 if (out.prettyPrint() && (!childNodes.isEmpty() && ( 1795 (tag.formatAsBlock() && !preserveWhitespace(parentNode)) || 1796 (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && (childNodes.get(0) instanceof Element)))) 1797 ))) 1798 indent(accum, depth, out); 1799 accum.append("</").append(tagName()).append('>'); 1800 } 1801 } 1802 1803 /** 1804 * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return 1805 * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) 1806 * 1807 * @return String of HTML. 1808 * @see #outerHtml() 1809 */ html()1810 public String html() { 1811 StringBuilder accum = StringUtil.borrowBuilder(); 1812 html(accum); 1813 String html = StringUtil.releaseBuilder(accum); 1814 return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html; 1815 } 1816 1817 @Override html(T appendable)1818 public <T extends Appendable> T html(T appendable) { 1819 final int size = childNodes.size(); 1820 for (int i = 0; i < size; i++) 1821 childNodes.get(i).outerHtml(appendable); 1822 1823 return appendable; 1824 } 1825 1826 /** 1827 * Set this element's inner HTML. Clears the existing HTML first. 
1828 * @param html HTML to parse and set into this element 1829 * @return this element 1830 * @see #append(String) 1831 */ html(String html)1832 public Element html(String html) { 1833 empty(); 1834 append(html); 1835 return this; 1836 } 1837 1838 @Override clone()1839 public Element clone() { 1840 return (Element) super.clone(); 1841 } 1842 1843 @Override shallowClone()1844 public Element shallowClone() { 1845 // simpler than implementing a clone version with no child copy 1846 String baseUri = baseUri(); 1847 if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute 1848 return new Element(tag, baseUri, attributes == null ? null : attributes.clone()); 1849 } 1850 1851 @Override doClone(@ullable Node parent)1852 protected Element doClone(@Nullable Node parent) { 1853 Element clone = (Element) super.doClone(parent); 1854 clone.attributes = attributes != null ? attributes.clone() : null; 1855 clone.childNodes = new NodeList(clone, childNodes.size()); 1856 clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone 1857 1858 return clone; 1859 } 1860 1861 // overrides of Node for call chaining 1862 @Override clearAttributes()1863 public Element clearAttributes() { 1864 if (attributes != null) { 1865 super.clearAttributes(); // keeps internal attributes via iterator 1866 if (attributes.size() == 0) 1867 attributes = null; // only remove entirely if no internal attributes 1868 } 1869 1870 return this; 1871 } 1872 1873 @Override removeAttr(String attributeKey)1874 public Element removeAttr(String attributeKey) { 1875 return (Element) super.removeAttr(attributeKey); 1876 } 1877 1878 @Override root()1879 public Element root() { 1880 return (Element) super.root(); // probably a document, but always at least an element 1881 } 1882 1883 @Override traverse(NodeVisitor nodeVisitor)1884 public Element traverse(NodeVisitor nodeVisitor) { 1885 return (Element) super.traverse(nodeVisitor); 1886 } 1887 1888 @Override 
public Element forEachNode(Consumer<? super Node> action) {
    return (Element) super.forEachNode(action);
}

/**
 Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal.
 Elements may be inspected, changed, added, replaced, or removed.
 @param action the function to perform on the element
 @return this Element, for chaining
 @see Node#forEachNode(Consumer)
 @deprecated use {@link #stream()}.{@link Stream#forEach(Consumer) forEach(Consumer)} instead. (Removing this method
 so Element can implement Iterable, which this signature conflicts with due to the non-void return.)
 */
@Deprecated
public Element forEach(Consumer<? super Element> action) {
    stream().forEach(action);
    return this;
}

@Override
public Element filter(NodeFilter nodeFilter) {
    return (Element) super.filter(nodeFilter);
}

/**
 Child node list that reports every content change back to the Element that owns it, by calling
 {@code nodelistChanged()} on the owner.
 */
private static final class NodeList extends ChangeNotifyingArrayList<Node> {
    private final Element owner;

    /**
     @param owner the element to notify when this list's contents change
     @param initialCapacity initial capacity passed through to the backing list
     */
    NodeList(Element owner, int initialCapacity) {
        super(initialCapacity);
        this.owner = owner;
    }

    // @Override makes the compiler verify this still matches the superclass change hook
    @Override
    public void onContentsChanged() {
        owner.nodelistChanged();
    }
}

// True if this element's tag is a block, its parent's tag formats as a block, or outline mode is on.
private boolean isFormatAsBlock(Document.OutputSettings out) {
    return tag.isBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline();
}

// True only for an inline tag that has no parent or a block parent, is not effectively first,
// is not a <br>, and when outline mode is off.
private boolean isInlineable(Document.OutputSettings out) {
    if (!tag.isInline())
        return false;
    return (parent() == null || parent().isBlock())
        && !isEffectivelyFirst()
        && !out.outline()
        && !nameIs("br");
}
} // end of class Element