1 package org.jsoup.nodes; 2 3 import org.jsoup.Jsoup; 4 import org.jsoup.TextUtil; 5 import org.jsoup.helper.ValidationException; 6 import org.jsoup.parser.ParseSettings; 7 import org.jsoup.parser.Parser; 8 import org.jsoup.parser.Tag; 9 import org.jsoup.select.Elements; 10 import org.jsoup.select.Evaluator; 11 import org.jsoup.select.NodeFilter; 12 import org.jsoup.select.NodeVisitor; 13 import org.jsoup.select.QueryParser; 14 import org.junit.jupiter.api.Test; 15 import org.junit.jupiter.params.ParameterizedTest; 16 import org.junit.jupiter.params.provider.MethodSource; 17 18 import java.util.ArrayList; 19 import java.util.Collection; 20 import java.util.HashSet; 21 import java.util.LinkedHashSet; 22 import java.util.List; 23 import java.util.Map; 24 import java.util.Set; 25 import java.util.concurrent.atomic.AtomicInteger; 26 import java.util.regex.Pattern; 27 import java.util.stream.Stream; 28 29 import static org.junit.jupiter.api.Assertions.*; 30 31 /** 32 Tests for Element (DOM stuff mostly). 33 34 @author Jonathan Hedley */ 35 public class ElementTest { 36 private final String reference = "<div id=div1><p>Hello</p><p>Another <b>element</b></p><div id=div2><img src=foo.png></div></div>"; 37 validateScriptContents(String src, Element el)38 private static void validateScriptContents(String src, Element el) { 39 assertEquals("", el.text()); // it's not text 40 assertEquals("", el.ownText()); 41 assertEquals("", el.wholeText()); 42 assertEquals(src, el.html()); 43 assertEquals(src, el.data()); 44 } 45 validateXmlScriptContents(Element el)46 private static void validateXmlScriptContents(Element el) { 47 assertEquals("var foo = 5 < 2; var bar = 1 && 2;", el.text()); 48 assertEquals("var foo = 5 < 2; var bar = 1 && 2;", el.ownText()); 49 assertEquals("var foo = 5 < 2;\nvar bar = 1 && 2;", el.wholeText()); 50 assertEquals("var foo = 5 < 2;\nvar bar = 1 && 2;", el.html()); 51 assertEquals("", el.data()); 52 } 53 54 @Test testId()55 public void testId() { 56 Document doc = Jsoup.parse("<div id=Foo>"); 57 Element el = doc.selectFirst("div"); 58 assertEquals("Foo", el.id()); 59 } 60 61 @Test testSetId()62 public void testSetId() { 63 Document doc = Jsoup.parse("<div id=Boo>"); 64 Element el = doc.selectFirst("div"); 65 el.id("Foo"); 66 assertEquals("Foo", el.id()); 67 } 68 69 @Test getElementsByTagName()70 public void getElementsByTagName() { 71 Document doc = Jsoup.parse(reference); 72 List<Element> divs = doc.getElementsByTag("div"); 73 assertEquals(2, divs.size()); 74 assertEquals("div1", divs.get(0).id()); 75 assertEquals("div2", divs.get(1).id()); 76 77 List<Element> ps = doc.getElementsByTag("p"); 78 assertEquals(2, ps.size()); 79 assertEquals("Hello", ((TextNode) ps.get(0).childNode(0)).getWholeText()); 80 assertEquals("Another ", ((TextNode) ps.get(1).childNode(0)).getWholeText()); 81 List<Element> ps2 = doc.getElementsByTag("P"); 82 assertEquals(ps, ps2); 83 84 List<Element> imgs = doc.getElementsByTag("img"); 85 assertEquals("foo.png", imgs.get(0).attr("src")); 86 87 List<Element> empty = doc.getElementsByTag("wtf"); 88 assertEquals(0, empty.size()); 89 } 90 91 @Test getNamespacedElementsByTag()92 public void getNamespacedElementsByTag() { 93 Document doc = Jsoup.parse("<div><abc:def id=1>Hello</abc:def></div>"); 94 Elements els = doc.getElementsByTag("abc:def"); 95 assertEquals(1, els.size()); 96 assertEquals("1", els.first().id()); 97 assertEquals("abc:def", els.first().tagName()); 98 } 99 100 @Test testGetElementById()101 public void testGetElementById() { 102 Document doc = Jsoup.parse(reference); 103 Element div = doc.getElementById("div1"); 104 assertEquals("div1", div.id()); 105 assertNull(doc.getElementById("none")); 106 107 Document doc2 = Jsoup.parse("<div id=1><div id=2><p>Hello <span id=2>world!</span></p></div></div>"); 108 Element div2 = doc2.getElementById("2"); 109 assertEquals("div", div2.tagName()); // not the span 110 Element span = div2.child(0).getElementById("2"); // called from <p> context should be span 111 assertEquals("span", span.tagName()); 112 } 113 114 @Test testGetText()115 public void testGetText() { 116 Document doc = Jsoup.parse(reference); 117 assertEquals("Hello Another element", doc.text()); 118 assertEquals("Another element", doc.getElementsByTag("p").get(1).text()); 119 } 120 121 @Test testGetChildText()122 public void testGetChildText() { 123 Document doc = Jsoup.parse("<p>Hello <b>there</b> now"); 124 Element p = doc.select("p").first(); 125 assertEquals("Hello there now", p.text()); 126 assertEquals("Hello now", p.ownText()); 127 } 128 129 @Test testNormalisesText()130 public void testNormalisesText() { 131 String h = "<p>Hello<p>There.</p> \n <p>Here <b>is</b> \n s<b>om</b>e text."; 132 Document doc = Jsoup.parse(h); 133 String text = doc.text(); 134 assertEquals("Hello There. Here is some text.", text); 135 } 136 137 @Test testKeepsPreText()138 public void testKeepsPreText() { 139 String h = "<p>Hello \n \n there.</p> <div><pre> What's \n\n that?</pre>"; 140 Document doc = Jsoup.parse(h); 141 assertEquals("Hello there. What's \n\n that?", doc.text()); 142 } 143 144 @Test testKeepsPreTextInCode()145 public void testKeepsPreTextInCode() { 146 String h = "<pre><code>code\n\ncode</code></pre>"; 147 Document doc = Jsoup.parse(h); 148 assertEquals("code\n\ncode", doc.text()); 149 assertEquals("<pre><code>code\n\ncode</code></pre>", doc.body().html()); 150 } 151 152 @Test testKeepsPreTextAtDepth()153 public void testKeepsPreTextAtDepth() { 154 String h = "<pre><code><span><b>code\n\ncode</b></span></code></pre>"; 155 Document doc = Jsoup.parse(h); 156 assertEquals("code\n\ncode", doc.text()); 157 assertEquals("<pre><code><span><b>code\n\ncode</b></span></code></pre>", doc.body().html()); 158 } 159 doesNotWrapBlocksInPre()160 @Test void doesNotWrapBlocksInPre() { 161 // https://github.com/jhy/jsoup/issues/1891 162 String h = "<pre><span><foo><div>TEST\n TEST</div></foo></span></pre>"; 163 Document doc = Jsoup.parse(h); 164 assertEquals("TEST\n TEST", doc.wholeText()); 165 assertEquals(h, doc.body().html()); 166 } 167 168 @Test testBrHasSpace()169 public void testBrHasSpace() { 170 Document doc = Jsoup.parse("<p>Hello<br>there</p>"); 171 assertEquals("Hello there", doc.text()); 172 assertEquals("Hello there", doc.select("p").first().ownText()); 173 174 doc = Jsoup.parse("<p>Hello <br> there</p>"); 175 assertEquals("Hello there", doc.text()); 176 } 177 178 @Test testBrHasSpaceCaseSensitive()179 public void testBrHasSpaceCaseSensitive() { 180 Document doc = Jsoup.parse("<p>Hello<br>there<BR>now</p>", Parser.htmlParser().settings(ParseSettings.preserveCase)); 181 assertEquals("Hello there now", doc.text()); 182 assertEquals("Hello there now", doc.select("p").first().ownText()); 183 184 doc = Jsoup.parse("<p>Hello <br> there <BR> now</p>"); 185 assertEquals("Hello there now", doc.text()); 186 } 187 textHasSpacesAfterBlock()188 @Test public void textHasSpacesAfterBlock() { 189 Document doc = Jsoup.parse("<div>One</div><div>Two</div><span>Three</span><p>Fou<i>r</i></p>"); 190 String text = doc.text(); 191 String wholeText = doc.wholeText(); 192 193 assertEquals("One Two Three Four", text); 194 assertEquals("OneTwoThreeFour",wholeText); 195 196 assertEquals("OneTwo",Jsoup.parse("<span>One</span><span>Two</span>").text()); 197 } 198 199 @Test testWholeText()200 public void testWholeText() { 201 Document doc = Jsoup.parse("<p> Hello\nthere </p>"); 202 assertEquals(" Hello\nthere ", doc.wholeText()); 203 204 doc = Jsoup.parse("<p>Hello \n there</p>"); 205 assertEquals("Hello \n there", doc.wholeText()); 206 207 doc = Jsoup.parse("<p>Hello <div>\n there</div></p>"); 208 assertEquals("Hello \n there", doc.wholeText()); 209 } 210 wholeTextRuns()211 @Test void wholeTextRuns() { 212 Document doc = Jsoup.parse("<div><p id=1></p><p id=2> </p><p id=3>. </p>"); 213 214 Element p1 = doc.expectFirst("#1"); 215 Element p2 = doc.expectFirst("#2"); 216 Element p3 = doc.expectFirst("#3"); 217 218 assertEquals("", p1.wholeText()); 219 assertEquals(" ", p2.wholeText()); 220 assertEquals(". ", p3.wholeText()); 221 } 222 223 @Test testGetSiblings()224 public void testGetSiblings() { 225 Document doc = Jsoup.parse("<div><p>Hello<p id=1>there<p>this<p>is<p>an<p id=last>element</div>"); 226 Element p = doc.getElementById("1"); 227 assertEquals("there", p.text()); 228 assertEquals("Hello", p.previousElementSibling().text()); 229 assertEquals("this", p.nextElementSibling().text()); 230 assertEquals("Hello", p.firstElementSibling().text()); 231 assertEquals("element", p.lastElementSibling().text()); 232 assertNull(p.lastElementSibling().nextElementSibling()); 233 assertNull(p.firstElementSibling().previousElementSibling()); 234 } 235 nextElementSibling()236 @Test public void nextElementSibling() { 237 Document doc = Jsoup.parse("<p>One</p>Two<p>Three</p>"); 238 Element el = doc.expectFirst("p"); 239 assertNull(el.previousElementSibling()); 240 Element next = el.nextElementSibling(); 241 assertNotNull(next); 242 assertEquals("Three", next.text()); 243 assertNull(next.nextElementSibling()); 244 } 245 prevElementSibling()246 @Test public void prevElementSibling() { 247 Document doc = Jsoup.parse("<p>One</p>Two<p>Three</p>"); 248 Element el = doc.expectFirst("p:contains(Three)"); 249 assertNull(el.nextElementSibling()); 250 Element prev = el.previousElementSibling(); 251 assertNotNull(prev); 252 assertEquals("One", prev.text()); 253 assertNull(prev.previousElementSibling()); 254 } 255 256 @Test testGetSiblingsWithDuplicateContent()257 public void testGetSiblingsWithDuplicateContent() { 258 Document doc = Jsoup.parse("<div><p>Hello<p id=1>there<p>this<p>this<p>is<p>an<p id=last>element</div>"); 259 Element p = doc.getElementById("1"); 260 assertEquals("there", p.text()); 261 assertEquals("Hello", p.previousElementSibling().text()); 262 assertEquals("this", p.nextElementSibling().text()); 263 assertEquals("this", p.nextElementSibling().nextElementSibling().text()); 264 assertEquals("is", p.nextElementSibling().nextElementSibling().nextElementSibling().text()); 265 assertEquals("Hello", p.firstElementSibling().text()); 266 assertEquals("element", p.lastElementSibling().text()); 267 } 268 269 @Test testFirstElementSiblingOnOrphan()270 public void testFirstElementSiblingOnOrphan() { 271 Element p = new Element("p"); 272 assertSame(p, p.firstElementSibling()); 273 assertSame(p, p.lastElementSibling()); 274 } 275 276 @Test testFirstAndLastSiblings()277 public void testFirstAndLastSiblings() { 278 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three"); 279 Element div = doc.expectFirst("div"); 280 Element one = div.child(0); 281 Element two = div.child(1); 282 Element three = div.child(2); 283 284 assertSame(one, one.firstElementSibling()); 285 assertSame(one, two.firstElementSibling()); 286 assertSame(three, three.lastElementSibling()); 287 assertSame(three, two.lastElementSibling()); 288 assertNull(one.previousElementSibling()); 289 assertNull(three.nextElementSibling()); 290 } 291 292 @Test testGetParents()293 public void testGetParents() { 294 Document doc = Jsoup.parse("<div><p>Hello <span>there</span></div>"); 295 Element span = doc.select("span").first(); 296 Elements parents = span.parents(); 297 298 assertEquals(4, parents.size()); 299 assertEquals("p", parents.get(0).tagName()); 300 assertEquals("div", parents.get(1).tagName()); 301 assertEquals("body", parents.get(2).tagName()); 302 assertEquals("html", parents.get(3).tagName()); 303 304 Element orphan = new Element("p"); 305 Elements none = orphan.parents(); 306 assertEquals(0, none.size()); 307 } 308 309 @Test testElementSiblingIndex()310 public void testElementSiblingIndex() { 311 Document doc = Jsoup.parse("<div><p>One</p>...<p>Two</p>...<p>Three</p>"); 312 Elements ps = doc.select("p"); 313 assertEquals(0, ps.get(0).elementSiblingIndex()); 314 assertEquals(1, ps.get(1).elementSiblingIndex()); 315 assertEquals(2, ps.get(2).elementSiblingIndex()); 316 } 317 318 @Test testElementSiblingIndexSameContent()319 public void testElementSiblingIndexSameContent() { 320 Document doc = Jsoup.parse("<div><p>One</p>...<p>One</p>...<p>One</p>"); 321 Elements ps = doc.select("p"); 322 assertEquals(0, ps.get(0).elementSiblingIndex()); 323 assertEquals(1, ps.get(1).elementSiblingIndex()); 324 assertEquals(2, ps.get(2).elementSiblingIndex()); 325 } 326 327 @Test testGetElementsWithClass()328 public void testGetElementsWithClass() { 329 Document doc = Jsoup.parse("<div class='mellow yellow'><span class=mellow>Hello <b class='yellow'>Yellow!</b></span><p>Empty</p></div>"); 330 331 List<Element> els = doc.getElementsByClass("mellow"); 332 assertEquals(2, els.size()); 333 assertEquals("div", els.get(0).tagName()); 334 assertEquals("span", els.get(1).tagName()); 335 336 List<Element> els2 = doc.getElementsByClass("yellow"); 337 assertEquals(2, els2.size()); 338 assertEquals("div", els2.get(0).tagName()); 339 assertEquals("b", els2.get(1).tagName()); 340 341 List<Element> none = doc.getElementsByClass("solo"); 342 assertEquals(0, none.size()); 343 } 344 345 @Test testGetElementsWithAttribute()346 public void testGetElementsWithAttribute() { 347 Document doc = Jsoup.parse("<div style='bold'><p title=qux><p><b style></b></p></div>"); 348 List<Element> els = doc.getElementsByAttribute("style"); 349 assertEquals(2, els.size()); 350 assertEquals("div", els.get(0).tagName()); 351 assertEquals("b", els.get(1).tagName()); 352 353 List<Element> none = doc.getElementsByAttribute("class"); 354 assertEquals(0, none.size()); 355 } 356 357 @Test testGetElementsWithAttributeDash()358 public void testGetElementsWithAttributeDash() { 359 Document doc = Jsoup.parse("<meta http-equiv=content-type value=utf8 id=1> <meta name=foo content=bar id=2> <div http-equiv=content-type value=utf8 id=3>"); 360 Elements meta = doc.select("meta[http-equiv=content-type], meta[charset]"); 361 assertEquals(1, meta.size()); 362 assertEquals("1", meta.first().id()); 363 } 364 365 @Test testGetElementsWithAttributeValue()366 public void testGetElementsWithAttributeValue() { 367 Document doc = Jsoup.parse("<div style='bold'><p><p><b style></b></p></div>"); 368 List<Element> els = doc.getElementsByAttributeValue("style", "bold"); 369 assertEquals(1, els.size()); 370 assertEquals("div", els.get(0).tagName()); 371 372 List<Element> none = doc.getElementsByAttributeValue("style", "none"); 373 assertEquals(0, none.size()); 374 } 375 376 @Test testClassDomMethods()377 public void testClassDomMethods() { 378 Document doc = Jsoup.parse("<div><span class=' mellow yellow '>Hello <b>Yellow</b></span></div>"); 379 List<Element> els = doc.getElementsByAttribute("class"); 380 Element span = els.get(0); 381 assertEquals("mellow yellow", span.className()); 382 assertTrue(span.hasClass("mellow")); 383 assertTrue(span.hasClass("yellow")); 384 Set<String> classes = span.classNames(); 385 assertEquals(2, classes.size()); 386 assertTrue(classes.contains("mellow")); 387 assertTrue(classes.contains("yellow")); 388 389 assertEquals("", doc.className()); 390 classes = doc.classNames(); 391 assertEquals(0, classes.size()); 392 assertFalse(doc.hasClass("mellow")); 393 } 394 395 @Test testHasClassDomMethods()396 public void testHasClassDomMethods() { 397 Tag tag = Tag.valueOf("a"); 398 Attributes attribs = new Attributes(); 399 Element el = new Element(tag, "", attribs); 400 401 attribs.put("class", "toto"); 402 boolean hasClass = el.hasClass("toto"); 403 assertTrue(hasClass); 404 405 attribs.put("class", " toto"); 406 hasClass = el.hasClass("toto"); 407 assertTrue(hasClass); 408 409 attribs.put("class", "toto "); 410 hasClass = el.hasClass("toto"); 411 assertTrue(hasClass); 412 413 attribs.put("class", "\ttoto "); 414 hasClass = el.hasClass("toto"); 415 assertTrue(hasClass); 416 417 attribs.put("class", " toto "); 418 hasClass = el.hasClass("toto"); 419 assertTrue(hasClass); 420 421 attribs.put("class", "ab"); 422 hasClass = el.hasClass("toto"); 423 assertFalse(hasClass); 424 425 attribs.put("class", " "); 426 hasClass = el.hasClass("toto"); 427 assertFalse(hasClass); 428 429 attribs.put("class", "tototo"); 430 hasClass = el.hasClass("toto"); 431 assertFalse(hasClass); 432 433 attribs.put("class", "raulpismuth "); 434 hasClass = el.hasClass("raulpismuth"); 435 assertTrue(hasClass); 436 437 attribs.put("class", " abcd raulpismuth efgh "); 438 hasClass = el.hasClass("raulpismuth"); 439 assertTrue(hasClass); 440 441 attribs.put("class", " abcd efgh raulpismuth"); 442 hasClass = el.hasClass("raulpismuth"); 443 assertTrue(hasClass); 444 445 attribs.put("class", " abcd efgh raulpismuth "); 446 hasClass = el.hasClass("raulpismuth"); 447 assertTrue(hasClass); 448 } 449 450 @Test testClassUpdates()451 public void testClassUpdates() { 452 Document doc = Jsoup.parse("<div class='mellow yellow'></div>"); 453 Element div = doc.select("div").first(); 454 455 div.addClass("green"); 456 assertEquals("mellow yellow green", div.className()); 457 div.removeClass("red"); // noop 458 div.removeClass("yellow"); 459 assertEquals("mellow green", div.className()); 460 div.toggleClass("green").toggleClass("red"); 461 assertEquals("mellow red", div.className()); 462 } 463 464 @Test testOuterHtml()465 public void testOuterHtml() { 466 Document doc = Jsoup.parse("<div title='Tags &c.'><img src=foo.png><p><!-- comment -->Hello<p>there"); 467 assertEquals("<html><head></head><body><div title=\"Tags &c.\"><img src=\"foo.png\"><p><!-- comment -->Hello</p><p>there</p></div></body></html>", 468 TextUtil.stripNewlines(doc.outerHtml())); 469 } 470 471 @Test testInnerHtml()472 public void testInnerHtml() { 473 Document doc = Jsoup.parse("<div>\n <p>Hello</p> </div>"); 474 assertEquals("<p>Hello</p>", doc.getElementsByTag("div").get(0).html()); 475 } 476 477 @Test testFormatHtml()478 public void testFormatHtml() { 479 Document doc = Jsoup.parse("<title>Format test</title><div><p>Hello <span>jsoup <span>users</span></span></p><p>Good.</p></div>"); 480 assertEquals("<html>\n <head>\n <title>Format test</title>\n </head>\n <body>\n <div>\n <p>Hello <span>jsoup <span>users</span></span></p>\n <p>Good.</p>\n </div>\n </body>\n</html>", doc.html()); 481 } 482 483 @Test testFormatOutline()484 public void testFormatOutline() { 485 Document doc = Jsoup.parse("<title>Format test</title><div><p>Hello <span>jsoup <span>users</span></span></p><p>Good.</p></div>"); 486 doc.outputSettings().outline(true); 487 assertEquals("<html>\n <head>\n <title>Format test</title>\n </head>\n <body>\n <div>\n <p>\n Hello \n <span>\n jsoup \n <span>users</span>\n </span>\n </p>\n <p>Good.</p>\n </div>\n </body>\n</html>", doc.html()); 488 } 489 490 @Test testSetIndent()491 public void testSetIndent() { 492 Document doc = Jsoup.parse("<div><p>Hello\nthere</p></div>"); 493 doc.outputSettings().indentAmount(0); 494 assertEquals("<html>\n<head></head>\n<body>\n<div>\n<p>Hello there</p>\n</div>\n</body>\n</html>", doc.html()); 495 } 496 testIndentLevel()497 @Test void testIndentLevel() { 498 // deep to test default and extended max 499 StringBuilder divs = new StringBuilder(); 500 for (int i = 0; i < 40; i++) { 501 divs.append("<div>"); 502 } 503 divs.append("Foo"); 504 Document doc = Jsoup.parse(divs.toString()); 505 Document.OutputSettings settings = doc.outputSettings(); 506 507 int defaultMax = 30; 508 assertEquals(defaultMax, settings.maxPaddingWidth()); 509 String html = doc.html(); 510 assertTrue(html.contains(" <div>\n" + 511 " Foo\n" + 512 " </div>")); 513 514 settings.maxPaddingWidth(32); 515 assertEquals(32, settings.maxPaddingWidth()); 516 html = doc.html(); 517 assertTrue(html.contains(" <div>\n" + 518 " Foo\n" + 519 " </div>")); 520 521 settings.maxPaddingWidth(-1); 522 assertEquals(-1, settings.maxPaddingWidth()); 523 html = doc.html(); 524 assertTrue(html.contains(" <div>\n" + 525 " Foo\n" + 526 " </div>")); 527 } 528 529 @Test testNotPretty()530 public void testNotPretty() { 531 Document doc = Jsoup.parse("<div> \n<p>Hello\n there\n</p></div>"); 532 doc.outputSettings().prettyPrint(false); 533 assertEquals("<html><head></head><body><div> \n<p>Hello\n there\n</p></div></body></html>", doc.html()); 534 535 Element div = doc.select("div").first(); 536 assertEquals(" \n<p>Hello\n there\n</p>", div.html()); 537 } 538 539 @Test testNotPrettyWithEnDashBody()540 public void testNotPrettyWithEnDashBody() { 541 String html = "<div><span>1:15</span>–<span>2:15</span> p.m.</div>"; 542 Document document = Jsoup.parse(html); 543 document.outputSettings().prettyPrint(false); 544 545 assertEquals("<div><span>1:15</span>–<span>2:15</span> p.m.</div>", document.body().html()); 546 } 547 548 @Test testPrettyWithEnDashBody()549 public void testPrettyWithEnDashBody() { 550 String html = "<div><span>1:15</span>–<span>2:15</span> p.m.</div>"; 551 Document document = Jsoup.parse(html); 552 553 assertEquals("<div>\n <span>1:15</span>–<span>2:15</span> p.m.\n</div>", document.body().html()); 554 } 555 556 @Test testPrettyAndOutlineWithEnDashBody()557 public void testPrettyAndOutlineWithEnDashBody() { 558 String html = "<div><span>1:15</span>–<span>2:15</span> p.m.</div>"; 559 Document document = Jsoup.parse(html); 560 document.outputSettings().outline(true); 561 562 assertEquals("<div>\n <span>1:15</span>\n –\n <span>2:15</span>\n p.m.\n</div>", document.body().html()); 563 } 564 565 @Test testBasicFormats()566 public void testBasicFormats() { 567 String html = "<span>0</span>.<div><span>1</span>-<span>2</span><p><span>3</span>-<span>4</span><div>5</div>"; 568 Document doc = Jsoup.parse(html); 569 assertEquals( 570 "<span>0</span>.\n" + 571 "<div>\n" + 572 " <span>1</span>-<span>2</span>\n" + 573 " <p><span>3</span>-<span>4</span></p>\n" + 574 " <div>\n" + 575 " 5\n" + 576 " </div>\n" + 577 "</div>", doc.body().html()); 578 } 579 580 @Test testEmptyElementFormatHtml()581 public void testEmptyElementFormatHtml() { 582 // don't put newlines into empty blocks 583 Document doc = Jsoup.parse("<section><div></div></section>"); 584 assertEquals("<section>\n <div></div>\n</section>", doc.select("section").first().outerHtml()); 585 } 586 587 @Test testNoIndentOnScriptAndStyle()588 public void testNoIndentOnScriptAndStyle() { 589 // don't newline+indent closing </script> and </style> tags 590 Document doc = Jsoup.parse("<script>one\ntwo</script>\n<style>three\nfour</style>"); 591 assertEquals("<script>one\ntwo</script>\n<style>three\nfour</style>", doc.head().html()); 592 } 593 594 @Test testContainerOutput()595 public void testContainerOutput() { 596 Document doc = Jsoup.parse("<title>Hello there</title> <div><p>Hello</p><p>there</p></div> <div>Another</div>"); 597 assertEquals("<title>Hello there</title>", doc.select("title").first().outerHtml()); 598 assertEquals("<div>\n <p>Hello</p>\n <p>there</p>\n</div>", doc.select("div").first().outerHtml()); 599 assertEquals("<div>\n <p>Hello</p>\n <p>there</p>\n</div>\n<div>\n Another\n</div>", doc.select("body").first().html()); 600 } 601 602 @Test testSetText()603 public void testSetText() { 604 String h = "<div id=1>Hello <p>there <b>now</b></p></div>"; 605 Document doc = Jsoup.parse(h); 606 assertEquals("Hello there now", doc.text()); // need to sort out node whitespace 607 assertEquals("there now", doc.select("p").get(0).text()); 608 609 Element div = doc.getElementById("1").text("Gone"); 610 assertEquals("Gone", div.text()); 611 assertEquals(0, doc.select("p").size()); 612 } 613 614 @Test testAddNewElement()615 public void testAddNewElement() { 616 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 617 Element div = doc.getElementById("1"); 618 div.appendElement("p").text("there"); 619 div.appendElement("P").attr("CLASS", "second").text("now"); 620 // manually specifying tag and attributes should maintain case based on parser settings 621 assertEquals("<html><head></head><body><div id=\"1\"><p>Hello</p><p>there</p><p class=\"second\">now</p></div></body></html>", 622 TextUtil.stripNewlines(doc.html())); 623 624 // check sibling index (with short circuit on reindexChildren): 625 Elements ps = doc.select("p"); 626 for (int i = 0; i < ps.size(); i++) { 627 assertEquals(i, ps.get(i).siblingIndex); 628 } 629 } 630 631 @Test testAddBooleanAttribute()632 public void testAddBooleanAttribute() { 633 Element div = new Element(Tag.valueOf("div"), ""); 634 635 div.attr("true", true); 636 637 div.attr("false", "value"); 638 div.attr("false", false); 639 640 assertTrue(div.hasAttr("true")); 641 assertEquals("", div.attr("true")); 642 643 List<Attribute> attributes = div.attributes().asList(); 644 assertEquals(1, attributes.size(), "There should be one attribute"); 645 assertFalse(div.hasAttr("false")); 646 647 assertEquals("<div true></div>", div.outerHtml()); 648 } 649 650 @Test testAppendRowToTable()651 public void testAppendRowToTable() { 652 Document doc = Jsoup.parse("<table><tr><td>1</td></tr></table>"); 653 Element table = doc.select("tbody").first(); 654 table.append("<tr><td>2</td></tr>"); 655 656 assertEquals("<table><tbody><tr><td>1</td></tr><tr><td>2</td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html())); 657 } 658 659 @Test testPrependRowToTable()660 public void testPrependRowToTable() { 661 Document doc = Jsoup.parse("<table><tr><td>1</td></tr></table>"); 662 Element table = doc.select("tbody").first(); 663 table.prepend("<tr><td>2</td></tr>"); 664 665 assertEquals("<table><tbody><tr><td>2</td></tr><tr><td>1</td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html())); 666 667 // check sibling index (reindexChildren): 668 Elements ps = doc.select("tr"); 669 for (int i = 0; i < ps.size(); i++) { 670 assertEquals(i, ps.get(i).siblingIndex); 671 } 672 } 673 674 @Test testPrependElement()675 public void testPrependElement() { 676 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 677 Element div = doc.getElementById("1"); 678 div.prependElement("p").text("Before"); 679 assertEquals("Before", div.child(0).text()); 680 assertEquals("Hello", div.child(1).text()); 681 } 682 683 @Test testAddNewText()684 public void testAddNewText() { 685 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 686 Element div = doc.getElementById("1"); 687 div.appendText(" there & now >"); 688 assertEquals ("Hello there & now >", div.text()); 689 assertEquals("<p>Hello</p> there & now >", TextUtil.stripNewlines(div.html())); 690 } 691 692 @Test testPrependText()693 public void testPrependText() { 694 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 695 Element div = doc.getElementById("1"); 696 div.prependText("there & now > "); 697 assertEquals("there & now > Hello", div.text()); 698 assertEquals("there & now > <p>Hello</p>", TextUtil.stripNewlines(div.html())); 699 } 700 701 @Test testThrowsOnAddNullText()702 public void testThrowsOnAddNullText() { 703 assertThrows(IllegalArgumentException.class, () -> { 704 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 705 Element div = doc.getElementById("1"); 706 div.appendText(null); 707 }); 708 } 709 710 @Test testThrowsOnPrependNullText()711 public void testThrowsOnPrependNullText() { 712 assertThrows(IllegalArgumentException.class, () -> { 713 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 714 Element div = doc.getElementById("1"); 715 div.prependText(null); 716 }); 717 } 718 719 @Test testAddNewHtml()720 public void testAddNewHtml() { 721 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 722 Element div = doc.getElementById("1"); 723 div.append("<p>there</p><p>now</p>"); 724 assertEquals("<p>Hello</p><p>there</p><p>now</p>", TextUtil.stripNewlines(div.html())); 725 726 // check sibling index (no reindexChildren): 727 Elements ps = doc.select("p"); 728 for (int i = 0; i < ps.size(); i++) { 729 assertEquals(i, ps.get(i).siblingIndex); 730 } 731 } 732 733 @Test testPrependNewHtml()734 public void testPrependNewHtml() { 735 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 736 Element div = doc.getElementById("1"); 737 div.prepend("<p>there</p><p>now</p>"); 738 assertEquals("<p>there</p><p>now</p><p>Hello</p>", TextUtil.stripNewlines(div.html())); 739 740 // check sibling index (reindexChildren): 741 Elements ps = doc.select("p"); 742 for (int i = 0; i < ps.size(); i++) { 743 assertEquals(i, ps.get(i).siblingIndex); 744 } 745 } 746 prependNodes()747 @Test void prependNodes() { 748 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 749 Element p = doc.expectFirst("p"); 750 p.prepend("Text <!-- comment --> "); 751 assertEquals("Text <!-- comment --> Hello", TextUtil.stripNewlines(p.html())); 752 } 753 appendNodes()754 @Test void appendNodes() { 755 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 756 Element p = doc.expectFirst("p"); 757 p.append(" Text <!-- comment -->"); 758 assertEquals("Hello Text <!-- comment -->", TextUtil.stripNewlines(p.html())); 759 } 760 761 @Test testSetHtml()762 public void testSetHtml() { 763 Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>"); 764 Element div = doc.getElementById("1"); 765 div.html("<p>there</p><p>now</p>"); 766 assertEquals("<p>there</p><p>now</p>", TextUtil.stripNewlines(div.html())); 767 } 768 769 @Test testSetHtmlTitle()770 public void testSetHtmlTitle() { 771 Document doc = Jsoup.parse("<html><head id=2><title id=1></title></head></html>"); 772 773 Element title = doc.getElementById("1"); 774 title.html("good"); 775 assertEquals("good", title.html()); 776 title.html("<i>bad</i>"); 777 assertEquals("<i>bad</i>", title.html()); 778 779 Element head = doc.getElementById("2"); 780 head.html("<title><i>bad</i></title>"); 781 assertEquals("<title><i>bad</i></title>", head.html()); 782 } 783 784 @Test testWrap()785 public void testWrap() { 786 Document doc = Jsoup.parse("<div><p>Hello</p><p>There</p></div>"); 787 Element p = doc.select("p").first(); 788 p.wrap("<div class='head'></div>"); 789 assertEquals("<div><div class=\"head\"><p>Hello</p></div><p>There</p></div>", TextUtil.stripNewlines(doc.body().html())); 790 791 Element ret = p.wrap("<div><div class=foo></div><p>What?</p></div>"); 792 assertEquals("<div><div class=\"head\"><div><div class=\"foo\"><p>Hello</p></div><p>What?</p></div></div><p>There</p></div>", 793 TextUtil.stripNewlines(doc.body().html())); 794 795 assertEquals(ret, p); 796 } 797 798 @Test testWrapNoop()799 public void testWrapNoop() { 800 Document doc = Jsoup.parse("<div><p>Hello</p></div>"); 801 Node p = doc.select("p").first(); 802 Node wrapped = p.wrap("Some junk"); 803 assertSame(p, wrapped); 804 assertEquals("<div><p>Hello</p></div>", TextUtil.stripNewlines(doc.body().html())); 805 // should be a NOOP 806 } 807 808 @Test testWrapOnOrphan()809 public void testWrapOnOrphan() { 810 Element orphan = new Element("span").text("Hello!"); 811 assertFalse(orphan.hasParent()); 812 Element wrapped = orphan.wrap("<div></div> There!"); 813 assertSame(orphan, wrapped); 814 assertTrue(orphan.hasParent()); // should now be in the DIV 815 assertNotNull(orphan.parent()); 816 assertEquals("div", orphan.parent().tagName()); 817 assertEquals("<div>\n <span>Hello!</span>\n</div>", orphan.parent().outerHtml()); 818 } 819 820 @Test testWrapArtificialStructure()821 public void testWrapArtificialStructure() { 822 // div normally couldn't get into a p, but explicitly want to wrap 823 Document doc = Jsoup.parse("<p>Hello <i>there</i> now."); 824 Element i = doc.selectFirst("i"); 825 i.wrap("<div id=id1></div> quite"); 826 assertEquals("div", i.parent().tagName()); 827 assertEquals("<p>Hello <div id=\"id1\"><i>there</i></div> quite now.</p>", TextUtil.stripNewlines(doc.body().html())); 828 } 829 830 @Test before()831 public void before() { 832 Document doc = Jsoup.parse("<div><p>Hello</p><p>There</p></div>"); 833 Element p1 = doc.select("p").first(); 834 p1.before("<div>one</div><div>two</div>"); 835 assertEquals("<div><div>one</div><div>two</div><p>Hello</p><p>There</p></div>", TextUtil.stripNewlines(doc.body().html())); 836 837 doc.select("p").last().before("<p>Three</p><!-- four -->"); 838 assertEquals("<div><div>one</div><div>two</div><p>Hello</p><p>Three</p><!-- four --><p>There</p></div>", TextUtil.stripNewlines(doc.body().html())); 839 } 840 841 @Test after()842 public void after() { 843 Document doc = Jsoup.parse("<div><p>Hello</p><p>There</p></div>"); 844 Element p1 = doc.select("p").first(); 845 p1.after("<div>one</div><div>two</div>"); 846 assertEquals("<div><p>Hello</p><div>one</div><div>two</div><p>There</p></div>", TextUtil.stripNewlines(doc.body().html())); 847 848 doc.select("p").last().after("<p>Three</p><!-- four -->"); 849 assertEquals("<div><p>Hello</p><div>one</div><div>two</div><p>There</p><p>Three</p><!-- four --></div>", TextUtil.stripNewlines(doc.body().html())); 850 } 851 852 @Test testWrapWithRemainder()853 public void testWrapWithRemainder() { 854 Document doc = Jsoup.parse("<div><p>Hello</p></div>"); 855 Element p = doc.select("p").first(); 856 p.wrap("<div class='head'></div><p>There!</p>"); 857 assertEquals("<div><div class=\"head\"><p>Hello</p></div><p>There!</p></div>", TextUtil.stripNewlines(doc.body().html())); 858 } 859 860 @Test testWrapWithSimpleRemainder()861 public void testWrapWithSimpleRemainder() { 862 Document doc = Jsoup.parse("<p>Hello"); 863 Element p = doc.selectFirst("p"); 864 Element body = p.parent(); 865 assertNotNull(body); 866 assertEquals("body", body.tagName()); 867 868 p.wrap("<div></div> There"); 869 Element div = p.parent(); 870 assertNotNull(div); 871 assertEquals("div", div.tagName()); 872 assertSame(div, p.parent()); 873 assertSame(body, div.parent()); 874 875 assertEquals("<div><p>Hello</p></div> There", TextUtil.stripNewlines(doc.body().html())); 876 } 877 878 @Test testHasText()879 public void testHasText() { 880 Document doc = Jsoup.parse("<div><p>Hello</p><p></p></div>"); 881 Element div = doc.select("div").first(); 882 Elements ps = doc.select("p"); 883 884 assertTrue(div.hasText()); 885 assertTrue(ps.first().hasText()); 886 assertFalse(ps.last().hasText()); 887 } 888 889 @Test dataset()890 public void dataset() { 891 Document doc = Jsoup.parse("<div id=1 data-name=jsoup class=new data-package=jar>Hello</div><p id=2>Hello</p>"); 892 Element div = doc.select("div").first(); 893 Map<String, String> dataset = div.dataset(); 894 Attributes attributes = div.attributes(); 895 896 // size, get, set, add, remove 897 assertEquals(2, dataset.size()); 898 assertEquals("jsoup", dataset.get("name")); 899 assertEquals("jar", dataset.get("package")); 900 901 dataset.put("name", "jsoup updated"); 902 dataset.put("language", "java"); 903 dataset.remove("package"); 904 905 assertEquals(2, dataset.size()); 906 assertEquals(4, attributes.size()); 907 assertEquals("jsoup updated", attributes.get("data-name")); 908 assertEquals("jsoup updated", dataset.get("name")); 909 assertEquals("java", attributes.get("data-language")); 910 assertEquals("java", dataset.get("language")); 911 912 attributes.put("data-food", "bacon"); 913 assertEquals(3, dataset.size()); 914 assertEquals("bacon", dataset.get("food")); 915 916 attributes.put("data-", "empty"); 917 assertNull(dataset.get("")); // data- is not a data attribute 918 919 Element p = doc.select("p").first(); 920 assertEquals(0, p.dataset().size()); 921 922 } 923 924 @Test parentlessToString()925 public void parentlessToString() { 926 Document doc = Jsoup.parse("<img src='foo'>"); 927 Element img = doc.select("img").first(); 928 assertEquals("<img src=\"foo\">", img.toString()); 929 930 img.remove(); // lost its parent 931 assertEquals("<img src=\"foo\">", img.toString()); 932 } 933 934 @Test orphanDivToString()935 public void orphanDivToString() { 936 Element orphan = new Element("div").id("foo").text("Hello"); 937 assertEquals("<div id=\"foo\">\n Hello\n</div>", orphan.toString()); 938 } 939 940 @Test testClone()941 public void testClone() { 942 Document doc = Jsoup.parse("<div><p>One<p><span>Two</div>"); 943 944 Element p = doc.select("p").get(1); 945 Element clone = p.clone(); 946 947 assertNotNull(clone.parentNode); // should be a cloned document just containing this clone 948 assertEquals(1, clone.parentNode.childNodeSize()); 949 assertSame(clone.ownerDocument(), clone.parentNode); 950 951 assertEquals(0, clone.siblingIndex); 952 assertEquals(1, p.siblingIndex); 953 assertNotNull(p.parent()); 954 955 clone.append("<span>Three"); 956 assertEquals("<p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(clone.outerHtml())); 957 assertEquals("<div><p>One</p><p><span>Two</span></p></div>", TextUtil.stripNewlines(doc.body().html())); // not modified 958 959 doc.body().appendChild(clone); // adopt 960 assertNotNull(clone.parent()); 961 assertEquals("<div><p>One</p><p><span>Two</span></p></div><p><span>Two</span><span>Three</span></p>", TextUtil.stripNewlines(doc.body().html())); 962 } 963 964 @Test testClonesClassnames()965 public void testClonesClassnames() { 966 Document doc = Jsoup.parse("<div class='one two'></div>"); 967 Element div = doc.select("div").first(); 968 Set<String> classes = div.classNames(); 969 assertEquals(2, classes.size()); 970 assertTrue(classes.contains("one")); 971 assertTrue(classes.contains("two")); 972 973 Element copy = div.clone(); 974 Set<String> copyClasses = copy.classNames(); 975 assertEquals(2, copyClasses.size()); 976 assertTrue(copyClasses.contains("one")); 977 assertTrue(copyClasses.contains("two")); 978 copyClasses.add("three"); 979 copyClasses.remove("one"); 980 981 assertTrue(classes.contains("one")); 982 assertFalse(classes.contains("three")); 983 assertFalse(copyClasses.contains("one")); 984 assertTrue(copyClasses.contains("three")); 985 986 assertEquals("", div.html()); 987 assertEquals("", copy.html()); 988 } 989 990 @Test testShallowClone()991 public void testShallowClone() { 992 String base = "http://example.com/"; 993 Document doc = Jsoup.parse("<div id=1 class=one><p id=2 class=two>One", base); 994 Element d = doc.selectFirst("div"); 995 Element p = doc.selectFirst("p"); 996 TextNode t = p.textNodes().get(0); 997 998 Element d2 = d.shallowClone(); 999 Element p2 = p.shallowClone(); 1000 TextNode t2 = (TextNode) t.shallowClone(); 1001 1002 assertEquals(1, d.childNodeSize()); 1003 assertEquals(0, d2.childNodeSize()); 1004 1005 assertEquals(1, p.childNodeSize()); 1006 assertEquals(0, p2.childNodeSize()); 1007 1008 assertEquals("", p2.text()); 1009 assertEquals("One", t2.text()); 1010 1011 assertEquals("two", p2.className()); 1012 p2.removeClass("two"); 1013 assertEquals("two", p.className()); 1014 1015 d2.append("<p id=3>Three"); 1016 assertEquals(1, d2.childNodeSize()); 1017 assertEquals("Three", d2.text()); 1018 assertEquals("One", d.text()); 1019 assertEquals(base, d2.baseUri()); 1020 } 1021 1022 @Test testTagNameSet()1023 public void testTagNameSet() { 1024 Document doc = Jsoup.parse("<div><i>Hello</i>"); 1025 doc.select("i").first().tagName("em"); 1026 assertEquals(0, doc.select("i").size()); 1027 assertEquals(1, doc.select("em").size()); 1028 assertEquals("<em>Hello</em>", doc.select("div").first().html()); 1029 } 1030 1031 @Test testHtmlContainsOuter()1032 public void testHtmlContainsOuter() { 1033 Document doc = Jsoup.parse("<title>Check</title> <div>Hello there</div>"); 1034 doc.outputSettings().indentAmount(0); 1035 assertTrue(doc.html().contains(doc.select("title").outerHtml())); 1036 assertTrue(doc.html().contains(doc.select("div").outerHtml())); 1037 } 1038 1039 @Test testGetTextNodes()1040 public void testGetTextNodes() { 1041 Document doc = Jsoup.parse("<p>One <span>Two</span> Three <br> Four</p>"); 1042 List<TextNode> textNodes = doc.select("p").first().textNodes(); 1043 1044 assertEquals(3, textNodes.size()); 1045 assertEquals("One ", textNodes.get(0).text()); 1046 assertEquals(" Three ", textNodes.get(1).text()); 1047 assertEquals(" Four", textNodes.get(2).text()); 1048 1049 assertEquals(0, doc.select("br").first().textNodes().size()); 1050 } 1051 1052 @Test testManipulateTextNodes()1053 public void testManipulateTextNodes() { 1054 Document doc = Jsoup.parse("<p>One <span>Two</span> Three <br> Four</p>"); 1055 Element p = doc.select("p").first(); 1056 List<TextNode> textNodes = p.textNodes(); 1057 1058 textNodes.get(1).text(" three-more "); 1059 textNodes.get(2).splitText(3).text("-ur"); 1060 1061 assertEquals("One Two three-more Fo-ur", p.text()); 1062 assertEquals("One three-more Fo-ur", p.ownText()); 1063 assertEquals(4, p.textNodes().size()); // grew because of split 1064 } 1065 1066 @Test testGetDataNodes()1067 public void testGetDataNodes() { 1068 Document doc = Jsoup.parse("<script>One Two</script> <style>Three Four</style> <p>Fix Six</p>"); 1069 Element script = doc.select("script").first(); 1070 Element style = doc.select("style").first(); 1071 Element p = doc.select("p").first(); 1072 1073 List<DataNode> scriptData = script.dataNodes(); 1074 assertEquals(1, scriptData.size()); 1075 assertEquals("One Two", scriptData.get(0).getWholeData()); 1076 1077 List<DataNode> styleData = style.dataNodes(); 1078 assertEquals(1, styleData.size()); 1079 assertEquals("Three Four", styleData.get(0).getWholeData()); 1080 1081 List<DataNode> pData = p.dataNodes(); 1082 assertEquals(0, pData.size()); 1083 } 1084 1085 @Test elementIsNotASiblingOfItself()1086 public void elementIsNotASiblingOfItself() { 1087 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three</div>"); 1088 Element p2 = doc.select("p").get(1); 1089 1090 assertEquals("Two", p2.text()); 1091 Elements els = p2.siblingElements(); 1092 assertEquals(2, els.size()); 1093 assertEquals("<p>One</p>", els.get(0).outerHtml()); 1094 assertEquals("<p>Three</p>", els.get(1).outerHtml()); 1095 } 1096 1097 @Test testChildThrowsIndexOutOfBoundsOnMissing()1098 public void testChildThrowsIndexOutOfBoundsOnMissing() { 1099 Document doc = Jsoup.parse("<div><p>One</p><p>Two</p></div>"); 1100 Element div = doc.select("div").first(); 1101 1102 assertEquals(2, div.children().size()); 1103 assertEquals("One", div.child(0).text()); 1104 1105 try { 1106 div.child(3); 1107 fail("Should throw index out of bounds"); 1108 } catch (IndexOutOfBoundsException e) { 1109 } 1110 } 1111 1112 @Test moveByAppend()1113 public void moveByAppend() { 1114 // test for https://github.com/jhy/jsoup/issues/239 1115 // can empty an element and append its children to another element 1116 Document doc = Jsoup.parse("<div id=1>Text <p>One</p> Text <p>Two</p></div><div id=2></div>"); 1117 Element div1 = doc.select("div").get(0); 1118 Element div2 = doc.select("div").get(1); 1119 1120 assertEquals(4, div1.childNodeSize()); 1121 List<Node> children = div1.childNodes(); 1122 assertEquals(4, children.size()); 1123 1124 div2.insertChildren(0, children); 1125 1126 assertEquals(4, children.size()); // children is NOT backed by div1.childNodes but a wrapper, so should still be 4 (but re-parented) 1127 assertEquals(0, div1.childNodeSize()); 1128 assertEquals(4, div2.childNodeSize()); 1129 assertEquals("<div id=\"1\"></div>\n<div id=\"2\">\n Text \n <p>One</p> Text \n <p>Two</p>\n</div>", 1130 doc.body().html()); 1131 } 1132 1133 @Test insertChildrenArgumentValidation()1134 public void insertChildrenArgumentValidation() { 1135 Document doc = Jsoup.parse("<div id=1>Text <p>One</p> Text <p>Two</p></div><div id=2></div>"); 1136 Element div1 = doc.select("div").get(0); 1137 Element div2 = doc.select("div").get(1); 1138 List<Node> children = div1.childNodes(); 1139 1140 try { 1141 div2.insertChildren(6, children); 1142 fail(); 1143 } catch (IllegalArgumentException e) { 1144 } 1145 1146 try { 1147 div2.insertChildren(-5, children); 1148 fail(); 1149 } catch (IllegalArgumentException e) { 1150 } 1151 1152 try { 1153 div2.insertChildren(0, (Collection<? extends Node>) null); 1154 fail(); 1155 } catch (IllegalArgumentException e) { 1156 } 1157 } 1158 1159 @Test insertChildrenAtPosition()1160 public void insertChildrenAtPosition() { 1161 Document doc = Jsoup.parse("<div id=1>Text1 <p>One</p> Text2 <p>Two</p></div><div id=2>Text3 <p>Three</p></div>"); 1162 Element div1 = doc.select("div").get(0); 1163 Elements p1s = div1.select("p"); 1164 Element div2 = doc.select("div").get(1); 1165 1166 assertEquals(2, div2.childNodeSize()); 1167 div2.insertChildren(-1, p1s); 1168 assertEquals(2, div1.childNodeSize()); // moved two out 1169 assertEquals(4, div2.childNodeSize()); 1170 assertEquals(3, p1s.get(1).siblingIndex()); // should be last 1171 1172 List<Node> els = new ArrayList<>(); 1173 Element el1 = new Element(Tag.valueOf("span"), "").text("Span1"); 1174 Element el2 = new Element(Tag.valueOf("span"), "").text("Span2"); 1175 TextNode tn1 = new TextNode("Text4"); 1176 els.add(el1); 1177 els.add(el2); 1178 els.add(tn1); 1179 1180 assertNull(el1.parent()); 1181 div2.insertChildren(-2, els); 1182 assertEquals(div2, el1.parent()); 1183 assertEquals(7, div2.childNodeSize()); 1184 assertEquals(3, el1.siblingIndex()); 1185 assertEquals(4, el2.siblingIndex()); 1186 assertEquals(5, tn1.siblingIndex()); 1187 } 1188 1189 @Test insertChildrenAsCopy()1190 public void insertChildrenAsCopy() { 1191 Document doc = Jsoup.parse("<div id=1>Text <p>One</p> Text <p>Two</p></div><div id=2></div>"); 1192 Element div1 = doc.select("div").get(0); 1193 Element div2 = doc.select("div").get(1); 1194 Elements ps = doc.select("p").clone(); 1195 ps.first().text("One cloned"); 1196 div2.insertChildren(-1, ps); 1197 1198 assertEquals(4, div1.childNodeSize()); // not moved -- cloned 1199 assertEquals(2, div2.childNodeSize()); 1200 assertEquals("<div id=\"1\">Text <p>One</p> Text <p>Two</p></div><div id=\"2\"><p>One cloned</p><p>Two</p></div>", 1201 TextUtil.stripNewlines(doc.body().html())); 1202 } 1203 1204 @Test testCssPath()1205 public void testCssPath() { 1206 Document doc = Jsoup.parse("<div id=\"id1\">A</div><div>B</div><div class=\"c1 c2\">C</div>"); 1207 Element divA = doc.select("div").get(0); 1208 Element divB = doc.select("div").get(1); 1209 Element divC = doc.select("div").get(2); 1210 assertEquals(divA.cssSelector(), "#id1"); 1211 assertEquals(divB.cssSelector(), "html > body > div:nth-child(2)"); 1212 assertEquals(divC.cssSelector(), "html > body > div.c1.c2"); 1213 1214 assertSame(divA, doc.select(divA.cssSelector()).first()); 1215 assertSame(divB, doc.select(divB.cssSelector()).first()); 1216 assertSame(divC, doc.select(divC.cssSelector()).first()); 1217 } 1218 1219 @Test testCssPathDuplicateIds()1220 public void testCssPathDuplicateIds() { 1221 // https://github.com/jhy/jsoup/issues/1147 - multiple elements with same ID, use the non-ID form 1222 Document doc = Jsoup.parse("<article><div id=dupe>A</div><div id=dupe>B</div><div id=dupe class=c1>"); 1223 Element divA = doc.select("div").get(0); 1224 Element divB = doc.select("div").get(1); 1225 Element divC = doc.select("div").get(2); 1226 1227 assertEquals(divA.cssSelector(), "html > body > article > div:nth-child(1)"); 1228 assertEquals(divB.cssSelector(), "html > body > article > div:nth-child(2)"); 1229 assertEquals(divC.cssSelector(), "html > body > article > div.c1"); 1230 1231 assertSame(divA, doc.select(divA.cssSelector()).first()); 1232 assertSame(divB, doc.select(divB.cssSelector()).first()); 1233 assertSame(divC, doc.select(divC.cssSelector()).first()); 1234 } 1235 cssSelectorEscaped()1236 @Test public void cssSelectorEscaped() { 1237 // https://github.com/jhy/jsoup/issues/1742 1238 Document doc = Jsoup.parse("<p\\p>One</p\\p> <p id='one.two'>Two</p> <p class='one.two:three/four'>Three</p>"); 1239 Element one = doc.expectFirst("p\\\\p"); 1240 Elements ps = doc.select("p"); 1241 Element two = ps.get(0); 1242 Element three = ps.get(1); 1243 1244 String oneSelect = one.cssSelector(); 1245 assertEquals("html > body > p\\\\p", oneSelect); 1246 assertEquals(one, doc.expectFirst(oneSelect)); 1247 1248 String twoSelect = two.cssSelector(); 1249 assertEquals("#one\\.two", twoSelect); 1250 assertEquals(two, doc.expectFirst(twoSelect)); 1251 1252 String threeSelect = three.cssSelector(); 1253 assertEquals("html > body > p.one\\.two\\:three\\/four", threeSelect); 1254 assertEquals(three, doc.expectFirst(threeSelect)); 1255 } 1256 cssEscapedAmp()1257 @Test public void cssEscapedAmp() { 1258 Document doc = Jsoup.parse("<p class='\\&'>One</p>"); 1259 Element one = doc.expectFirst(".\\\\\\&"); // tested matches js querySelector 1260 assertEquals("One", one.text()); 1261 1262 String q = one.cssSelector(); 1263 assertEquals("html > body > p.\\\\\\&", q); 1264 assertEquals(one, doc.expectFirst(q)); 1265 } 1266 cssSelectorEscapedClass()1267 @Test public void cssSelectorEscapedClass() { 1268 // example in https://github.com/jhy/jsoup/issues/838 1269 String html = "<div class='B\\&W\\?'><div class=test>Text</div></div>"; 1270 Document parse = Jsoup.parse(html); 1271 Element el = parse.expectFirst(".test"); 1272 assertEquals("Text", el.text()); 1273 1274 String q = el.cssSelector(); 1275 assertEquals("html > body > div.B\\\\\\&W\\\\\\? > div.test", q); 1276 Element found = parse.expectFirst(q); 1277 assertEquals(found, el); 1278 } 1279 1280 @Test testClassNames()1281 public void testClassNames() { 1282 Document doc = Jsoup.parse("<div class=\"c1 c2\">C</div>"); 1283 Element div = doc.select("div").get(0); 1284 1285 assertEquals("c1 c2", div.className()); 1286 1287 final Set<String> set1 = div.classNames(); 1288 final Object[] arr1 = set1.toArray(); 1289 assertEquals(2, arr1.length); 1290 assertEquals("c1", arr1[0]); 1291 assertEquals("c2", arr1[1]); 1292 1293 // Changes to the set should not be reflected in the Elements getters 1294 set1.add("c3"); 1295 assertEquals(2, div.classNames().size()); 1296 assertEquals("c1 c2", div.className()); 1297 1298 // Update the class names to a fresh set 1299 final Set<String> newSet = new LinkedHashSet<>(3); 1300 newSet.addAll(set1); 1301 newSet.add("c3"); 1302 1303 div.classNames(newSet); 1304 1305 assertEquals("c1 c2 c3", div.className()); 1306 1307 final Set<String> set2 = div.classNames(); 1308 final Object[] arr2 = set2.toArray(); 1309 assertEquals(3, arr2.length); 1310 assertEquals("c1", arr2[0]); 1311 assertEquals("c2", arr2[1]); 1312 assertEquals("c3", arr2[2]); 1313 } 1314 1315 @Test testHashAndEqualsAndValue()1316 public void testHashAndEqualsAndValue() { 1317 // .equals and hashcode are identity. value is content. 1318 1319 String doc1 = "<div id=1><p class=one>One</p><p class=one>One</p><p class=one>Two</p><p class=two>One</p></div>" + 1320 "<div id=2><p class=one>One</p><p class=one>One</p><p class=one>Two</p><p class=two>One</p></div>"; 1321 1322 Document doc = Jsoup.parse(doc1); 1323 Elements els = doc.select("p"); 1324 1325 /* 1326 for (Element el : els) { 1327 System.out.println(el.hashCode() + " - " + el.outerHtml()); 1328 } 1329 1330 0 1534787905 - <p class="one">One</p> 1331 1 1534787905 - <p class="one">One</p> 1332 2 1539683239 - <p class="one">Two</p> 1333 3 1535455211 - <p class="two">One</p> 1334 4 1534787905 - <p class="one">One</p> 1335 5 1534787905 - <p class="one">One</p> 1336 6 1539683239 - <p class="one">Two</p> 1337 7 1535455211 - <p class="two">One</p> 1338 */ 1339 assertEquals(8, els.size()); 1340 Element e0 = els.get(0); 1341 Element e1 = els.get(1); 1342 Element e2 = els.get(2); 1343 Element e3 = els.get(3); 1344 Element e4 = els.get(4); 1345 Element e5 = els.get(5); 1346 Element e6 = els.get(6); 1347 Element e7 = els.get(7); 1348 1349 assertEquals(e0, e0); 1350 assertTrue(e0.hasSameValue(e1)); 1351 assertTrue(e0.hasSameValue(e4)); 1352 assertTrue(e0.hasSameValue(e5)); 1353 assertNotEquals(e0, e2); 1354 assertFalse(e0.hasSameValue(e2)); 1355 assertFalse(e0.hasSameValue(e3)); 1356 assertFalse(e0.hasSameValue(e6)); 1357 assertFalse(e0.hasSameValue(e7)); 1358 1359 assertEquals(e0.hashCode(), e0.hashCode()); 1360 assertNotEquals(e0.hashCode(), (e2.hashCode())); 1361 assertNotEquals(e0.hashCode(), (e3).hashCode()); 1362 assertNotEquals(e0.hashCode(), (e6).hashCode()); 1363 assertNotEquals(e0.hashCode(), (e7).hashCode()); 1364 } 1365 1366 @Test testRelativeUrls()1367 public void testRelativeUrls() { 1368 String html = "<body><a href='./one.html'>One</a> <a href='two.html'>two</a> <a href='../three.html'>Three</a> <a href='//example2.com/four/'>Four</a> <a href='https://example2.com/five/'>Five</a> <a>Six</a> <a href=''>Seven</a>"; 1369 Document doc = Jsoup.parse(html, "http://example.com/bar/"); 1370 Elements els = doc.select("a"); 1371 1372 assertEquals("http://example.com/bar/one.html", els.get(0).absUrl("href")); 1373 assertEquals("http://example.com/bar/two.html", els.get(1).absUrl("href")); 1374 assertEquals("http://example.com/three.html", els.get(2).absUrl("href")); 1375 assertEquals("http://example2.com/four/", els.get(3).absUrl("href")); 1376 assertEquals("https://example2.com/five/", els.get(4).absUrl("href")); 1377 assertEquals("", els.get(5).absUrl("href")); 1378 assertEquals("http://example.com/bar/", els.get(6).absUrl("href")); 1379 } 1380 1381 @Test testRelativeIdnUrls()1382 public void testRelativeIdnUrls() { 1383 String idn = "https://www.测试.测试/"; 1384 String idnFoo = idn + "foo.html?bar"; 1385 1386 Document doc = Jsoup.parse("<a href=''>One</a><a href='/bar.html?qux'>Two</a>", idnFoo); 1387 Elements els = doc.select("a"); 1388 Element one = els.get(0); 1389 Element two = els.get(1); 1390 String hrefOne = one.absUrl("href"); 1391 String hrefTwo = two.absUrl("href"); 1392 assertEquals(idnFoo, hrefOne); 1393 assertEquals("https://www.测试.测试/bar.html?qux", hrefTwo); 1394 } 1395 1396 @Test appendMustCorrectlyMoveChildrenInsideOneParentElement()1397 public void appendMustCorrectlyMoveChildrenInsideOneParentElement() { 1398 Document doc = new Document(""); 1399 Element body = doc.appendElement("body"); 1400 body.appendElement("div1"); 1401 body.appendElement("div2"); 1402 final Element div3 = body.appendElement("div3"); 1403 div3.text("Check"); 1404 final Element div4 = body.appendElement("div4"); 1405 1406 ArrayList<Element> toMove = new ArrayList<>(); 1407 toMove.add(div3); 1408 toMove.add(div4); 1409 1410 body.insertChildren(0, toMove); 1411 1412 String result = doc.toString().replaceAll("\\s+", ""); 1413 assertEquals("<body><div3>Check</div3><div4></div4><div1></div1><div2></div2></body>", result); 1414 } 1415 1416 @Test testHashcodeIsStableWithContentChanges()1417 public void testHashcodeIsStableWithContentChanges() { 1418 Element root = new Element(Tag.valueOf("root"), ""); 1419 1420 HashSet<Element> set = new HashSet<>(); 1421 // Add root node: 1422 set.add(root); 1423 1424 root.appendChild(new Element(Tag.valueOf("a"), "")); 1425 assertTrue(set.contains(root)); 1426 } 1427 1428 @Test testNamespacedElements()1429 public void testNamespacedElements() { 1430 // Namespaces with ns:tag in HTML must be translated to ns|tag in CSS. 1431 String html = "<html><body><fb:comments /></body></html>"; 1432 Document doc = Jsoup.parse(html, "http://example.com/bar/"); 1433 Elements els = doc.select("fb|comments"); 1434 assertEquals(1, els.size()); 1435 assertEquals("html > body > fb|comments", els.get(0).cssSelector()); 1436 } 1437 1438 @Test testChainedRemoveAttributes()1439 public void testChainedRemoveAttributes() { 1440 String html = "<a one two three four>Text</a>"; 1441 Document doc = Jsoup.parse(html); 1442 Element a = doc.select("a").first(); 1443 a 1444 .removeAttr("zero") 1445 .removeAttr("one") 1446 .removeAttr("two") 1447 .removeAttr("three") 1448 .removeAttr("four") 1449 .removeAttr("five"); 1450 assertEquals("<a>Text</a>", a.outerHtml()); 1451 } 1452 1453 @Test testLoopedRemoveAttributes()1454 public void testLoopedRemoveAttributes() { 1455 String html = "<a one two three four>Text</a><p foo>Two</p>"; 1456 Document doc = Jsoup.parse(html); 1457 for (Element el : doc.getAllElements()) { 1458 el.clearAttributes(); 1459 } 1460 1461 assertEquals("<a>Text</a>\n<p>Two</p>", doc.body().html()); 1462 } 1463 1464 @Test testIs()1465 public void testIs() { 1466 String html = "<div><p>One <a class=big>Two</a> Three</p><p>Another</p>"; 1467 Document doc = Jsoup.parse(html); 1468 Element p = doc.select("p").first(); 1469 1470 assertTrue(p.is("p")); 1471 assertFalse(p.is("div")); 1472 assertTrue(p.is("p:has(a)")); 1473 assertFalse(p.is("a")); // does not descend 1474 assertTrue(p.is("p:first-child")); 1475 assertFalse(p.is("p:last-child")); 1476 assertTrue(p.is("*")); 1477 assertTrue(p.is("div p")); 1478 1479 Element q = doc.select("p").last(); 1480 assertTrue(q.is("p")); 1481 assertTrue(q.is("p ~ p")); 1482 assertTrue(q.is("p + p")); 1483 assertTrue(q.is("p:last-child")); 1484 assertFalse(q.is("p a")); 1485 assertFalse(q.is("a")); 1486 } 1487 1488 @Test testEvalMethods()1489 public void testEvalMethods() { 1490 Document doc = Jsoup.parse("<div><p>One <a class=big>Two</a> Three</p><p>Another</p>"); 1491 Element p = doc.selectFirst(QueryParser.parse(("p"))); 1492 assertEquals("One Three", p.ownText()); 1493 1494 assertTrue(p.is(QueryParser.parse("p"))); 1495 Evaluator aEval = QueryParser.parse("a"); 1496 assertFalse(p.is(aEval)); 1497 1498 Element a = p.selectFirst(aEval); 1499 assertEquals("div", a.closest(QueryParser.parse("div:has( > p)")).tagName()); 1500 Element body = p.closest(QueryParser.parse("body")); 1501 assertEquals("body", body.nodeName()); 1502 } 1503 1504 @Test testClosest()1505 public void testClosest() { 1506 String html = "<article>\n" + 1507 " <div id=div-01>Here is div-01\n" + 1508 " <div id=div-02>Here is div-02\n" + 1509 " <div id=div-03>Here is div-03</div>\n" + 1510 " </div>\n" + 1511 " </div>\n" + 1512 "</article>"; 1513 1514 Document doc = Jsoup.parse(html); 1515 Element el = doc.selectFirst("#div-03"); 1516 assertEquals("Here is div-03", el.text()); 1517 assertEquals("div-03", el.id()); 1518 1519 assertEquals("div-02", el.closest("#div-02").id()); 1520 assertEquals(el, el.closest("div div")); // closest div in a div is itself 1521 assertEquals("div-01", el.closest("article > div").id()); 1522 assertEquals("article", el.closest(":not(div)").tagName()); 1523 assertNull(el.closest("p")); 1524 } 1525 1526 @Test elementByTagName()1527 public void elementByTagName() { 1528 Element a = new Element("P"); 1529 assertEquals("P", a.tagName()); 1530 } 1531 1532 @Test testChildrenElements()1533 public void testChildrenElements() { 1534 String html = "<div><p><a>One</a></p><p><a>Two</a></p>Three</div><span>Four</span><foo></foo><img>"; 1535 Document doc = Jsoup.parse(html); 1536 Element div = doc.select("div").first(); 1537 Element p = doc.select("p").first(); 1538 Element span = doc.select("span").first(); 1539 Element foo = doc.select("foo").first(); 1540 Element img = doc.select("img").first(); 1541 1542 Elements docChildren = div.children(); 1543 assertEquals(2, docChildren.size()); 1544 assertEquals("<p><a>One</a></p>", docChildren.get(0).outerHtml()); 1545 assertEquals("<p><a>Two</a></p>", docChildren.get(1).outerHtml()); 1546 assertEquals(3, div.childNodes().size()); 1547 assertEquals("Three", div.childNodes().get(2).outerHtml()); 1548 1549 assertEquals(1, p.children().size()); 1550 assertEquals("One", p.children().text()); 1551 1552 assertEquals(0, span.children().size()); 1553 assertEquals(1, span.childNodes().size()); 1554 assertEquals("Four", span.childNodes().get(0).outerHtml()); 1555 1556 assertEquals(0, foo.children().size()); 1557 assertEquals(0, foo.childNodes().size()); 1558 assertEquals(0, img.children().size()); 1559 assertEquals(0, img.childNodes().size()); 1560 } 1561 1562 @Test testShadowElementsAreUpdated()1563 public void testShadowElementsAreUpdated() { 1564 String html = "<div><p><a>One</a></p><p><a>Two</a></p>Three</div><span>Four</span><foo></foo><img>"; 1565 Document doc = Jsoup.parse(html); 1566 Element div = doc.select("div").first(); 1567 Elements els = div.children(); 1568 List<Node> nodes = div.childNodes(); 1569 1570 assertEquals(2, els.size()); // the two Ps 1571 assertEquals(3, nodes.size()); // the "Three" textnode 1572 1573 Element p3 = new Element("p").text("P3"); 1574 Element p4 = new Element("p").text("P4"); 1575 div.insertChildren(1, p3); 1576 div.insertChildren(3, p4); 1577 Elements els2 = div.children(); 1578 1579 // first els should not have changed 1580 assertEquals(2, els.size()); 1581 assertEquals(4, els2.size()); 1582 1583 assertEquals("<p><a>One</a></p>\n" + 1584 "<p>P3</p>\n" + 1585 "<p><a>Two</a></p>\n" + 1586 "<p>P4</p>Three", div.html()); 1587 assertEquals("P3", els2.get(1).text()); 1588 assertEquals("P4", els2.get(3).text()); 1589 1590 p3.after("<span>Another</span"); 1591 1592 Elements els3 = div.children(); 1593 assertEquals(5, els3.size()); 1594 assertEquals("span", els3.get(2).tagName()); 1595 assertEquals("Another", els3.get(2).text()); 1596 1597 assertEquals("<p><a>One</a></p>\n" + 1598 "<p>P3</p><span>Another</span>\n" + 1599 "<p><a>Two</a></p>\n" + 1600 "<p>P4</p>Three", div.html()); 1601 } 1602 1603 @Test classNamesAndAttributeNameIsCaseInsensitive()1604 public void classNamesAndAttributeNameIsCaseInsensitive() { 1605 String html = "<p Class='SomeText AnotherText'>One</p>"; 1606 Document doc = Jsoup.parse(html); 1607 Element p = doc.select("p").first(); 1608 assertEquals("SomeText AnotherText", p.className()); 1609 assertTrue(p.classNames().contains("SomeText")); 1610 assertTrue(p.classNames().contains("AnotherText")); 1611 assertTrue(p.hasClass("SomeText")); 1612 assertTrue(p.hasClass("sometext")); 1613 assertTrue(p.hasClass("AnotherText")); 1614 assertTrue(p.hasClass("anothertext")); 1615 1616 Element p1 = doc.select(".SomeText").first(); 1617 Element p2 = doc.select(".sometext").first(); 1618 Element p3 = doc.select("[class=SomeText AnotherText]").first(); 1619 Element p4 = doc.select("[Class=SomeText AnotherText]").first(); 1620 Element p5 = doc.select("[class=sometext anothertext]").first(); 1621 Element p6 = doc.select("[class=SomeText AnotherText]").first(); 1622 Element p7 = doc.select("[class^=sometext]").first(); 1623 Element p8 = doc.select("[class$=nothertext]").first(); 1624 Element p9 = doc.select("[class^=sometext]").first(); 1625 Element p10 = doc.select("[class$=AnotherText]").first(); 1626 1627 assertEquals("One", p1.text()); 1628 assertEquals(p1, p2); 1629 assertEquals(p1, p3); 1630 assertEquals(p1, p4); 1631 assertEquals(p1, p5); 1632 assertEquals(p1, p6); 1633 assertEquals(p1, p7); 1634 assertEquals(p1, p8); 1635 assertEquals(p1, p9); 1636 assertEquals(p1, p10); 1637 } 1638 1639 @Test testAppendTo()1640 public void testAppendTo() { 1641 String parentHtml = "<div class='a'></div>"; 1642 String childHtml = "<div class='b'></div><p>Two</p>"; 1643 1644 Document parentDoc = Jsoup.parse(parentHtml); 1645 Element parent = parentDoc.body(); 1646 Document childDoc = Jsoup.parse(childHtml); 1647 1648 Element div = childDoc.select("div").first(); 1649 Element p = childDoc.select("p").first(); 1650 Element appendTo1 = div.appendTo(parent); 1651 assertEquals(div, appendTo1); 1652 1653 Element appendTo2 = p.appendTo(div); 1654 assertEquals(p, appendTo2); 1655 1656 assertEquals("<div class=\"a\"></div>\n<div class=\"b\">\n <p>Two</p>\n</div>", parentDoc.body().html()); 1657 assertEquals("", childDoc.body().html()); // got moved out 1658 } 1659 1660 @Test testNormalizesNbspInText()1661 public void testNormalizesNbspInText() { 1662 String escaped = "You can't always get what you want."; 1663 String withNbsp = "You can't always get what you want."; // there is an nbsp char in there 1664 Document doc = Jsoup.parse("<p>" + escaped); 1665 Element p = doc.select("p").first(); 1666 assertEquals("You can't always get what you want.", p.text()); // text is normalized 1667 1668 assertEquals("<p>" + escaped + "</p>", p.outerHtml()); // html / whole text keeps 1669 assertEquals(withNbsp, p.textNodes().get(0).getWholeText()); 1670 assertEquals(160, withNbsp.charAt(29)); 1671 1672 Element matched = doc.select("p:contains(get what you want)").first(); 1673 assertEquals("p", matched.nodeName()); 1674 assertTrue(matched.is(":containsOwn(get what you want)")); 1675 } 1676 1677 @Test testNormalizesInvisiblesInText()1678 public void testNormalizesInvisiblesInText() { 1679 String escaped = "This­is​one­long­word"; 1680 String decoded = "This\u00ADis\u200Bone\u00ADlong\u00ADword"; // browser would not display those soft hyphens / other chars, so we don't want them in the text 1681 1682 Document doc = Jsoup.parse("<p>" + escaped); 1683 Element p = doc.select("p").first(); 1684 doc.outputSettings().charset("ascii"); // so that the outer html is easier to see with escaped invisibles 1685 assertEquals("Thisisonelongword", p.text()); // text is normalized 1686 assertEquals("<p>" + escaped + "</p>", p.outerHtml()); // html / whole text keeps ­ etc; 1687 assertEquals(decoded, p.textNodes().get(0).getWholeText()); 1688 1689 Element matched = doc.select("p:contains(Thisisonelongword)").first(); // really just oneloneword, no invisibles 1690 assertEquals("p", matched.nodeName()); 1691 assertTrue(matched.is(":containsOwn(Thisisonelongword)")); 1692 1693 } 1694 1695 @Test testRemoveBeforeIndex()1696 public void testRemoveBeforeIndex() { 1697 Document doc = Jsoup.parse( 1698 "<html><body><div><p>before1</p><p>before2</p><p>XXX</p><p>after1</p><p>after2</p></div></body></html>", 1699 ""); 1700 Element body = doc.select("body").first(); 1701 Elements elems = body.select("p:matchesOwn(XXX)"); 1702 Element xElem = elems.first(); 1703 Elements beforeX = xElem.parent().getElementsByIndexLessThan(xElem.elementSiblingIndex()); 1704 1705 for (Element p : beforeX) { 1706 p.remove(); 1707 } 1708 1709 assertEquals("<body><div><p>XXX</p><p>after1</p><p>after2</p></div></body>", TextUtil.stripNewlines(body.outerHtml())); 1710 } 1711 1712 @Test testRemoveAfterIndex()1713 public void testRemoveAfterIndex() { 1714 Document doc2 = Jsoup.parse( 1715 "<html><body><div><p>before1</p><p>before2</p><p>XXX</p><p>after1</p><p>after2</p></div></body></html>", 1716 ""); 1717 Element body = doc2.select("body").first(); 1718 Elements elems = body.select("p:matchesOwn(XXX)"); 1719 Element xElem = elems.first(); 1720 Elements afterX = xElem.parent().getElementsByIndexGreaterThan(xElem.elementSiblingIndex()); 1721 1722 for (Element p : afterX) { 1723 p.remove(); 1724 } 1725 1726 assertEquals("<body><div><p>before1</p><p>before2</p><p>XXX</p></div></body>", TextUtil.stripNewlines(body.outerHtml())); 1727 } 1728 1729 @Test whiteSpaceClassElement()1730 public void whiteSpaceClassElement() { 1731 Tag tag = Tag.valueOf("a"); 1732 Attributes attribs = new Attributes(); 1733 Element el = new Element(tag, "", attribs); 1734 1735 attribs.put("class", "abc "); 1736 boolean hasClass = el.hasClass("ab"); 1737 assertFalse(hasClass); 1738 } 1739 1740 @Test testNextElementSiblingAfterClone()1741 public void testNextElementSiblingAfterClone() { 1742 // via https://github.com/jhy/jsoup/issues/951 1743 String html = "<!DOCTYPE html><html lang=\"en\"><head></head><body><div>Initial element</div></body></html>"; 1744 String expectedText = "New element"; 1745 String cloneExpect = "New element in clone"; 1746 1747 Document original = Jsoup.parse(html); 1748 Document clone = original.clone(); 1749 1750 Element originalElement = original.body().child(0); 1751 originalElement.after("<div>" + expectedText + "</div>"); 1752 Element originalNextElementSibling = originalElement.nextElementSibling(); 1753 Element originalNextSibling = (Element) originalElement.nextSibling(); 1754 assertEquals(expectedText, originalNextElementSibling.text()); 1755 assertEquals(expectedText, originalNextSibling.text()); 1756 1757 Element cloneElement = clone.body().child(0); 1758 cloneElement.after("<div>" + cloneExpect + "</div>"); 1759 Element cloneNextElementSibling = cloneElement.nextElementSibling(); 1760 Element cloneNextSibling = (Element) cloneElement.nextSibling(); 1761 assertEquals(cloneExpect, cloneNextElementSibling.text()); 1762 assertEquals(cloneExpect, cloneNextSibling.text()); 1763 } 1764 1765 @Test testRemovingEmptyClassAttributeWhenLastClassRemoved()1766 public void testRemovingEmptyClassAttributeWhenLastClassRemoved() { 1767 // https://github.com/jhy/jsoup/issues/947 1768 Document doc = Jsoup.parse("<img class=\"one two\" />"); 1769 Element img = doc.select("img").first(); 1770 img.removeClass("one"); 1771 img.removeClass("two"); 1772 assertFalse(doc.body().html().contains("class=\"\"")); 1773 } 1774 1775 @Test booleanAttributeOutput()1776 public void booleanAttributeOutput() { 1777 Document doc = Jsoup.parse("<img src=foo noshade='' nohref async=async autofocus=false>"); 1778 Element img = doc.selectFirst("img"); 1779 1780 assertEquals("<img src=\"foo\" noshade nohref async autofocus=\"false\">", img.outerHtml()); 1781 } 1782 1783 @Test textHasSpaceAfterBlockTags()1784 public void textHasSpaceAfterBlockTags() { 1785 Document doc = Jsoup.parse("<div>One</div>Two"); 1786 assertEquals("One Two", doc.text()); 1787 } 1788 1789 @Test textHasSpaceBetweenDivAndCenterTags()1790 public void textHasSpaceBetweenDivAndCenterTags() { 1791 Document doc = Jsoup.parse("<div>One</div><div>Two</div><center>Three</center><center>Four</center>"); 1792 assertEquals("One Two Three Four", doc.text()); 1793 } 1794 1795 @Test testNextElementSiblings()1796 public void testNextElementSiblings() { 1797 Document doc = Jsoup.parse("<ul id='ul'>" + 1798 "<li id='a'>a</li>" + 1799 "<li id='b'>b</li>" + 1800 "<li id='c'>c</li>" + 1801 "</ul> Not An Element but a node" + 1802 "<div id='div'>" + 1803 "<li id='d'>d</li>" + 1804 "</div>"); 1805 1806 Element element = doc.getElementById("a"); 1807 Elements elementSiblings = element.nextElementSiblings(); 1808 assertNotNull(elementSiblings); 1809 assertEquals(2, elementSiblings.size()); 1810 assertEquals("b", elementSiblings.get(0).id()); 1811 assertEquals("c", elementSiblings.get(1).id()); 1812 1813 Element element1 = doc.getElementById("b"); 1814 List<Element> elementSiblings1 = element1.nextElementSiblings(); 1815 assertNotNull(elementSiblings1); 1816 assertEquals(1, elementSiblings1.size()); 1817 assertEquals("c", elementSiblings1.get(0).id()); 1818 1819 Element element2 = doc.getElementById("c"); 1820 List<Element> elementSiblings2 = element2.nextElementSiblings(); 1821 assertEquals(0, elementSiblings2.size()); 1822 1823 Element ul = doc.getElementById("ul"); 1824 List<Element> elementSiblings3 = ul.nextElementSiblings(); 1825 assertNotNull(elementSiblings3); 1826 assertEquals(1, elementSiblings3.size()); 1827 assertEquals("div", elementSiblings3.get(0).id()); 1828 1829 Element div = doc.getElementById("div"); 1830 List<Element> elementSiblings4 = div.nextElementSiblings(); 1831 assertEquals(0, elementSiblings4.size()); 1832 } 1833 1834 @Test testPreviousElementSiblings()1835 public void testPreviousElementSiblings() { 1836 Document doc = Jsoup.parse("<ul id='ul'>" + 1837 "<li id='a'>a</li>" + 1838 "<li id='b'>b</li>" + 1839 "<li id='c'>c</li>" + 1840 "</ul>" + 1841 "<div id='div'>" + 1842 "<li id='d'>d</li>" + 1843 "</div>"); 1844 1845 Element element = doc.getElementById("b"); 1846 Elements elementSiblings = element.previousElementSiblings(); 1847 assertNotNull(elementSiblings); 1848 assertEquals(1, elementSiblings.size()); 1849 assertEquals("a", elementSiblings.get(0).id()); 1850 1851 Element element1 = doc.getElementById("a"); 1852 List<Element> elementSiblings1 = element1.previousElementSiblings(); 1853 assertEquals(0, elementSiblings1.size()); 1854 1855 Element element2 = doc.getElementById("c"); 1856 List<Element> elementSiblings2 = element2.previousElementSiblings(); 1857 assertNotNull(elementSiblings2); 1858 assertEquals(2, elementSiblings2.size()); 1859 assertEquals("b", elementSiblings2.get(0).id()); 1860 assertEquals("a", elementSiblings2.get(1).id()); 1861 1862 Element ul = doc.getElementById("ul"); 1863 List<Element> elementSiblings3 = ul.previousElementSiblings(); 1864 assertEquals(0, elementSiblings3.size()); 1865 } 1866 1867 @Test testClearAttributes()1868 public void testClearAttributes() { 1869 Element el = new Element("a").attr("href", "http://example.com").text("Hello"); 1870 assertEquals("<a href=\"http://example.com\">Hello</a>", el.outerHtml()); 1871 Element el2 = el.clearAttributes(); // really just force testing the return type is Element 1872 assertSame(el, el2); 1873 assertEquals("<a>Hello</a>", el2.outerHtml()); 1874 } 1875 1876 @Test testRemoveAttr()1877 public void testRemoveAttr() { 1878 Element el = new Element("a") 1879 .attr("href", "http://example.com") 1880 .attr("id", "1") 1881 .text("Hello"); 1882 assertEquals("<a href=\"http://example.com\" id=\"1\">Hello</a>", el.outerHtml()); 1883 Element el2 = el.removeAttr("href"); // really just force testing the return type is Element 1884 assertSame(el, el2); 1885 assertEquals("<a id=\"1\">Hello</a>", el2.outerHtml()); 1886 } 1887 1888 @Test testRoot()1889 public void testRoot() { 1890 Element el = new Element("a"); 1891 el.append("<span>Hello</span>"); 1892 assertEquals("<a><span>Hello</span></a>", el.outerHtml()); 1893 Element span = el.selectFirst("span"); 1894 assertNotNull(span); 1895 Element el2 = span.root(); 1896 assertSame(el, el2); 1897 1898 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three"); 1899 Element div = doc.selectFirst("div"); 1900 assertSame(doc, div.root()); 1901 assertSame(doc, div.ownerDocument()); 1902 } 1903 1904 @Test testTraverse()1905 public void testTraverse() { 1906 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three"); 1907 Element div = doc.selectFirst("div"); 1908 assertNotNull(div); 1909 final AtomicInteger counter = new AtomicInteger(0); 1910 1911 Element div2 = div.traverse(new NodeVisitor() { 1912 1913 @Override 1914 public void head(Node node, int depth) { 1915 counter.incrementAndGet(); 1916 } 1917 1918 @Override 1919 public void tail(Node node, int depth) { 1920 1921 } 1922 }); 1923 1924 assertEquals(7, counter.get()); 1925 assertEquals(div2, div); 1926 } 1927 testTraverseLambda()1928 @Test void testTraverseLambda() { 1929 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three"); 1930 Element div = doc.selectFirst("div"); 1931 assertNotNull(div); 1932 final AtomicInteger counter = new AtomicInteger(0); 1933 1934 Element div2 = div.traverse((node, depth) -> counter.incrementAndGet()); 1935 1936 assertEquals(7, counter.get()); 1937 assertEquals(div2, div); 1938 } 1939 1940 @Test testFilterCallReturnsElement()1941 public void testFilterCallReturnsElement() { 1942 // doesn't actually test the filter so much as the return type for Element. See node.nodeFilter for an actual test 1943 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three"); 1944 Element div = doc.selectFirst("div"); 1945 assertNotNull(div); 1946 Element div2 = div.filter(new NodeFilter() { 1947 @Override 1948 public FilterResult head(Node node, int depth) { 1949 return FilterResult.CONTINUE; 1950 } 1951 1952 @Override 1953 public FilterResult tail(Node node, int depth) { 1954 return FilterResult.CONTINUE; 1955 } 1956 }); 1957 1958 assertSame(div, div2); 1959 } 1960 testFilterAsLambda()1961 @Test void testFilterAsLambda() { 1962 Document doc = Jsoup.parse("<div><p>One<p id=2>Two<p>Three"); 1963 doc.filter((node, depth) -> node.attr("id").equals("2") 1964 ? NodeFilter.FilterResult.REMOVE 1965 : NodeFilter.FilterResult.CONTINUE); 1966 1967 assertEquals("<div><p>One</p><p>Three</p></div>", TextUtil.stripNewlines(doc.body().html())); 1968 } 1969 testForEach()1970 @Test void testForEach() { 1971 Document doc = Jsoup.parse("<div><p>Hello</p></div><div>There</div><div id=1>Gone<p></div>"); 1972 doc.forEach(el -> { 1973 if (el.id().equals("1")) 1974 el.remove(); 1975 else if (el.text().equals("There")) { 1976 el.text("There Now"); 1977 el.append("<p>Another</p>"); 1978 } 1979 }); 1980 assertEquals("<div><p>Hello</p></div><div>There Now<p>Another</p></div>", TextUtil.stripNewlines(doc.body().html())); 1981 } 1982 1983 @Test doesntDeleteZWJWhenNormalizingText()1984 public void doesntDeleteZWJWhenNormalizingText() { 1985 String text = "\uD83D\uDC69\u200D\uD83D\uDCBB\uD83E\uDD26\uD83C\uDFFB\u200D\u2642\uFE0F"; 1986 1987 Document doc = Jsoup.parse("<p>" + text + "</p><div>One‍Two</div>"); 1988 Element p = doc.selectFirst("p"); 1989 Element d = doc.selectFirst("div"); 1990 1991 assertEquals(12, p.text().length()); 1992 assertEquals(text, p.text()); 1993 assertEquals(7, d.text().length()); 1994 assertEquals("One\u200DTwo", d.text()); 1995 Element found = doc.selectFirst("div:contains(One\u200DTwo)"); 1996 assertTrue(found.hasSameValue(d)); 1997 } 1998 1999 @Test testReparentSeperateNodes()2000 public void testReparentSeperateNodes() { 2001 String html = "<div><p>One<p>Two"; 2002 Document doc = Jsoup.parse(html); 2003 Element new1 = new Element("p").text("Three"); 2004 Element new2 = new Element("p").text("Four"); 2005 2006 doc.body().insertChildren(-1, new1, new2); 2007 assertEquals("<div><p>One</p><p>Two</p></div><p>Three</p><p>Four</p>", TextUtil.stripNewlines(doc.body().html())); 2008 2009 // note that these get moved from the above - as not copied 2010 doc.body().insertChildren(0, new1, new2); 2011 assertEquals("<p>Three</p><p>Four</p><div><p>One</p><p>Two</p></div>", TextUtil.stripNewlines(doc.body().html())); 2012 2013 doc.body().insertChildren(0, new2.clone(), new1.clone()); 2014 assertEquals("<p>Four</p><p>Three</p><p>Three</p><p>Four</p><div><p>One</p><p>Two</p></div>", TextUtil.stripNewlines(doc.body().html())); 2015 2016 // shifted to end 2017 doc.body().appendChild(new1); 2018 assertEquals("<p>Four</p><p>Three</p><p>Four</p><div><p>One</p><p>Two</p></div><p>Three</p>", TextUtil.stripNewlines(doc.body().html())); 2019 } 2020 2021 @Test testNotActuallyAReparent()2022 public void testNotActuallyAReparent() { 2023 // prep 2024 String html = "<div>"; 2025 Document doc = Jsoup.parse(html); 2026 Element div = doc.selectFirst("div"); 2027 Element new1 = new Element("p").text("One"); 2028 Element new2 = new Element("p").text("Two"); 2029 div.addChildren(new1, new2); 2030 2031 assertEquals("<div><p>One</p><p>Two</p></div>", TextUtil.stripNewlines(div.outerHtml())); 2032 2033 // and the issue setup: 2034 Element new3 = new Element("p").text("Three"); 2035 Element wrap = new Element("nav"); 2036 wrap.addChildren(0, new1, new3); 2037 2038 assertEquals("<nav><p>One</p><p>Three</p></nav>", TextUtil.stripNewlines(wrap.outerHtml())); 2039 div.addChildren(wrap); 2040 // now should be that One moved into wrap, leaving Two in div. 2041 2042 assertEquals("<div><p>Two</p><nav><p>One</p><p>Three</p></nav></div>", TextUtil.stripNewlines(div.outerHtml())); 2043 assertEquals("<div><p>Two</p><nav><p>One</p><p>Three</p></nav></div>", TextUtil.stripNewlines(div.outerHtml())); 2044 } 2045 2046 @Test testChildSizeWithMixedContent()2047 public void testChildSizeWithMixedContent() { 2048 Document doc = Jsoup.parse("<table><tbody>\n<tr>\n<td>15:00</td>\n<td>sport</td>\n</tr>\n</tbody></table>"); 2049 Element row = doc.selectFirst("table tbody tr"); 2050 assertEquals(2, row.childrenSize()); 2051 assertEquals(5, row.childNodeSize()); 2052 } 2053 2054 @Test isBlock()2055 public void isBlock() { 2056 String html = "<div><p><span>Hello</span>"; 2057 Document doc = Jsoup.parse(html); 2058 assertTrue(doc.selectFirst("div").isBlock()); 2059 assertTrue(doc.selectFirst("p").isBlock()); 2060 assertFalse(doc.selectFirst("span").isBlock()); 2061 } 2062 2063 @Test testScriptTextHtmlSetAsData()2064 public void testScriptTextHtmlSetAsData() { 2065 String src = "var foo = 5 < 2;\nvar bar = 1 && 2;"; 2066 String html = "<script>" + src + "</script>"; 2067 Document doc = Jsoup.parse(html); 2068 Element el = doc.selectFirst("script"); 2069 assertNotNull(el); 2070 2071 validateScriptContents(src, el); 2072 2073 src = "var foo = 4 < 2;\nvar bar > 1 && 2;"; 2074 el.html(src); 2075 validateScriptContents(src, el); 2076 2077 // special case for .text (in HTML; in XML will just be regular text) 2078 el.text(src); 2079 validateScriptContents(src, el); 2080 2081 // XML, no special treatment, get escaped correctly 2082 Document xml = Parser.xmlParser().parseInput(html, ""); 2083 Element xEl = xml.selectFirst("script"); 2084 assertNotNull(xEl); 2085 src = "var foo = 5 < 2;\nvar bar = 1 && 2;"; 2086 String escaped = "var foo = 5 < 2;\nvar bar = 1 && 2;"; 2087 validateXmlScriptContents(xEl); 2088 xEl.text(src); 2089 validateXmlScriptContents(xEl); 2090 xEl.html(src); 2091 validateXmlScriptContents(xEl); 2092 2093 assertEquals("<script>var foo = 4 < 2;\nvar bar > 1 && 2;</script>", el.outerHtml()); 2094 assertEquals("<script>" + escaped + "</script>", xEl.outerHtml()); // escaped in xml as no special treatment 2095 2096 } 2097 2098 @Test testShallowCloneToString()2099 public void testShallowCloneToString() { 2100 // https://github.com/jhy/jsoup/issues/1410 2101 Document doc = Jsoup.parse("<p><i>Hello</i></p>"); 2102 Element p = doc.selectFirst("p"); 2103 Element i = doc.selectFirst("i"); 2104 String pH = p.shallowClone().toString(); 2105 String iH = i.shallowClone().toString(); 2106 2107 assertEquals("<p></p>", pH); // shallow, so no I 2108 assertEquals("<i></i>", iH); 2109 2110 assertEquals(p.outerHtml(), p.toString()); 2111 assertEquals(i.outerHtml(), i.toString()); 2112 } 2113 2114 @Test styleHtmlRoundTrips()2115 public void styleHtmlRoundTrips() { 2116 String styleContents = "foo < bar > qux {color:white;}"; 2117 String html = "<head><style>" + styleContents + "</style></head>"; 2118 Document doc = Jsoup.parse(html); 2119 2120 Element head = doc.head(); 2121 Element style = head.selectFirst("style"); 2122 assertNotNull(style); 2123 assertEquals(styleContents, style.html()); 2124 style.html(styleContents); 2125 assertEquals(styleContents, style.html()); 2126 assertEquals("", style.text()); 2127 style.text(styleContents); // pushes the HTML, not the Text 2128 assertEquals("", style.text()); 2129 assertEquals(styleContents, style.html()); 2130 } 2131 2132 @Test moveChildren()2133 public void moveChildren() { 2134 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three</div><div></div>"); 2135 Elements divs = doc.select("div"); 2136 Element a = divs.get(0); 2137 Element b = divs.get(1); 2138 2139 b.insertChildren(-1, a.childNodes()); 2140 2141 assertEquals("<div></div>\n<div>\n <p>One</p>\n <p>Two</p>\n <p>Three</p>\n</div>", 2142 doc.body().html()); 2143 } 2144 2145 @Test moveChildrenToOuter()2146 public void moveChildrenToOuter() { 2147 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three</div><div></div>"); 2148 Elements divs = doc.select("div"); 2149 Element a = divs.get(0); 2150 Element b = doc.body(); 2151 2152 b.insertChildren(-1, a.childNodes()); 2153 2154 assertEquals("<div></div>\n<div></div>\n<p>One</p>\n<p>Two</p>\n<p>Three</p>", 2155 doc.body().html()); 2156 } 2157 2158 @Test appendChildren()2159 public void appendChildren() { 2160 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three</div><div><p>Four</div>"); 2161 Elements divs = doc.select("div"); 2162 Element a = divs.get(0); 2163 Element b = divs.get(1); 2164 2165 b.appendChildren(a.childNodes()); 2166 2167 assertEquals("<div></div>\n<div>\n <p>Four</p>\n <p>One</p>\n <p>Two</p>\n <p>Three</p>\n</div>", 2168 doc.body().html()); 2169 } 2170 2171 @Test prependChildren()2172 public void prependChildren() { 2173 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three</div><div><p>Four</div>"); 2174 Elements divs = doc.select("div"); 2175 Element a = divs.get(0); 2176 Element b = divs.get(1); 2177 2178 b.prependChildren(a.childNodes()); 2179 2180 assertEquals("<div></div>\n<div>\n <p>One</p>\n <p>Two</p>\n <p>Three</p>\n <p>Four</p>\n</div>", 2181 doc.body().html()); 2182 } 2183 2184 @Test loopMoveChildren()2185 public void loopMoveChildren() { 2186 Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three</div><div><p>Four</div>"); 2187 Elements divs = doc.select("div"); 2188 Element a = divs.get(0); 2189 Element b = divs.get(1); 2190 2191 Element outer = b.parent(); 2192 assertNotNull(outer); 2193 for (Node node : a.childNodes()) { 2194 outer.appendChild(node); 2195 } 2196 2197 assertEquals("<div></div>\n<div>\n <p>Four</p>\n</div>\n<p>One</p>\n<p>Two</p>\n<p>Three</p>", 2198 doc.body().html()); 2199 } 2200 2201 @Test accessorsDoNotVivifyAttributes()2202 public void accessorsDoNotVivifyAttributes() throws NoSuchFieldException, IllegalAccessException { 2203 // internally, we don't want to create empty Attribute objects unless actually used for something 2204 Document doc = Jsoup.parse("<div><p><a href=foo>One</a>"); 2205 Element div = doc.selectFirst("div"); 2206 Element p = doc.selectFirst("p"); 2207 Element a = doc.selectFirst("a"); 2208 2209 // should not create attributes 2210 assertEquals("", div.attr("href")); 2211 p.removeAttr("href"); 2212 2213 Elements hrefs = doc.select("[href]"); 2214 assertEquals(1, hrefs.size()); 2215 2216 assertFalse(div.hasAttributes()); 2217 assertFalse(p.hasAttributes()); 2218 assertTrue(a.hasAttributes()); 2219 } 2220 2221 @Test childNodesAccessorDoesNotVivify()2222 public void childNodesAccessorDoesNotVivify() { 2223 Document doc = Jsoup.parse("<p></p>"); 2224 Element p = doc.selectFirst("p"); 2225 assertFalse(p.hasChildNodes()); 2226 2227 assertEquals(0, p.childNodeSize()); 2228 assertEquals(0, p.childrenSize()); 2229 2230 List<Node> childNodes = p.childNodes(); 2231 assertEquals(0, childNodes.size()); 2232 2233 Elements children = p.children(); 2234 assertEquals(0, children.size()); 2235 2236 assertFalse(p.hasChildNodes()); 2237 } 2238 emptyChildrenElementsIsModifiable()2239 @Test void emptyChildrenElementsIsModifiable() { 2240 // using unmodifiable empty in childElementList as short circuit, but people may be modifying Elements. 2241 Element p = new Element("p"); 2242 Elements els = p.children(); 2243 assertEquals(0, els.size()); 2244 els.add(new Element("a")); 2245 assertEquals(1, els.size()); 2246 } 2247 attributeSizeDoesNotAutoVivify()2248 @Test public void attributeSizeDoesNotAutoVivify() { 2249 Document doc = Jsoup.parse("<p></p>"); 2250 Element p = doc.selectFirst("p"); 2251 assertNotNull(p); 2252 assertFalse(p.hasAttributes()); 2253 assertEquals(0, p.attributesSize()); 2254 assertFalse(p.hasAttributes()); 2255 2256 p.attr("foo", "bar"); 2257 assertEquals(1, p.attributesSize()); 2258 assertTrue(p.hasAttributes()); 2259 2260 p.removeAttr("foo"); 2261 assertEquals(0, p.attributesSize()); 2262 } 2263 clonedElementsHaveOwnerDocsAndIndependentSettings()2264 @Test void clonedElementsHaveOwnerDocsAndIndependentSettings() { 2265 // https://github.com/jhy/jsoup/issues/763 2266 Document doc = Jsoup.parse("<div>Text</div><div>Two</div>"); 2267 doc.outputSettings().prettyPrint(false); 2268 Element div = doc.selectFirst("div"); 2269 assertNotNull(div); 2270 Node text = div.childNode(0); 2271 assertNotNull(text); 2272 2273 Element divClone = div.clone(); 2274 Document docClone = divClone.ownerDocument(); 2275 assertNotNull(docClone); 2276 assertFalse(docClone.outputSettings().prettyPrint()); 2277 assertNotSame(doc, docClone); 2278 assertSame(docClone, divClone.childNode(0).ownerDocument()); 2279 // the cloned text has same owner doc as the cloned div 2280 2281 doc.outputSettings().prettyPrint(true); 2282 assertTrue(doc.outputSettings().prettyPrint()); 2283 assertFalse(docClone.outputSettings().prettyPrint()); 2284 assertEquals(1, docClone.children().size()); // check did not get the second div as the owner's children 2285 assertEquals(divClone, docClone.child(0)); // note not the head or the body -- not normalized 2286 } 2287 testOutputSettings()2288 private static Stream<Document.OutputSettings> testOutputSettings() { 2289 return Stream.of( 2290 new Document.OutputSettings().prettyPrint(true).indentAmount(4), 2291 new Document.OutputSettings().prettyPrint(true).indentAmount(1), 2292 new Document.OutputSettings().prettyPrint(true).indentAmount(4).outline(true), 2293 new Document.OutputSettings().prettyPrint(false) 2294 ); 2295 } 2296 2297 @ParameterizedTest 2298 @MethodSource("testOutputSettings") prettySerializationRoundTrips(Document.OutputSettings settings)2299 void prettySerializationRoundTrips(Document.OutputSettings settings) { 2300 // https://github.com/jhy/jsoup/issues/1688 2301 // tests that repeated html() and parse() does not accumulate errant spaces / newlines 2302 Document doc = Jsoup.parse("<div>\nFoo\n<p>\nBar\nqux</p></div>\n<script>\n alert('Hello!');\n</script>"); 2303 doc.outputSettings(settings); 2304 String html = doc.html(); 2305 Document doc2 = Jsoup.parse(html); 2306 doc2.outputSettings(settings); 2307 String html2 = doc2.html(); 2308 2309 assertEquals(html, html2); 2310 } 2311 prettyPrintScriptsDoesNotGrowOnRepeat()2312 @Test void prettyPrintScriptsDoesNotGrowOnRepeat() { 2313 Document doc = Jsoup.parse("<div>\nFoo\n<p>\nBar\nqux</p></div>\n<script>\n alert('Hello!');\n</script>"); 2314 Document.OutputSettings settings = doc.outputSettings(); 2315 settings 2316 .prettyPrint(true) 2317 .outline(true) 2318 .indentAmount(4) 2319 ; 2320 2321 String html = doc.html(); 2322 Document doc2 = Jsoup.parse(html); 2323 doc2.outputSettings(settings); 2324 String html2 = doc2.html(); 2325 assertEquals(html, html2); 2326 } 2327 elementBrText()2328 @Test void elementBrText() { 2329 // testcase for https://github.com/jhy/jsoup/issues/1437 2330 String html = "<p>Hello<br>World</p>"; 2331 Document doc = Jsoup.parse(html); 2332 doc.outputSettings().prettyPrint(false); // otherwise html serializes as Hello<br>\n World. 2333 Element p = doc.select("p").first(); 2334 assertNotNull(p); 2335 assertEquals(html, p.outerHtml()); 2336 assertEquals("Hello World", p.text()); 2337 assertEquals("Hello\nWorld", p.wholeText()); 2338 } 2339 wrapTextAfterBr()2340 @Test void wrapTextAfterBr() { 2341 // https://github.com/jhy/jsoup/issues/1858 2342 String html = "<p>Hello<br>there<br>now.</p>"; 2343 Document doc = Jsoup.parse(html); 2344 assertEquals("<p>Hello<br>\n there<br>\n now.</p>", doc.body().html()); 2345 } 2346 prettyprintBrInBlock()2347 @Test void prettyprintBrInBlock() { 2348 String html = "<div><br> </div>"; 2349 Document doc = Jsoup.parse(html); 2350 assertEquals("<div>\n <br>\n</div>", doc.body().html()); // not div\n br\n \n/div 2351 } 2352 prettyprintBrWhenNotFirstChild()2353 @Test void prettyprintBrWhenNotFirstChild() { 2354 // https://github.com/jhy/jsoup/issues/1911 2355 String h = "<div><p><br>Foo</p><br></div>"; 2356 Document doc = Jsoup.parse(h); 2357 assertEquals("<div>\n" + 2358 " <p><br>\n Foo</p>\n" + 2359 " <br>\n" + 2360 "</div>", doc.body().html()); 2361 // br gets wrapped if in div, but not in p (block vs inline), but always wraps after 2362 } 2363 preformatFlowsToChildTextNodes()2364 @Test void preformatFlowsToChildTextNodes() { 2365 // https://github.com/jhy/jsoup/issues/1776 2366 String html = "<div><pre>One\n<span>\nTwo</span>\n <span> \nThree</span>\n <span>Four <span>Five</span>\n Six\n</pre>"; 2367 Document doc = Jsoup.parse(html); 2368 doc.outputSettings().indentAmount(2).prettyPrint(true); 2369 2370 Element div = doc.selectFirst("div"); 2371 assertNotNull(div); 2372 String actual = div.outerHtml(); 2373 String expect = "<div>\n" + 2374 " <pre>One\n" + 2375 "<span>\n" + 2376 "Two</span>\n" + 2377 " <span> \n" + 2378 "Three</span>\n" + 2379 " <span>Four <span>Five</span>\n" + 2380 " Six\n" + 2381 "</span></pre>\n" + 2382 "</div>"; 2383 assertEquals(expect, actual); 2384 2385 String expectText = "One\n" + 2386 "\n" + 2387 "Two\n" + 2388 " \n" + 2389 "Three\n" + 2390 " Four Five\n" + 2391 " Six\n"; 2392 assertEquals(expectText, div.wholeText()); 2393 2394 String expectOwn = "One\n" + 2395 "\n" + 2396 " \n" + 2397 " "; 2398 assertEquals(expectOwn, div.child(0).wholeOwnText()); 2399 } 2400 inlineInBlockShouldIndent()2401 @Test void inlineInBlockShouldIndent() { 2402 // was inconsistent between <div>\n<span> and <div><span> - former would print inline, latter would wrap(!) 2403 String html = "<div>One <span>Hello</span><span>!</span></div><div>\n<span>There</span></div><div> <span>Now</span></div>"; 2404 Document doc = Jsoup.parse(html); 2405 assertEquals( 2406 "<div>\n" + 2407 " One <span>Hello</span><span>!</span>\n" + 2408 "</div>\n" + 2409 "<div>\n" + 2410 " <span>There</span>\n" + 2411 "</div>\n" + 2412 "<div>\n" + 2413 " <span>Now</span>\n" + 2414 "</div>", 2415 doc.body().html()); 2416 } 2417 testExpectFirst()2418 @Test void testExpectFirst() { 2419 Document doc = Jsoup.parse("<p>One</p><p>Two <span>Three</span> <span>Four</span>"); 2420 2421 Element span = doc.expectFirst("span"); 2422 assertEquals("Three", span.text()); 2423 2424 assertNull(doc.selectFirst("div")); 2425 boolean threw = false; 2426 try { 2427 Element div = doc.expectFirst("div"); 2428 } catch (IllegalArgumentException e) { 2429 threw = true; 2430 } 2431 assertTrue(threw); 2432 } 2433 testExpectFirstMessage()2434 @Test void testExpectFirstMessage() { 2435 Document doc = Jsoup.parse("<p>One</p><p>Two <span>Three</span> <span>Four</span>"); 2436 boolean threw = false; 2437 Element p = doc.expectFirst("P"); 2438 try { 2439 Element span = p.expectFirst("span.doesNotExist"); 2440 } catch (ValidationException e) { 2441 threw = true; 2442 assertEquals("No elements matched the query 'span.doesNotExist' on element 'p'.", e.getMessage()); 2443 } 2444 assertTrue(threw); 2445 } 2446 testExpectFirstMessageDoc()2447 @Test void testExpectFirstMessageDoc() { 2448 Document doc = Jsoup.parse("<p>One</p><p>Two <span>Three</span> <span>Four</span>"); 2449 boolean threw = false; 2450 Element p = doc.expectFirst("P"); 2451 try { 2452 Element span = doc.expectFirst("span.doesNotExist"); 2453 } catch (ValidationException e) { 2454 threw = true; 2455 assertEquals("No elements matched the query 'span.doesNotExist' in the document.", e.getMessage()); 2456 } 2457 assertTrue(threw); 2458 } 2459 spanRunsMaintainSpace()2460 @Test void spanRunsMaintainSpace() { 2461 // https://github.com/jhy/jsoup/issues/1787 2462 Document doc = Jsoup.parse("<p><span>One</span>\n<span>Two</span>\n<span>Three</span></p>"); 2463 String text = "One Two Three"; 2464 Element body = doc.body(); 2465 assertEquals(text, body.text()); 2466 2467 Element p = doc.expectFirst("p"); 2468 String html = p.html(); 2469 p.html(html); 2470 assertEquals(text, body.text()); 2471 2472 assertEquals("<p><span>One</span> <span>Two</span> <span>Three</span></p>", body.html()); 2473 } 2474 doctypeIsPrettyPrinted()2475 @Test void doctypeIsPrettyPrinted() { 2476 // resolves underlying issue raised in https://github.com/jhy/jsoup/pull/1664 2477 Document doc1 = Jsoup.parse("<!--\nlicense\n-->\n \n<!doctype html>\n<html>"); 2478 Document doc2 = Jsoup.parse("\n <!doctype html><html>"); 2479 Document doc3 = Jsoup.parse("<!doctype html>\n<html>"); 2480 Document doc4 = Jsoup.parse("\n<!doctype html>\n<html>"); 2481 Document doc5 = Jsoup.parse("\n<!--\n comment \n --> <!doctype html>\n<html>"); 2482 Document doc6 = Jsoup.parse("<!--\n comment \n --> <!doctype html>\n<html>"); 2483 2484 assertEquals("<!--\nlicense\n-->\n<!doctype html>\n<html>\n <head></head>\n <body></body>\n</html>", doc1.html()); 2485 doc1.outputSettings().prettyPrint(false); 2486 assertEquals("<!--\nlicense\n--><!doctype html>\n<html><head></head><body></body></html>", doc1.html()); 2487 // note that the whitespace between the comment and the doctype is not retained, in Initial state 2488 2489 assertEquals("<!doctype html>\n<html>\n <head></head>\n <body></body>\n</html>", doc2.html()); 2490 assertEquals("<!doctype html>\n<html>\n <head></head>\n <body></body>\n</html>", doc3.html()); 2491 assertEquals("<!doctype html>\n<html>\n <head></head>\n <body></body>\n</html>", doc4.html()); 2492 assertEquals("<!--\n comment \n -->\n<!doctype html>\n<html>\n <head></head>\n <body></body>\n</html>", doc5.html()); 2493 assertEquals("<!--\n comment \n -->\n<!doctype html>\n<html>\n <head></head>\n <body></body>\n</html>", doc6.html()); 2494 } 2495 textnodeInBlockIndent()2496 @Test void textnodeInBlockIndent() { 2497 String html ="<div>\n{{ msg }} \n </div>\n<div>\n{{ msg }} \n </div>"; 2498 Document doc = Jsoup.parse(html); 2499 assertEquals("<div>\n {{ msg }}\n</div>\n<div>\n {{ msg }}\n</div>", doc.body().html()); 2500 } 2501 stripTrailing()2502 @Test void stripTrailing() { 2503 String html = "<p> This <span>is </span>fine. </p>"; 2504 Document doc = Jsoup.parse(html); 2505 assertEquals("<p>This <span>is </span>fine.</p>", doc.body().html()); 2506 } 2507 elementIndentAndSpaceTrims()2508 @Test void elementIndentAndSpaceTrims() { 2509 String html = "<body><div> <p> One Two </p> <a> Hello </a><p>\nSome text \n</p>\n </div>"; 2510 Document doc = Jsoup.parse(html); 2511 assertEquals("<div>\n" + 2512 " <p>One Two</p><a> Hello </a>\n" + 2513 " <p>Some text</p>\n" + 2514 "</div>", doc.body().html()); 2515 } 2516 divAInlineable()2517 @Test void divAInlineable() { 2518 String html = "<body><div> <a>Text</a>"; 2519 Document doc = Jsoup.parse(html); 2520 assertEquals("<div>\n" + 2521 " <a>Text</a>\n" + 2522 "</div>", doc.body().html()); 2523 } 2524 noDanglingSpaceAfterCustomElement()2525 @Test void noDanglingSpaceAfterCustomElement() { 2526 // https://github.com/jhy/jsoup/issues/1852 2527 String html = "<bar><p/>\n</bar>"; 2528 Document doc = Jsoup.parse(html); 2529 assertEquals("<bar>\n <p></p>\n</bar>", doc.body().html()); 2530 2531 html = "<foo>\n <bar />\n</foo>"; 2532 doc = Jsoup.parse(html); 2533 assertEquals("<foo>\n <bar />\n</foo>", doc.body().html()); 2534 } 2535 spanInBlockTrims()2536 @Test void spanInBlockTrims() { 2537 String html = "<p>Lorem ipsum</p>\n<span>Thanks</span>"; 2538 Document doc = Jsoup.parse(html); 2539 String outHtml = doc.body().html(); 2540 assertEquals("<p>Lorem ipsum</p><span>Thanks</span>", outHtml); 2541 } 2542 replaceWithSelf()2543 @Test void replaceWithSelf() { 2544 // https://github.com/jhy/jsoup/issues/1843 2545 Document doc = Jsoup.parse("<p>One<p>Two"); 2546 Elements ps = doc.select("p"); 2547 Element first = ps.first(); 2548 2549 assertNotNull(first); 2550 first.replaceWith(first); 2551 assertEquals(ps.get(1), first.nextSibling()); 2552 assertEquals("<p>One</p>\n<p>Two</p>", first.parent().html()); 2553 } 2554 select()2555 @Test void select() { 2556 Evaluator eval = QueryParser.parse("div"); 2557 Document doc = Jsoup.parse(reference); 2558 Elements els = doc.select("div"); 2559 Elements els2 = doc.select(eval); 2560 assertEquals(els, els2); 2561 } 2562 insertChildrenValidation()2563 @Test void insertChildrenValidation() { 2564 Document doc = Jsoup.parse(reference); 2565 Element div = doc.expectFirst("div"); 2566 Throwable ex = assertThrows(ValidationException.class, () -> div.insertChildren(20, new Element("div"))); 2567 assertEquals("Insert position out of bounds.", ex.getMessage()); 2568 } 2569 cssSelectorNoDoc()2570 @Test void cssSelectorNoDoc() { 2571 Element el = new Element("div"); 2572 el.id("one"); 2573 assertEquals("#one", el.cssSelector()); 2574 } 2575 cssSelectorNoParent()2576 @Test void cssSelectorNoParent() { 2577 Element el = new Element("div"); 2578 assertEquals("div", el.cssSelector()); 2579 } 2580 cssSelectorDoesntStackOverflow()2581 @Test void cssSelectorDoesntStackOverflow() { 2582 // https://github.com/jhy/jsoup/issues/2001 2583 Element element = new Element("element"); 2584 Element root = element; 2585 2586 // Create a long chain of elements 2587 for (int i = 0; i < 5000; i++) { 2588 Element elem2 = new Element("element" + i); 2589 element.appendChild(elem2); 2590 element = elem2; 2591 } 2592 2593 String selector = element.cssSelector(); // would overflow in cssSelector parent() recurse 2594 Evaluator eval = QueryParser.parse(selector); 2595 2596 assertEquals(eval.toString(), selector); 2597 assertTrue(selector.startsWith("element > element0 >")); 2598 assertTrue(selector.endsWith("8 > element4999")); 2599 2600 Elements elements = root.select(selector); // would overflow in nested And ImmediateParent chain eval 2601 assertEquals(1, elements.size()); 2602 assertEquals(element, elements.first()); 2603 } 2604 orphanSiblings()2605 @Test void orphanSiblings() { 2606 Element el = new Element("div"); 2607 assertEquals(0, el.siblingElements().size()); 2608 assertEquals(0, el.nextElementSiblings().size()); 2609 assertEquals(0, el.previousElementSiblings().size()); 2610 assertNull(el.nextElementSibling()); 2611 assertNull(el.previousElementSibling()); 2612 } 2613 getElementsByAttributeStarting()2614 @Test void getElementsByAttributeStarting() { 2615 Document doc = Jsoup.parse("<div data-one=1 data-two=2 id=1><p data-one=3 id=2>Text</div><div>"); 2616 Elements els = doc.getElementsByAttributeStarting(" data- "); 2617 assertEquals(2, els.size()); 2618 assertEquals("1", els.get(0).id()); 2619 assertEquals("2", els.get(1).id()); 2620 assertEquals(0, doc.getElementsByAttributeStarting("not-data").size()); 2621 } 2622 getElementsByAttributeValueNot()2623 @Test void getElementsByAttributeValueNot() { 2624 Document doc = Jsoup.parse("<div data-one=1 data-two=2 id=1><p data-one=3 id=2>Text</div><div id=3>"); 2625 Elements els = doc.body().getElementsByAttributeValueNot("data-one", "1"); 2626 assertEquals(3, els.size()); // the body, p, and last div 2627 assertEquals("body", els.get(0).normalName()); 2628 assertEquals("2", els.get(1).id()); 2629 assertEquals("3", els.get(2).id()); 2630 } 2631 getElementsByAttributeValueStarting()2632 @Test void getElementsByAttributeValueStarting() { 2633 Document doc = Jsoup.parse("<a href=one1></a><a href=one2></a><a href=else</a>"); 2634 Elements els = doc.getElementsByAttributeValueStarting("href", "one"); 2635 assertEquals(2, els.size()); 2636 assertEquals("one1", els.get(0).attr("href")); 2637 assertEquals("one2", els.get(1).attr("href")); 2638 } 2639 getElementsByAttributeValueEnding()2640 @Test void getElementsByAttributeValueEnding() { 2641 Document doc = Jsoup.parse("<a href=1one></a><a href=2one></a><a href=else</a>"); 2642 Elements els = doc.getElementsByAttributeValueEnding("href", "one"); 2643 assertEquals(2, els.size()); 2644 assertEquals("1one", els.get(0).attr("href")); 2645 assertEquals("2one", els.get(1).attr("href")); 2646 } 2647 getElementsByAttributeValueContaining()2648 @Test void getElementsByAttributeValueContaining() { 2649 Document doc = Jsoup.parse("<a href=1one></a><a href=2one></a><a href=else</a>"); 2650 Elements els = doc.getElementsByAttributeValueContaining("href", "on"); 2651 assertEquals(2, els.size()); 2652 assertEquals("1one", els.get(0).attr("href")); 2653 assertEquals("2one", els.get(1).attr("href")); 2654 } 2655 getElementsByAttributeValueMatchingPattern()2656 @Test void getElementsByAttributeValueMatchingPattern() { 2657 Document doc = Jsoup.parse("<a href=1one></a><a href=2one></a><a href=else</a>"); 2658 Elements els = doc.getElementsByAttributeValueMatching("href", Pattern.compile("^\\d\\w+")); 2659 assertEquals(2, els.size()); 2660 assertEquals("1one", els.get(0).attr("href")); 2661 assertEquals("2one", els.get(1).attr("href")); 2662 } 2663 getElementsByAttributeValueMatching()2664 @Test void getElementsByAttributeValueMatching() { 2665 Document doc = Jsoup.parse("<a href=1one></a><a href=2one></a><a href=else</a>"); 2666 Elements els = doc.getElementsByAttributeValueMatching("href", "^\\d\\w+"); 2667 assertEquals(2, els.size()); 2668 assertEquals("1one", els.get(0).attr("href")); 2669 assertEquals("2one", els.get(1).attr("href")); 2670 } 2671 getElementsByAttributeValueMatchingValidation()2672 @Test void getElementsByAttributeValueMatchingValidation() { 2673 Document doc = Jsoup.parse(reference); 2674 Throwable ex = assertThrows(IllegalArgumentException.class, 2675 () -> doc.getElementsByAttributeValueMatching("key", "\\x")); 2676 assertEquals("Pattern syntax error: \\x", ex.getMessage()); 2677 } 2678 getElementsByIndexEquals()2679 @Test void getElementsByIndexEquals() { 2680 Document doc = Jsoup.parse("<a href=1one></a><a href=2one></a><a href=else</a>"); 2681 Elements els = doc.body().getElementsByIndexEquals(1); 2682 assertEquals(2, els.size()); 2683 assertEquals("body", els.get(0).normalName()); 2684 assertEquals("2one", els.get(1).attr("href")); 2685 } 2686 getElementsContainingText()2687 @Test void getElementsContainingText() { 2688 Document doc = Jsoup.parse("<div id=1>One</div><div>Two</div>"); 2689 Elements els = doc.body().getElementsContainingText("one"); 2690 assertEquals(2, els.size()); 2691 assertEquals("body", els.get(0).normalName()); 2692 assertEquals("1", els.get(1).id()); 2693 } 2694 getElementsContainingOwnText()2695 @Test void getElementsContainingOwnText() { 2696 Document doc = Jsoup.parse("<div id=1>One</div><div>Two</div>"); 2697 Elements els = doc.body().getElementsContainingOwnText("one"); 2698 assertEquals(1, els.size()); 2699 assertEquals("1", els.get(0).id()); 2700 } 2701 getElementsMatchingTextValidation()2702 @Test void getElementsMatchingTextValidation() { 2703 Document doc = Jsoup.parse(reference); 2704 Throwable ex = assertThrows(IllegalArgumentException.class, 2705 () -> doc.getElementsMatchingText("\\x")); 2706 assertEquals("Pattern syntax error: \\x", ex.getMessage()); 2707 } 2708 getElementsMatchingText()2709 @Test void getElementsMatchingText() { 2710 Document doc = Jsoup.parse("<div id=1>One</div><div>Two</div>"); 2711 Elements els = doc.body().getElementsMatchingText("O\\w+"); 2712 assertEquals(2, els.size()); 2713 assertEquals("body", els.get(0).normalName()); 2714 assertEquals("1", els.get(1).id()); 2715 } 2716 getElementsMatchingOwnText()2717 @Test void getElementsMatchingOwnText() { 2718 Document doc = Jsoup.parse("<div id=1>One</div><div>Two</div>"); 2719 Elements els = doc.body().getElementsMatchingOwnText("O\\w+"); 2720 assertEquals(1, els.size()); 2721 assertEquals("1", els.get(0).id()); 2722 } 2723 getElementsMatchingOwnTextValidation()2724 @Test void getElementsMatchingOwnTextValidation() { 2725 Document doc = Jsoup.parse(reference); 2726 Throwable ex = assertThrows(IllegalArgumentException.class, 2727 () -> doc.getElementsMatchingOwnText("\\x")); 2728 assertEquals("Pattern syntax error: \\x", ex.getMessage()); 2729 } 2730 hasText()2731 @Test void hasText() { 2732 Document doc = Jsoup.parse("<div id=1><p><i>One</i></p></div><div id=2>Two</div><div id=3><script>data</script> </div>"); 2733 assertTrue(doc.getElementById("1").hasText()); 2734 assertTrue(doc.getElementById("2").hasText()); 2735 assertFalse(doc.getElementById("3").hasText()); 2736 } 2737 dataInCdataNode()2738 @Test void dataInCdataNode() { 2739 Element el = new Element("div"); 2740 CDataNode cdata = new CDataNode("Some CData"); 2741 el.appendChild(cdata); 2742 assertEquals("Some CData", el.data()); 2743 2744 Document parse = Jsoup.parse("One <![CDATA[Hello]]>"); 2745 assertEquals("Hello", parse.data()); 2746 } 2747 datanodesOutputCdataInXhtml()2748 @Test void datanodesOutputCdataInXhtml() { 2749 String html = "<p><script>1 && 2</script><style>3 && 4</style> 5 && 6</p>"; 2750 Document doc = Jsoup.parse(html); // parsed as HTML 2751 String out = TextUtil.normalizeSpaces(doc.body().html()); 2752 assertEquals(html, out); 2753 Element scriptEl = doc.expectFirst("script"); 2754 DataNode scriptDataNode = (DataNode) scriptEl.childNode(0); 2755 assertEquals("1 && 2", scriptDataNode.getWholeData()); 2756 2757 doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); 2758 Element p = doc.expectFirst("p"); 2759 String xml = p.html(); 2760 assertEquals( 2761 "<script>//<![CDATA[\n" + 2762 "1 && 2\n" + 2763 "//]]></script>\n" + 2764 "<style>/*<![CDATA[*/\n" + 2765 "3 && 4\n" + 2766 "/*]]>*/</style> 5 && 6", 2767 xml); 2768 2769 Document xmlDoc = Jsoup.parse(xml, Parser.xmlParser()); 2770 assertEquals(xml, xmlDoc.html()); 2771 Element scriptXmlEl = xmlDoc.expectFirst("script"); 2772 TextNode scriptText = (TextNode) scriptXmlEl.childNode(0); 2773 assertEquals("//", scriptText.getWholeText()); 2774 CDataNode scriptCdata = (CDataNode) scriptXmlEl.childNode(1); 2775 assertEquals("\n1 && 2\n//", scriptCdata.text()); 2776 } 2777 datanodesOutputExistingCdataInXhtml()2778 @Test void datanodesOutputExistingCdataInXhtml() { 2779 String html = "<p><script>//<![CDATA[\n1 && 2\n//]]></script><style>\n/*<![CDATA[*/3 && 4\n/*]]>*/</style> 5 && 6</p>";; 2780 Document doc = Jsoup.parse(html); // parsed as HTML 2781 String out = TextUtil.normalizeSpaces(doc.body().html()); 2782 assertEquals("<p><script>//<![CDATA[1 && 2//]]></script><style>/*<![CDATA[*/3 && 4/*]]>*/</style> 5 && 6</p>", out); 2783 Element scriptEl = doc.expectFirst("script"); 2784 DataNode scriptDataNode = (DataNode) scriptEl.childNode(0); 2785 assertEquals("//<![CDATA[\n" + 2786 "1 && 2\n" + 2787 "//]]>", scriptDataNode.getWholeData()); 2788 2789 doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); 2790 Element p = doc.expectFirst("p"); 2791 String xml = p.html(); 2792 assertEquals( 2793 "<script>//<![CDATA[\n" + 2794 "1 && 2\n" + 2795 "//]]></script>\n" + 2796 "<style>\n" + 2797 "/*<![CDATA[*/3 && 4\n" + 2798 "/*]]>*/</style> 5 && 6", 2799 xml); 2800 2801 Document xmlDoc = Jsoup.parse(xml, Parser.xmlParser()); 2802 assertEquals(xml, xmlDoc.html()); 2803 Element scriptXmlEl = xmlDoc.expectFirst("script"); 2804 TextNode scriptText = (TextNode) scriptXmlEl.childNode(0); 2805 assertEquals("//", scriptText.getWholeText()); 2806 CDataNode scriptCdata = (CDataNode) scriptXmlEl.childNode(1); 2807 assertEquals("\n1 && 2\n//", scriptCdata.text()); 2808 } 2809 outerHtmlAppendable()2810 @Test void outerHtmlAppendable() { 2811 // tests not string builder flow 2812 Document doc = Jsoup.parse("<div>One</div>"); 2813 StringBuffer buffer = new StringBuffer(); 2814 doc.body().outerHtml(buffer); 2815 assertEquals("\n<body>\n <div>\n One\n </div>\n</body>", buffer.toString()); 2816 StringBuilder builder = new StringBuilder(); 2817 doc.body().outerHtml(builder); 2818 assertEquals("<body>\n <div>\n One\n </div>\n</body>", builder.toString()); 2819 } 2820 rubyInline()2821 @Test void rubyInline() { 2822 String html = "<ruby>T<rp>(</rp><rtc>!</rtc><rt>)</rt></ruby>"; 2823 Document doc = Jsoup.parse(html); 2824 assertEquals(html, doc.body().html()); 2825 } 2826 nestedFormatAsInlinePrintsAsBlock()2827 @Test void nestedFormatAsInlinePrintsAsBlock() { 2828 // https://github.com/jhy/jsoup/issues/1926 2829 String h = " <table>\n" + 2830 " <tr>\n" + 2831 " <td>\n" + 2832 " <p style=\"display:inline;\">A</p>\n" + 2833 " <p style=\"display:inline;\">B</p>\n" + 2834 " </td>\n" + 2835 " </tr>\n" + 2836 " </table>"; 2837 Document doc = Jsoup.parse(h); 2838 String out = doc.body().html(); 2839 assertEquals("<table>\n" + 2840 " <tbody>\n" + 2841 " <tr>\n" + 2842 " <td>\n" + 2843 " <p style=\"display:inline;\">A</p>\n" + 2844 " <p style=\"display:inline;\">B</p></td>\n" + 2845 " </tr>\n" + 2846 " </tbody>\n" + 2847 "</table>", out); 2848 // todo - I would prefer the </td> to wrap down there - but need to reimplement pretty printer to simplify and track indented state 2849 } 2850 emptyDetachesChildren()2851 @Test void emptyDetachesChildren() { 2852 String html = "<div><p>One<p>Two</p>Three</div>"; 2853 Document doc = Jsoup.parse(html); 2854 Element div = doc.expectFirst("div"); 2855 assertEquals(3, div.childNodeSize()); 2856 2857 List<Node> childNodes = div.childNodes(); 2858 2859 div.empty(); 2860 assertEquals(0, div.childNodeSize()); 2861 assertEquals(3, childNodes.size()); // copied before removing 2862 for (Node childNode : childNodes) { 2863 assertNull(childNode.parentNode); 2864 } 2865 2866 Element p = (Element) childNodes.get(0); 2867 assertEquals(p, p.childNode(0).parentNode()); // TextNode "One" still has parent p, as detachment is only on div element 2868 } 2869 emptyAndAddPreviousChild()2870 @Test void emptyAndAddPreviousChild() { 2871 String html = "<div><p>One<p>Two<p>Three</div>"; 2872 Document doc = Jsoup.parse(html); 2873 Element div = doc.expectFirst("div"); 2874 Element p = div.expectFirst("p"); 2875 div 2876 .empty() 2877 .appendChild(p); 2878 2879 assertEquals("<p>One</p>", div.html()); 2880 } 2881 emptyAndAddPreviousDescendant()2882 @Test void emptyAndAddPreviousDescendant() { 2883 String html = "<header><div><p>One<p>Two<p>Three</div></header>"; 2884 Document doc = Jsoup.parse(html); 2885 Element header = doc.expectFirst("header"); 2886 Element p = header.expectFirst("p"); 2887 header 2888 .empty() 2889 .appendChild(p); 2890 2891 assertEquals("<p>One</p>", header.html()); 2892 } 2893 xmlSyntaxSetsEscapeMode()2894 @Test void xmlSyntaxSetsEscapeMode() { 2895 String html = "Foo ≻"; 2896 Document doc = Jsoup.parse(html); 2897 doc.outputSettings().charset("ascii"); // so we can see the zws 2898 assertEquals("Foo ≻", doc.body().html()); 2899 2900 doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); 2901 String out = doc.body().html(); 2902 assertEquals("Foo ≻", out); 2903 2904 // can set back if desired 2905 doc.outputSettings().escapeMode(Entities.EscapeMode.extended); 2906 assertEquals("Foo ≻", doc.body().html()); // succ is alias for Succeeds, and first hit in entities 2907 } 2908 attribute()2909 @Test void attribute() { 2910 String html = "<p CLASS='yes'>One</p>"; 2911 Document doc = Jsoup.parse(html); 2912 Element p = doc.expectFirst("p"); 2913 Attribute attr = p.attribute("class"); // HTML parse lower-cases names 2914 assertNotNull(attr); 2915 assertEquals("class", attr.getKey()); 2916 assertEquals("yes", attr.getValue()); 2917 assertFalse(attr.sourceRange().nameRange().start().isTracked()); // tracking disabled 2918 2919 assertNull(p.attribute("CLASS")); // no such key 2920 2921 attr.setKey("CLASS"); // set preserves input case 2922 attr.setValue("YES"); 2923 2924 assertEquals("<p CLASS=\"YES\">One</p>", p.outerHtml()); 2925 assertEquals("CLASS=\"YES\"", attr.html()); 2926 } 2927 } 2928