package org.jsoup.parser;

import org.jsoup.Jsoup;
import org.jsoup.integration.servlets.FileServlet;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.CDataNode;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.DocumentType;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.LeafNode;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.Range;
import org.jsoup.nodes.TextNode;
import org.jsoup.nodes.XmlDeclaration;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.*;

/**
 Functional tests for the Position tracking behavior (across nodes, treebuilder, etc.)
 <p>Range strings asserted below are written as {@code line,column:pos}, with a {@code start-end} pair per range; the
 compact trace strings built by the {@code accumulatePositions} helpers use character positions only.</p>
 */
class PositionTest {
    // Shared parsers with position tracking switched on; used by most tests in this class.
    static Parser TrackingHtmlParser = Parser.htmlParser().setTrackPosition(true);
    static Parser TrackingXmlParser = Parser.xmlParser().setTrackPosition(true);

    /** Position tracking is off for freshly created HTML and XML parsers, and can be switched on. */
    @Test void parserTrackDefaults() {
        Parser htmlParser = Parser.htmlParser();
        assertFalse(htmlParser.isTrackPosition());
        htmlParser.setTrackPosition(true);
        assertTrue(htmlParser.isTrackPosition());

        Parser xmlParser = Parser.xmlParser();
        assertFalse(xmlParser.isTrackPosition());
        xmlParser.setTrackPosition(true);
        assertTrue(xmlParser.isTrackPosition());
    }

    /** Checks start / end ranges of elements, text and comments in a multi-line document with implied tags. */
    @Test void tracksPosition() {
        // Entities are deliberately left encoded in the input: the asserted offsets are measured against this raw
        // text, which is longer than the decoded text the nodes hold.
        String content = "<p id=1\n class=foo>\n<span>Hello\n &reg;\n there &copy.</span> now.\n <!-- comment --> ";
        Document doc = Jsoup.parse(content, TrackingHtmlParser);

        Element html = doc.expectFirst("html");
        Element body = doc.expectFirst("body");
        Element p = doc.expectFirst("p");
        Element span = doc.expectFirst("span");
        TextNode text = (TextNode) span.firstChild();
        assertNotNull(text);
        TextNode now = (TextNode) span.nextSibling();
        assertNotNull(now);
        Comment comment = (Comment) now.nextSibling();
        assertNotNull(comment);

        // implicit
        assertTrue(body.sourceRange().isTracked());
        assertTrue(body.endSourceRange().isTracked());
        assertTrue(body.sourceRange().isImplicit());
        assertTrue(body.endSourceRange().isImplicit());
        Range htmlRange = html.sourceRange();
        assertEquals("1,1:0-1,1:0", htmlRange.toString());
        assertEquals(htmlRange, body.sourceRange());
        assertEquals(html.endSourceRange(), body.endSourceRange());

        Range pRange = p.sourceRange();
        assertEquals("1,1:0-2,12:19", pRange.toString());
        assertFalse(pRange.isImplicit());
        assertTrue(p.endSourceRange().isImplicit());
        assertEquals("6,19:83-6,19:83", p.endSourceRange().toString());
        assertEquals(p.endSourceRange(), html.endSourceRange());

        // no explicit P closer
        Range pEndRange = p.endSourceRange();
        assertTrue(pEndRange.isTracked());
        assertTrue(pEndRange.isImplicit());

        Range.Position pStart = pRange.start();
        assertTrue(pStart.isTracked());
        assertEquals(0, pStart.pos());
        assertEquals(1, pStart.columnNumber());
        assertEquals(1, pStart.lineNumber());
        assertEquals("1,1:0", pStart.toString());

        Range.Position pEnd = pRange.end();
        assertTrue(pEnd.isTracked()); // was re-checking pStart, leaving the end position unverified
        assertEquals(19, pEnd.pos());
        assertEquals(12, pEnd.columnNumber());
        assertEquals(2, pEnd.lineNumber());
        assertEquals("2,12:19", pEnd.toString());

        assertEquals("3,1:20", span.sourceRange().start().toString());
        assertEquals("3,7:26", span.sourceRange().end().toString());

        // span end tag
        Range spanEnd = span.endSourceRange();
        assertTrue(spanEnd.isTracked());
        assertEquals("5,14:52-5,21:59", spanEnd.toString());

        // the node holds decoded text, but its range must span the original (still encoded) input
        String wholeText = text.getWholeText();
        assertEquals("Hello\n ®\n there ©.", wholeText);
        String textOrig = "Hello\n &reg;\n there &copy.";
        Range textRange = text.sourceRange();
        assertEquals(textOrig.length(), textRange.end().pos() - textRange.start().pos());
        assertEquals("3,7:26", textRange.start().toString());
        assertEquals("5,14:52", textRange.end().toString());

        assertEquals("6,2:66", comment.sourceRange().start().toString());
        assertEquals("6,18:82", comment.sourceRange().end().toString());
    }

    /** With every tag written out explicitly, all start and end ranges are tracked and none are implicit. */
    @Test void tracksExpectedPoppedElements() {
        // When TreeBuilder hits a direct .pop(), vs popToClose(..)
        String html = "<html><head><meta></head><body><img><p>One</p><p>Two</p></body></html>";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);

        StringBuilder track = new StringBuilder();
        doc.expectFirst("html").stream().forEach(el -> {
            accumulatePositions(el, track);
            assertTrue(el.sourceRange().isTracked(), el.tagName());
            assertTrue(el.endSourceRange().isTracked(), el.tagName());
            assertFalse(el.sourceRange().isImplicit(), el.tagName());
            assertFalse(el.endSourceRange().isImplicit(), el.tagName());
        });
        assertEquals("html:0-6~63-70; head:6-12~18-25; meta:12-18~12-18; body:25-31~56-63; img:31-36~31-36; p:36-39~42-46; p:46-49~52-56; ", track.toString());

        StringBuilder textTrack = new StringBuilder();
        doc.nodeStream(TextNode.class).forEach(text -> accumulatePositions(text, textTrack));
        assertEquals("#text:39-42; #text:49-52; ", textTrack.toString());
    }

    /**
     Appends a compact position trace for a node to {@code sb}, as {@code name:start-end; }. For elements, the end
     tag range is included as well: {@code name:start-end~endStart-endEnd; }.
     @param node the node whose source range(s) are recorded
     @param sb the accumulator to append to
     */
    static void accumulatePositions(Node node, StringBuilder sb) {
        sb
            .append(node.nodeName())
            .append(':')
            .append(node.sourceRange().startPos())
            .append('-')
            .append(node.sourceRange().endPos());

        if (node instanceof Element) {
            Element el = (Element) node;
            sb
                .append("~")
                .append(el.endSourceRange().startPos())
                .append('-')
                .append(el.endSourceRange().endPos());
        }
        sb.append("; ");
    }

    /** With start and end tags omitted in the input, ranges are still tracked for every element. */
    @Test void tracksImplicitPoppedElements() {
        // When TreeBuilder hits a direct .pop(), vs popToClose(..)
        String html = "<meta><img><p>One<p>Two<p>Three";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);

        StringBuilder track = new StringBuilder();
        doc.expectFirst("html").stream().forEach(el -> {
            assertTrue(el.sourceRange().isTracked());
            assertTrue(el.endSourceRange().isTracked());
            accumulatePositions(el, track);
        });

        assertTrue(doc.expectFirst("p").endSourceRange().isImplicit());
        assertFalse(doc.expectFirst("meta").endSourceRange().isImplicit());
        assertEquals("html:0-0~31-31; head:0-0~6-6; meta:0-6~0-6; body:6-6~31-31; img:6-11~6-11; p:11-14~17-17; p:17-20~23-23; p:23-26~31-31; ", track.toString());
    }

    /** Debugging aid: prints a node's tracked positions to stdout. Not called by any test in this class. */
    private void printRange(Node node) {
        if (node instanceof Element) {
            Element el = (Element) node;
            System.out.println(el.tagName() + "\t"
                + el.sourceRange().start().pos() + "-" + el.sourceRange().end().pos()
                + "\t... "
                + el.endSourceRange().start().pos() + "-" + el.endSourceRange().end().pos()
            );
        } else {
            System.out.println(node.nodeName() + "\t"
                + node.sourceRange().start().pos() + "-" + node.sourceRange().end().pos()
            );
        }
    }

    /** Checks ranges for a doctype, title text containing an entity, and a CDATA section. */
    @Test void tracksMarkup() {
        // &copy; stays encoded in the input so the title text range (23-40) covers the raw source characters
        String html = "<!doctype\nhtml>\n<title>jsoup &copy;\n2022</title><body>\n<![CDATA[\n<jsoup>\n]]>";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);

        DocumentType doctype = doc.documentType();
        assertNotNull(doctype);
        assertEquals("html", doctype.name());
        assertEquals("1,1:0-2,6:15", doctype.sourceRange().toString());

        Element title = doc.expectFirst("title");
        TextNode titleText = (TextNode) title.firstChild();
        assertNotNull(titleText);
        assertEquals("jsoup ©\n2022", title.text());
        assertEquals(titleText.getWholeText(), title.text());
        assertEquals("3,1:16-3,8:23", title.sourceRange().toString());
        assertEquals("3,8:23-4,5:40", titleText.sourceRange().toString());

        CDataNode cdata = (CDataNode) doc.body().childNode(1);
        assertEquals("\n<jsoup>\n", cdata.text());
        assertEquals("5,1:55-7,4:76", cdata.sourceRange().toString());
    }

    /** Checks the range of a script element and of the data node inside it. */
    @Test void tracksDataNodes() {
        String html = "<head>\n<script>foo;\nbar()\n5 <= 4;</script>";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);

        Element script = doc.expectFirst("script");
        assertNotNull(script);
        assertEquals("2,1:7-2,9:15", script.sourceRange().toString());
        DataNode data = (DataNode) script.firstChild();
        assertNotNull(data);
        assertEquals("2,9:15-4,8:33", data.sourceRange().toString());
    }

    /** Checks ranges for declaration, doctype, element, text and comment nodes when using the XML parser. */
    @Test void tracksXml() {
        String xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!doctype html>\n<rss url=foo>\nXML\n</rss>\n<!-- comment -->";
        Document doc = Jsoup.parse(xml, TrackingXmlParser);

        XmlDeclaration decl = (XmlDeclaration) doc.childNode(0);
        assertEquals("1,1:0-1,39:38", decl.sourceRange().toString());

        DocumentType doctype = (DocumentType) doc.childNode(2);
        assertEquals("2,1:39-2,16:54", doctype.sourceRange().toString());

        Element rss = doc.firstElementChild();
        assertNotNull(rss);
        assertEquals("3,1:55-3,14:68", rss.sourceRange().toString());
        assertEquals("5,1:73-5,7:79", rss.endSourceRange().toString());

        TextNode text = (TextNode) rss.firstChild();
        assertNotNull(text);
        assertEquals("3,14:68-5,1:73", text.sourceRange().toString());

        Comment comment = (Comment) rss.nextSibling().nextSibling();
        assertEquals("6,1:80-6,17:96", comment.sourceRange().toString());
    }

    /** Positions are tracked when the document is fetched over a connection rather than parsed from a String. */
    @Test void tracksFromFetch() throws IOException {
        String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K
        Document doc = Jsoup.connect(url).parser(TrackingHtmlParser).get();

        Element firstP = doc.expectFirst("p");
        assertNotNull(firstP);
        assertEquals("4,1:53-4,4:56", firstP.sourceRange().toString());

        Element p = doc.expectFirst("#xy");
        assertNotNull(p);
        assertEquals("1000,1:279646-1000,10:279655", p.sourceRange().toString());
        assertEquals("1000,567:280212-1000,571:280216", p.endSourceRange().toString());

        TextNode text = (TextNode) p.firstChild();
        assertNotNull(text);
        assertEquals("1000,10:279655-1000,357:280002", text.sourceRange().toString());
    }

    /** As {@link #tracksFromFetch()}, but fetching an XML document with the XML parser. */
    @Test void tracksFromXmlFetch() throws IOException {
        String url = FileServlet.urlTo("/htmltests/test-rss.xml");
        Document doc = Jsoup.connect(url).parser(TrackingXmlParser).get();

        Element item = doc.expectFirst("item + item");
        assertNotNull(item);
        assertEquals("13,5:496-13,11:502", item.sourceRange().toString());
        assertEquals("17,5:779-17,12:786", item.endSourceRange().toString());
    }

    /** Text written directly inside table markup keeps the source positions at which it appeared in the input. */
    @Test void tracksTableMovedText() {
        String html = "<table>foo<tr>bar<td>baz</td>qux</tr>coo</table>";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);

        List<TextNode> textNodes = doc.nodeStream(TextNode.class).collect(Collectors.toList());
        StringBuilder track = new StringBuilder();
        for (TextNode node : textNodes) {
            accumulatePositions(node, track);
        }

        assertEquals(5, textNodes.size());
        assertEquals("foo", textNodes.get(0).text());
        assertEquals("bar", textNodes.get(1).text());
        assertEquals("baz", textNodes.get(2).text());
        assertEquals("qux", textNodes.get(3).text());
        assertEquals("coo", textNodes.get(4).text());

        assertEquals("#text:7-10; #text:14-17; #text:21-24; #text:29-32; #text:37-40; ", track.toString());
    }

    /** Elements named like HTML tags have tracked start and end ranges when parsed with the XML parser. */
    @Test void tracksClosingHtmlTagsInXml() {
        // verifies https://github.com/jhy/jsoup/issues/1935
        String xml = "<p>One</p><title>Two</title><data>Three</data>";
        Document doc = Jsoup.parse(xml, TrackingXmlParser);
        Elements els = doc.children();
        for (Element el : els) {
            assertTrue(el.sourceRange().isTracked());
            assertTrue(el.endSourceRange().isTracked());
        }
    }

    /** Heading elements (including the non-standard h10) have tracked start and end ranges. */
    @Test void tracksClosingHeadingTags() {
        // https://github.com/jhy/jsoup/issues/1987
        String html = "<h1>One</h1><h2>Two</h2><h10>Ten</h10>";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);

        Elements els = doc.body().children();
        for (Element el : els) {
            assertTrue(el.sourceRange().isTracked());
            assertTrue(el.endSourceRange().isTracked());
        }

        Element h2 = doc.expectFirst("h2");
        assertEquals("1,13:12-1,17:16", h2.sourceRange().toString());
        assertEquals("1,20:19-1,25:24", h2.endSourceRange().toString());
    }

    /** Checks name and value ranges for quoted, unquoted, entity-containing, empty and valueless attributes. */
    @Test void tracksAttributes() {
        // entities stay encoded in the input: the asserted value ranges are measured on the raw source
        String html = "<div one=\"Hello there\" id=1 class=foo attr1 = \"bar &amp; qux\" attr2='val &gt x' attr3=\"\" attr4 attr5>Text";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);

        Element div = doc.expectFirst("div");

        StringBuilder track = new StringBuilder();
        assertAttributesTracked(div, track);

        assertEquals("one:5-8=10-21; id:23-25=26-27; class:28-33=34-37; attr1:38-43=47-60; attr2:62-67=69-78; attr3:80-85=85-85; attr4:89-94=94-94; attr5:95-100=100-100; ", track.toString());
    }

    /** As {@link #tracksAttributes()}, with newlines inside the tag so line and column numbers are exercised. */
    @Test void tracksAttributesAcrossLines() {
        String html = "<div one=\"Hello\nthere\" \nid=1 \nclass=\nfoo\nattr5>Text";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);

        Element div = doc.expectFirst("div");

        StringBuilder track = new StringBuilder();
        assertAttributesTracked(div, track);

        String value = div.attributes().get("class");
        assertEquals("foo", value);
        Range.AttributeRange foo = div.attributes().sourceRange("class");
        assertEquals("4,1:30-4,6:35=5,1:37-5,4:40", foo.toString());

        assertEquals("one:5-8=10-21; id:24-26=27-28; class:30-35=37-40; attr5:41-46=46-46; ", track.toString());
    }

    /**
     Asserts the tracking invariants shared by the attribute tests for every attribute of {@code el}, and appends
     each attribute's positions to {@code track}: name and value ranges are tracked, the range is the same instance
     as the one looked up by key on the element's attributes, the name range is never implicit, and the value range
     is implicit exactly when the attribute value is empty.
     */
    private static void assertAttributesTracked(Element el, StringBuilder track) {
        for (Attribute attr : el.attributes()) {
            Range.AttributeRange attrRange = attr.sourceRange();
            assertTrue(attrRange.nameRange().isTracked());
            assertTrue(attrRange.valueRange().isTracked());
            assertSame(attrRange, el.attributes().sourceRange(attr.getKey()));

            assertFalse(attrRange.nameRange().isImplicit());
            if (attr.getValue().isEmpty()) {
                assertTrue(attrRange.valueRange().isImplicit());
            } else {
                assertFalse(attrRange.valueRange().isImplicit());
            }

            accumulatePositions(attr, track);
        }
    }

    /** Attributes on the very first element of the input are tracked, in both the HTML and XML parsers. */
    @Test void trackAttributePositionInFirstElement() {
        String html = "<html lang=en class=dark><p hidden></p></html>";

        Document htmlDoc = Jsoup.parse(html, TrackingHtmlParser);
        StringBuilder htmlPos = new StringBuilder();
        htmlDoc.expectFirst("html").nodeStream().forEach(node -> {
            accumulatePositions(node, htmlPos);
            accumulateAttributePositions(node, htmlPos);
        });

        assertEquals("html:0-25~39-46; lang:6-10=11-13; class:14-19=20-24; head:25-25~25-25; body:25-25~46-46; p:25-35~35-39; hidden:28-34=34-34; ", htmlPos.toString());

        Document xmlDoc = Jsoup.parse(html, TrackingXmlParser);
        StringBuilder xmlPos = new StringBuilder();
        xmlDoc.expectFirst("html").nodeStream().forEach(node -> {
            accumulatePositions(node, xmlPos);
            accumulateAttributePositions(node, xmlPos);
        });

        assertEquals("html:0-25~39-46; lang:6-10=11-13; class:14-19=20-24; p:25-35~35-39; hidden:28-34=34-34; ", xmlPos.toString());
    }

    /** Attribute ranges are found whether the parser preserves or lower-cases tag and attribute names. */
    @Test void trackAttributePositionWithCase() {
        // the second line is indented by four spaces: together with the newline that is the 5 char #text at 204-209
        String pomXml = "<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n" +
            "    <modelVersion>4.0.0</modelVersion>";

        Document htmlDoc = Jsoup.parse(pomXml, TrackingHtmlParser);
        StringBuilder htmlPos = new StringBuilder();
        htmlDoc.expectFirst("html").nodeStream().forEach(node -> {
            accumulatePositions(node, htmlPos);
            accumulateAttributePositions(node, htmlPos);
        });

        assertEquals("html:0-0~243-243; head:0-0~0-0; body:0-0~243-243; project:0-204~243-243; xmlns:9-14=16-49; xmlns:xsi:51-60=62-103; xsi:schemalocation:105-123=125-202; #text:204-209; modelversion:209-223~228-243; #text:223-228; ", htmlPos.toString());

        Document xmlDoc = Jsoup.parse(pomXml, TrackingXmlParser);
        StringBuilder xmlPos = new StringBuilder();
        xmlDoc.expectFirst("project").nodeStream().forEach(node -> {
            accumulatePositions(node, xmlPos);
            accumulateAttributePositions(node, xmlPos);
        });

        assertEquals("project:0-204~243-243; xmlns:9-14=16-49; xmlns:xsi:51-60=62-103; xsi:schemaLocation:105-123=125-202; #text:204-209; modelVersion:209-223~228-243; #text:223-228; ", xmlPos.toString());

        Document xmlDocLc = Jsoup.parse(pomXml, Parser.xmlParser().setTrackPosition(true).settings(new ParseSettings(false, false)));
        StringBuilder xmlPosLc = new StringBuilder();
        xmlDocLc.expectFirst("project").nodeStream().forEach(node -> {
            accumulatePositions(node, xmlPosLc);
            accumulateAttributePositions(node, xmlPosLc);
        });

        assertEquals("project:0-204~243-243; xmlns:9-14=16-49; xmlns:xsi:51-60=62-103; xsi:schemalocation:105-123=125-202; #text:204-209; modelversion:209-223~228-243; #text:223-228; ", xmlPosLc.toString());
    }

    /** When an attribute is repeated, the range recorded is that of the first occurrence kept by each parser mode. */
    @Test void trackAttributesPositionsDedupes() {
        String html = "<p id=1 id=2 Id=3 Id=4 id=5 Id=6>";
        Document htmlDoc = Jsoup.parse(html, TrackingHtmlParser);
        Document htmlDocUc = Jsoup.parse(html, Parser.htmlParser().setTrackPosition(true).settings(new ParseSettings(true, true)));
        Document xmlDoc = Jsoup.parse(html, TrackingXmlParser);
        Document xmlDocLc = Jsoup.parse(html, Parser.xmlParser().setTrackPosition(true).settings(new ParseSettings(false, false)));

        StringBuilder htmlPos = new StringBuilder();
        StringBuilder htmlUcPos = new StringBuilder();
        StringBuilder xmlPos = new StringBuilder();
        StringBuilder xmlLcPos = new StringBuilder();

        accumulateAttributePositions(htmlDoc.expectFirst("p"), htmlPos);
        accumulateAttributePositions(htmlDocUc.expectFirst("p"), htmlUcPos);
        accumulateAttributePositions(xmlDoc.expectFirst("p"), xmlPos);
        accumulateAttributePositions(xmlDocLc.expectFirst("p"), xmlLcPos);

        assertEquals("id:3-5=6-7; ", htmlPos.toString());
        assertEquals("id:3-5=6-7; Id:13-15=16-17; ", htmlUcPos.toString());
        assertEquals("id:3-5=6-7; Id:13-15=16-17; ", xmlPos.toString());
        assertEquals("id:3-5=6-7; ", xmlLcPos.toString());
    }

    /** As {@link #trackAttributesPositionsDedupes()}, but with the mixed-case spelling appearing first. */
    @Test void trackAttributesPositionsDirectionalDedupes() {
        String html = "<p Id=1 id=2 Id=3 Id=4 id=5 Id=6>";
        Document htmlDoc = Jsoup.parse(html, TrackingHtmlParser);
        Document htmlDocUc = Jsoup.parse(html, Parser.htmlParser().setTrackPosition(true).settings(new ParseSettings(true, true)));
        Document xmlDoc = Jsoup.parse(html, TrackingXmlParser);
        Document xmlDocLc = Jsoup.parse(html, Parser.xmlParser().setTrackPosition(true).settings(new ParseSettings(false, false)));

        StringBuilder htmlPos = new StringBuilder();
        StringBuilder htmlUcPos = new StringBuilder();
        StringBuilder xmlPos = new StringBuilder();
        StringBuilder xmlLcPos = new StringBuilder();

        accumulateAttributePositions(htmlDoc.expectFirst("p"), htmlPos);
        accumulateAttributePositions(htmlDocUc.expectFirst("p"), htmlUcPos);
        accumulateAttributePositions(xmlDoc.expectFirst("p"), xmlPos);
        accumulateAttributePositions(xmlDocLc.expectFirst("p"), xmlLcPos);

        assertEquals("id:3-5=6-7; ", htmlPos.toString());
        assertEquals("Id:3-5=6-7; id:8-10=11-12; ", htmlUcPos.toString());
        assertEquals("Id:3-5=6-7; id:8-10=11-12; ", xmlPos.toString());
        assertEquals("id:3-5=6-7; ", xmlLcPos.toString());
    }

    /** Nodes and attributes produced by fragment parsing are position tracked. */
    @Test void tracksFrag() {
        // https://github.com/jhy/jsoup/issues/2068
        String html = "<h1 id=1>One</h1>\n<h2 id=2>Two</h2><h10>Ten</h10>";
        Document shellDoc = Document.createShell("");

        List<Node> nodes = TrackingHtmlParser.parseFragmentInput(html, shellDoc.body(), shellDoc.baseUri());
        StringBuilder track = new StringBuilder();

        // nodes is the top level nodes - want to descend to check all tracked OK
        nodes.forEach(node -> node.nodeStream().forEach(descend -> {
            accumulatePositions(descend, track);
            accumulateAttributePositions(descend, track);
        }));

        assertEquals("h1:0-9~12-17; id:4-6=7-8; #text:9-12; #text:17-18; h2:18-27~30-35; id:22-24=25-26; #text:27-30; h10:35-40~43-49; #text:40-43; ", track.toString());
    }

    /** Renaming an attribute parsed by the HTML parser (name lower-cased on parse) keeps its source range. */
    @Test void updateKeyMaintainsRangeLc() {
        String html = "<p xsi:CLASS=On>One</p>";
        Document doc = Jsoup.parse(html, TrackingHtmlParser);
        Element p = doc.expectFirst("p");
        Attribute attr = p.attribute("xsi:class");
        assertNotNull(attr);

        String expectedRange = "1,4:3-1,13:12=1,14:13-1,16:15";
        assertEquals(expectedRange, attr.sourceRange().toString());
        attr.setKey("class");
        assertEquals(expectedRange, attr.sourceRange().toString());
        assertEquals("class=\"On\"", attr.html());
    }

    /** Renaming and re-valuing an attribute parsed by the XML parser (name case preserved) keeps its source range. */
    @Test void updateKeyMaintainsRangeUc() {
        String html = "<p xsi:CLASS=On>One</p>";
        Document doc = Jsoup.parse(html, TrackingXmlParser);
        Element p = doc.expectFirst("p");
        Attribute attr = p.attribute("xsi:CLASS");
        assertNotNull(attr);

        String expectedRange = "1,4:3-1,13:12=1,14:13-1,16:15";
        assertEquals(expectedRange, attr.sourceRange().toString());
        attr.setKey("class");
        assertEquals(expectedRange, attr.sourceRange().toString());
        assertEquals("class=\"On\"", attr.html());

        attr.setKey("CLASSY");
        assertEquals(expectedRange, attr.sourceRange().toString());
        assertEquals("CLASSY=\"On\"", attr.html());

        attr.setValue("To");
        assertEquals(expectedRange, attr.sourceRange().toString());
        assertEquals("CLASSY=\"To\"", attr.html());

        assertEquals("<p CLASSY=\"To\">One</p>", p.outerHtml());

        p.attr("CLASSY", "Tree");
        assertEquals(expectedRange, attr.sourceRange().toString());
        assertEquals("CLASSY=\"To\"", attr.html()); // changes in this direction do not get to the attribute as it's not connected that way

        Attribute attr2 = p.attribute("CLASSY");
        assertEquals("CLASSY=\"Tree\"", attr2.html());
        assertEquals(expectedRange, attr2.sourceRange().toString());
    }

    /**
     Appends the position trace of each attribute on {@code node} to {@code sb}. Leaf nodes are skipped.
     @param node the node whose attributes are recorded
     @param sb the accumulator to append to
     */
    static void accumulateAttributePositions(Node node, StringBuilder sb) {
        if (node instanceof LeafNode) return; // leafnode pseudo attributes are not tracked
        for (Attribute attribute : node.attributes()) {
            accumulatePositions(attribute, sb);
        }
    }

    /**
     Appends a compact position trace for an attribute to {@code sb}, as {@code key:nameStart-nameEnd=valueStart-valueEnd; }.
     @param attr the attribute whose name and value ranges are recorded
     @param sb the accumulator to append to
     */
    static void accumulatePositions(Attribute attr, StringBuilder sb) {
        Range.AttributeRange range = attr.sourceRange();

        sb
            .append(attr.getKey())
            .append(':')
            .append(range.nameRange().startPos())
            .append('-')
            .append(range.nameRange().endPos())

            .append('=')
            .append(range.valueRange().startPos())
            .append('-')
            .append(range.valueRange().endPos());

        sb.append("; ");
    }
}