xref: /aosp_15_r20/external/jsoup/src/test/java/org/jsoup/parser/PositionTest.java (revision 6da8f8c4bc310ad659121b84dd089062417a2ce2)
1 package org.jsoup.parser;
2 
3 import org.jsoup.Jsoup;
4 import org.jsoup.integration.servlets.FileServlet;
5 import org.jsoup.nodes.Attribute;
6 import org.jsoup.nodes.CDataNode;
7 import org.jsoup.nodes.Comment;
8 import org.jsoup.nodes.DataNode;
9 import org.jsoup.nodes.Document;
10 import org.jsoup.nodes.DocumentType;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.nodes.LeafNode;
13 import org.jsoup.nodes.Node;
14 import org.jsoup.nodes.Range;
15 import org.jsoup.nodes.TextNode;
16 import org.jsoup.nodes.XmlDeclaration;
17 import org.jsoup.select.Elements;
18 import org.junit.jupiter.api.Test;
19 
20 import java.io.IOException;
21 import java.util.List;
22 import java.util.stream.Collectors;
23 
24 import static org.junit.jupiter.api.Assertions.*;
25 
26 /**
27  Functional tests for the Position tracking behavior (across nodes, treebuilder, etc.)
28  */
29 class PositionTest {
30     static Parser TrackingHtmlParser = Parser.htmlParser().setTrackPosition(true);
31     static Parser TrackingXmlParser = Parser.xmlParser().setTrackPosition(true);
32 
parserTrackDefaults()33     @Test void parserTrackDefaults() {
34         Parser htmlParser = Parser.htmlParser();
35         assertFalse(htmlParser.isTrackPosition());
36         htmlParser.setTrackPosition(true);
37         assertTrue(htmlParser.isTrackPosition());
38 
39         Parser xmlParser = Parser.xmlParser();
40         assertFalse(xmlParser.isTrackPosition());
41         xmlParser.setTrackPosition(true);
42         assertTrue(xmlParser.isTrackPosition());
43     }
44 
tracksPosition()45     @Test void tracksPosition() {
46         String content = "<p id=1\n class=foo>\n<span>Hello\n &reg;\n there &copy.</span> now.\n <!-- comment --> ";
47         Document doc = Jsoup.parse(content, TrackingHtmlParser);
48 
49         Element html = doc.expectFirst("html");
50         Element body = doc.expectFirst("body");
51         Element p = doc.expectFirst("p");
52         Element span = doc.expectFirst("span");
53         TextNode text = (TextNode) span.firstChild();
54         assertNotNull(text);
55         TextNode now = (TextNode) span.nextSibling();
56         assertNotNull(now);
57         Comment comment = (Comment) now.nextSibling();
58         assertNotNull(comment);
59 
60         // implicit
61         assertTrue(body.sourceRange().isTracked());
62         assertTrue(body.endSourceRange().isTracked());
63         assertTrue(body.sourceRange().isImplicit());
64         assertTrue(body.endSourceRange().isImplicit());
65         Range htmlRange = html.sourceRange();
66         assertEquals("1,1:0-1,1:0", htmlRange.toString());
67         assertEquals(htmlRange, body.sourceRange());
68         assertEquals(html.endSourceRange(), body.endSourceRange());
69 
70 
71         Range pRange = p.sourceRange();
72         assertEquals("1,1:0-2,12:19", pRange.toString());
73         assertFalse(pRange.isImplicit());
74         assertTrue(p.endSourceRange().isImplicit());
75         assertEquals("6,19:83-6,19:83", p.endSourceRange().toString());
76         assertEquals(p.endSourceRange(), html.endSourceRange());
77 
78         // no explicit P closer
79         Range pEndRange = p.endSourceRange();
80         assertTrue(pEndRange.isTracked());
81         assertTrue(pEndRange.isImplicit());
82 
83         Range.Position pStart = pRange.start();
84         assertTrue(pStart.isTracked());
85         assertEquals(0, pStart.pos());
86         assertEquals(1, pStart.columnNumber());
87         assertEquals(1, pStart.lineNumber());
88         assertEquals("1,1:0", pStart.toString());
89 
90         Range.Position pEnd = pRange.end();
91         assertTrue(pStart.isTracked());
92         assertEquals(19, pEnd.pos());
93         assertEquals(12, pEnd.columnNumber());
94         assertEquals(2, pEnd.lineNumber());
95         assertEquals("2,12:19", pEnd.toString());
96 
97         assertEquals("3,1:20", span.sourceRange().start().toString());
98         assertEquals("3,7:26", span.sourceRange().end().toString());
99 
100         // span end tag
101         Range spanEnd = span.endSourceRange();
102         assertTrue(spanEnd.isTracked());
103         assertEquals("5,14:52-5,21:59", spanEnd.toString());
104 
105         String wholeText = text.getWholeText();
106         assertEquals("Hello\n ®\n there ©.", wholeText);
107         String textOrig = "Hello\n &reg;\n there &copy.";
108         Range textRange = text.sourceRange();
109         assertEquals(textRange.end().pos() -  textRange.start().pos(), textOrig.length());
110         assertEquals("3,7:26", textRange.start().toString());
111         assertEquals("5,14:52", textRange.end().toString());
112 
113         assertEquals("6,2:66", comment.sourceRange().start().toString());
114         assertEquals("6,18:82", comment.sourceRange().end().toString());
115     }
116 
tracksExpectedPoppedElements()117     @Test void tracksExpectedPoppedElements() {
118         // When TreeBuilder hits a direct .pop(), vs popToClose(..)
119         String html = "<html><head><meta></head><body><img><p>One</p><p>Two</p></body></html>";
120         Document doc = Jsoup.parse(html, TrackingHtmlParser);
121 
122         StringBuilder track = new StringBuilder();
123         doc.expectFirst("html").stream().forEach(el -> {
124             accumulatePositions(el, track);
125             assertTrue(el.sourceRange().isTracked(), el.tagName());
126             assertTrue(el.endSourceRange().isTracked(), el.tagName());
127             assertFalse(el.sourceRange().isImplicit(), el.tagName());
128             assertFalse(el.endSourceRange().isImplicit(), el.tagName());
129         });
130         assertEquals("html:0-6~63-70; head:6-12~18-25; meta:12-18~12-18; body:25-31~56-63; img:31-36~31-36; p:36-39~42-46; p:46-49~52-56; ", track.toString());
131 
132         StringBuilder textTrack = new StringBuilder();
133         doc.nodeStream(TextNode.class).forEach(text -> accumulatePositions(text, textTrack));
134         assertEquals("#text:39-42; #text:49-52; ", textTrack.toString());
135     }
136 
accumulatePositions(Node node, StringBuilder sb)137     static void accumulatePositions(Node node, StringBuilder sb) {
138         sb
139             .append(node.nodeName())
140             .append(':')
141             .append(node.sourceRange().startPos())
142             .append('-')
143             .append(node.sourceRange().endPos());
144 
145         if (node instanceof Element) {
146             Element el = (Element) node;
147             sb
148                 .append("~")
149                 .append(el.endSourceRange().startPos())
150                 .append('-')
151                 .append(el.endSourceRange().endPos());
152         }
153         sb.append("; ");
154     }
155 
tracksImplicitPoppedElements()156     @Test void tracksImplicitPoppedElements() {
157         // When TreeBuilder hits a direct .pop(), vs popToClose(..)
158         String html = "<meta><img><p>One<p>Two<p>Three";
159         Document doc = Jsoup.parse(html, TrackingHtmlParser);
160 
161         StringBuilder track = new StringBuilder();
162         doc.expectFirst("html").stream().forEach(el -> {
163             assertTrue(el.sourceRange().isTracked());
164             assertTrue(el.endSourceRange().isTracked());
165             accumulatePositions(el, track);
166         });
167 
168         assertTrue(doc.expectFirst("p").endSourceRange().isImplicit());
169         assertFalse(doc.expectFirst("meta").endSourceRange().isImplicit());
170         assertEquals("html:0-0~31-31; head:0-0~6-6; meta:0-6~0-6; body:6-6~31-31; img:6-11~6-11; p:11-14~17-17; p:17-20~23-23; p:23-26~31-31; ", track.toString());
171     }
printRange(Node node)172     private void printRange(Node node) {
173         if (node instanceof Element) {
174             Element el = (Element) node;
175             System.out.println(el.tagName() + "\t"
176                 + el.sourceRange().start().pos() + "-" + el.sourceRange().end().pos()
177                 + "\t... "
178                 + el.endSourceRange().start().pos() + "-" + el.endSourceRange().end().pos()
179             );
180         } else {
181             System.out.println(node.nodeName() + "\t"
182                 + node.sourceRange().start().pos() + "-" + node.sourceRange().end().pos()
183             );
184         }
185     }
186 
tracksMarkup()187     @Test void tracksMarkup() {
188         String html = "<!doctype\nhtml>\n<title>jsoup &copy;\n2022</title><body>\n<![CDATA[\n<jsoup>\n]]>";
189         Document doc = Jsoup.parse(html, TrackingHtmlParser);
190 
191         DocumentType doctype = doc.documentType();
192         assertNotNull(doctype);
193         assertEquals("html", doctype.name());
194         assertEquals("1,1:0-2,6:15", doctype.sourceRange().toString());
195 
196         Element title = doc.expectFirst("title");
197         TextNode titleText = (TextNode) title.firstChild();
198         assertNotNull(titleText);
199         assertEquals("jsoup ©\n2022", title.text());
200         assertEquals(titleText.getWholeText(), title.text());
201         assertEquals("3,1:16-3,8:23", title.sourceRange().toString());
202         assertEquals("3,8:23-4,5:40", titleText.sourceRange().toString());
203 
204         CDataNode cdata = (CDataNode) doc.body().childNode(1);
205         assertEquals("\n<jsoup>\n", cdata.text());
206         assertEquals("5,1:55-7,4:76", cdata.sourceRange().toString());
207     }
208 
tracksDataNodes()209     @Test void tracksDataNodes() {
210         String html = "<head>\n<script>foo;\nbar()\n5 <= 4;</script>";
211         Document doc = Jsoup.parse(html, TrackingHtmlParser);
212 
213         Element script = doc.expectFirst("script");
214         assertNotNull(script);
215         assertEquals("2,1:7-2,9:15", script.sourceRange().toString());
216         DataNode data = (DataNode) script.firstChild();
217         assertNotNull(data);
218         assertEquals("2,9:15-4,8:33", data.sourceRange().toString());
219     }
220 
tracksXml()221     @Test void tracksXml() {
222         String xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!doctype html>\n<rss url=foo>\nXML\n</rss>\n<!-- comment -->";
223         Document doc = Jsoup.parse(xml, TrackingXmlParser);
224 
225         XmlDeclaration decl = (XmlDeclaration) doc.childNode(0);
226         assertEquals("1,1:0-1,39:38", decl.sourceRange().toString());
227 
228         DocumentType doctype = (DocumentType) doc.childNode(2);
229         assertEquals("2,1:39-2,16:54", doctype.sourceRange().toString());
230 
231         Element rss = doc.firstElementChild();
232         assertNotNull(rss);
233         assertEquals("3,1:55-3,14:68", rss.sourceRange().toString());
234         assertEquals("5,1:73-5,7:79", rss.endSourceRange().toString());
235 
236         TextNode text = (TextNode) rss.firstChild();
237         assertNotNull(text);
238         assertEquals("3,14:68-5,1:73", text.sourceRange().toString());
239 
240         Comment comment = (Comment) rss.nextSibling().nextSibling();
241         assertEquals("6,1:80-6,17:96", comment.sourceRange().toString());
242     }
243 
tracksFromFetch()244     @Test void tracksFromFetch() throws IOException {
245         String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K
246         Document doc = Jsoup.connect(url).parser(TrackingHtmlParser).get();
247 
248         Element firstP = doc.expectFirst("p");
249         assertNotNull(firstP);
250         assertEquals("4,1:53-4,4:56", firstP.sourceRange().toString());
251 
252         Element p = doc.expectFirst("#xy");
253         assertNotNull(p);
254         assertEquals("1000,1:279646-1000,10:279655", p.sourceRange().toString());
255         assertEquals("1000,567:280212-1000,571:280216", p.endSourceRange().toString());
256 
257         TextNode text = (TextNode) p.firstChild();
258         assertNotNull(text);
259         assertEquals("1000,10:279655-1000,357:280002", text.sourceRange().toString());
260     }
261 
tracksFromXmlFetch()262     @Test void tracksFromXmlFetch() throws IOException {
263         String url = FileServlet.urlTo("/htmltests/test-rss.xml");
264         Document doc = Jsoup.connect(url).parser(TrackingXmlParser).get();
265 
266         Element item = doc.expectFirst("item + item");
267         assertNotNull(item);
268         assertEquals("13,5:496-13,11:502", item.sourceRange().toString());
269         assertEquals("17,5:779-17,12:786", item.endSourceRange().toString());
270     }
271 
tracksTableMovedText()272     @Test void tracksTableMovedText() {
273         String html = "<table>foo<tr>bar<td>baz</td>qux</tr>coo</table>";
274         Document doc = Jsoup.parse(html, TrackingHtmlParser);
275 
276         StringBuilder track = new StringBuilder();
277         List<TextNode> textNodes = doc.nodeStream(TextNode.class)
278             .peek(node -> accumulatePositions(node, track))
279             .collect(Collectors.toList());
280 
281         assertEquals(5, textNodes.size());
282         assertEquals("foo", textNodes.get(0).text());
283         assertEquals("bar", textNodes.get(1).text());
284         assertEquals("baz", textNodes.get(2).text());
285         assertEquals("qux", textNodes.get(3).text());
286         assertEquals("coo", textNodes.get(4).text());
287 
288         assertEquals("#text:7-10; #text:14-17; #text:21-24; #text:29-32; #text:37-40; ", track.toString());
289     }
290 
tracksClosingHtmlTagsInXml()291     @Test void tracksClosingHtmlTagsInXml() {
292         // verifies https://github.com/jhy/jsoup/issues/1935
293         String xml = "<p>One</p><title>Two</title><data>Three</data>";
294         Document doc = Jsoup.parse(xml, TrackingXmlParser);
295         Elements els = doc.children();
296         for (Element el : els) {
297             assertTrue(el.sourceRange().isTracked());
298             assertTrue(el.endSourceRange().isTracked());
299         }
300     }
301 
tracksClosingHeadingTags()302     @Test void tracksClosingHeadingTags() {
303         // https://github.com/jhy/jsoup/issues/1987
304         String html = "<h1>One</h1><h2>Two</h2><h10>Ten</h10>";
305         Document doc = Jsoup.parse(html, TrackingHtmlParser);
306 
307         Elements els = doc.body().children();
308         for (Element el : els) {
309             assertTrue(el.sourceRange().isTracked());
310             assertTrue(el.endSourceRange().isTracked());
311         }
312 
313         Element h2 = doc.expectFirst("h2");
314         assertEquals("1,13:12-1,17:16", h2.sourceRange().toString());
315         assertEquals("1,20:19-1,25:24", h2.endSourceRange().toString());
316     }
317 
tracksAttributes()318     @Test void tracksAttributes() {
319         String html = "<div one=\"Hello there\" id=1 class=foo attr1 = \"bar &amp; qux\" attr2='val &gt x' attr3=\"\" attr4 attr5>Text";
320         Document doc = Jsoup.parse(html, TrackingHtmlParser);
321 
322         Element div = doc.expectFirst("div");
323 
324         StringBuilder track = new StringBuilder();
325         for (Attribute attr : div.attributes()) {
326 
327             Range.AttributeRange attrRange = attr.sourceRange();
328             assertTrue(attrRange.nameRange().isTracked());
329             assertTrue(attrRange.valueRange().isTracked());
330             assertSame(attrRange, div.attributes().sourceRange(attr.getKey()));
331 
332             assertFalse(attrRange.nameRange().isImplicit());
333             if (attr.getValue().isEmpty())
334                 assertTrue(attrRange.valueRange().isImplicit());
335             else
336                 assertFalse(attrRange.valueRange().isImplicit());
337 
338             accumulatePositions(attr, track);
339         }
340 
341         assertEquals("one:5-8=10-21; id:23-25=26-27; class:28-33=34-37; attr1:38-43=47-60; attr2:62-67=69-78; attr3:80-85=85-85; attr4:89-94=94-94; attr5:95-100=100-100; ", track.toString());
342     }
343 
tracksAttributesAcrossLines()344     @Test void tracksAttributesAcrossLines() {
345         String html = "<div one=\"Hello\nthere\" \nid=1 \nclass=\nfoo\nattr5>Text";
346         Document doc = Jsoup.parse(html, TrackingHtmlParser);
347 
348         Element div = doc.expectFirst("div");
349 
350         StringBuilder track = new StringBuilder();
351         for (Attribute attr : div.attributes()) {
352             Range.AttributeRange attrRange = attr.sourceRange();
353             assertTrue(attrRange.nameRange().isTracked());
354             assertTrue(attrRange.valueRange().isTracked());
355             assertSame(attrRange, div.attributes().sourceRange(attr.getKey()));
356             assertFalse(attrRange.nameRange().isImplicit());
357             if (attr.getValue().isEmpty())
358                 assertTrue(attrRange.valueRange().isImplicit());
359             else
360                 assertFalse(attrRange.valueRange().isImplicit());
361             accumulatePositions(attr, track);
362         }
363 
364         String value = div.attributes().get("class");
365         assertEquals("foo", value);
366         Range.AttributeRange foo = div.attributes().sourceRange("class");
367         assertEquals("4,1:30-4,6:35=5,1:37-5,4:40", foo.toString());
368 
369         assertEquals("one:5-8=10-21; id:24-26=27-28; class:30-35=37-40; attr5:41-46=46-46; ", track.toString());
370     }
371 
trackAttributePositionInFirstElement()372     @Test void trackAttributePositionInFirstElement() {
373         String html = "<html lang=en class=dark><p hidden></p></html>";
374 
375         Document htmlDoc = Jsoup.parse(html, TrackingHtmlParser);
376         StringBuilder htmlPos = new StringBuilder();
377         htmlDoc.expectFirst("html").nodeStream().forEach(node -> {
378             accumulatePositions(node, htmlPos);
379             accumulateAttributePositions(node, htmlPos);
380         });
381 
382         assertEquals("html:0-25~39-46; lang:6-10=11-13; class:14-19=20-24; head:25-25~25-25; body:25-25~46-46; p:25-35~35-39; hidden:28-34=34-34; ", htmlPos.toString());
383 
384         Document xmlDoc = Jsoup.parse(html, TrackingXmlParser);
385         StringBuilder xmlPos = new StringBuilder();
386         xmlDoc.expectFirst("html").nodeStream().forEach(node -> {
387             accumulatePositions(node, xmlPos);
388             accumulateAttributePositions(node, xmlPos);
389         });
390 
391         assertEquals("html:0-25~39-46; lang:6-10=11-13; class:14-19=20-24; p:25-35~35-39; hidden:28-34=34-34; ", xmlPos.toString());
392     }
393 
trackAttributePositionWithCase()394     @Test void trackAttributePositionWithCase() {
395         String pomXml = "<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\n" +
396             "    <modelVersion>4.0.0</modelVersion>";
397 
398         Document htmlDoc = Jsoup.parse(pomXml, TrackingHtmlParser);
399         StringBuilder htmlPos = new StringBuilder();
400         htmlDoc.expectFirst("html").nodeStream().forEach(node -> {
401             accumulatePositions(node, htmlPos);
402             accumulateAttributePositions(node, htmlPos);
403         });
404 
405         assertEquals("html:0-0~243-243; head:0-0~0-0; body:0-0~243-243; project:0-204~243-243; xmlns:9-14=16-49; xmlns:xsi:51-60=62-103; xsi:schemalocation:105-123=125-202; #text:204-209; modelversion:209-223~228-243; #text:223-228; ", htmlPos.toString());
406 
407         Document xmlDoc = Jsoup.parse(pomXml, TrackingXmlParser);
408         StringBuilder xmlPos = new StringBuilder();
409         xmlDoc.expectFirst("project").nodeStream().forEach(node -> {
410             accumulatePositions(node, xmlPos);
411             accumulateAttributePositions(node, xmlPos);
412         });
413 
414         assertEquals("project:0-204~243-243; xmlns:9-14=16-49; xmlns:xsi:51-60=62-103; xsi:schemaLocation:105-123=125-202; #text:204-209; modelVersion:209-223~228-243; #text:223-228; ", xmlPos.toString());
415 
416         Document xmlDocLc = Jsoup.parse(pomXml, Parser.xmlParser().setTrackPosition(true).settings(new ParseSettings(false, false)));
417         StringBuilder xmlPosLc = new StringBuilder();
418         xmlDocLc.expectFirst("project").nodeStream().forEach(node -> {
419             accumulatePositions(node, xmlPosLc);
420             accumulateAttributePositions(node, xmlPosLc);
421         });
422 
423         assertEquals("project:0-204~243-243; xmlns:9-14=16-49; xmlns:xsi:51-60=62-103; xsi:schemalocation:105-123=125-202; #text:204-209; modelversion:209-223~228-243; #text:223-228; ", xmlPosLc.toString());
424     }
425 
426 
trackAttributesPositionsDedupes()427     @Test void trackAttributesPositionsDedupes() {
428         String html = "<p id=1 id=2 Id=3 Id=4 id=5 Id=6>";
429         Document      htmlDoc   = Jsoup.parse(html, TrackingHtmlParser);
430         Document      htmlDocUc = Jsoup.parse(html, Parser.htmlParser().setTrackPosition(true).settings(new ParseSettings(true, true)));
431         Document      xmlDoc    = Jsoup.parse(html, TrackingXmlParser);
432         Document      xmlDocLc  = Jsoup.parse(html, Parser.xmlParser().setTrackPosition(true).settings(new ParseSettings(false, false)));
433 
434         StringBuilder htmlPos   = new StringBuilder();
435         StringBuilder htmlUcPos = new StringBuilder();
436         StringBuilder xmlPos    = new StringBuilder();
437         StringBuilder xmlLcPos  = new StringBuilder();
438 
439         accumulateAttributePositions(htmlDoc   .expectFirst("p"), htmlPos);
440         accumulateAttributePositions(htmlDocUc .expectFirst("p"), htmlUcPos);
441         accumulateAttributePositions(xmlDoc    .expectFirst("p"), xmlPos);
442         accumulateAttributePositions(xmlDocLc  .expectFirst("p"), xmlLcPos);
443 
444         assertEquals("id:3-5=6-7; ", htmlPos   .toString());
445         assertEquals("id:3-5=6-7; Id:13-15=16-17; ", htmlUcPos .toString());
446         assertEquals("id:3-5=6-7; Id:13-15=16-17; ", xmlPos    .toString());
447         assertEquals("id:3-5=6-7; ", xmlLcPos .toString());
448     }
449 
trackAttributesPositionsDirectionalDedupes()450     @Test void trackAttributesPositionsDirectionalDedupes() {
451         String html = "<p Id=1 id=2 Id=3 Id=4 id=5 Id=6>";
452         Document      htmlDoc   = Jsoup.parse(html, TrackingHtmlParser);
453         Document      htmlDocUc = Jsoup.parse(html, Parser.htmlParser().setTrackPosition(true).settings(new ParseSettings(true, true)));
454         Document      xmlDoc    = Jsoup.parse(html, TrackingXmlParser);
455         Document      xmlDocLc  = Jsoup.parse(html, Parser.xmlParser().setTrackPosition(true).settings(new ParseSettings(false, false)));
456 
457         StringBuilder htmlPos   = new StringBuilder();
458         StringBuilder htmlUcPos = new StringBuilder();
459         StringBuilder xmlPos    = new StringBuilder();
460         StringBuilder xmlLcPos  = new StringBuilder();
461 
462         accumulateAttributePositions(htmlDoc   .expectFirst("p"), htmlPos);
463         accumulateAttributePositions(htmlDocUc .expectFirst("p"), htmlUcPos);
464         accumulateAttributePositions(xmlDoc    .expectFirst("p"), xmlPos);
465         accumulateAttributePositions(xmlDocLc  .expectFirst("p"), xmlLcPos);
466 
467         assertEquals("id:3-5=6-7; ", htmlPos   .toString());
468         assertEquals("Id:3-5=6-7; id:8-10=11-12; ", htmlUcPos .toString());
469         assertEquals("Id:3-5=6-7; id:8-10=11-12; ", xmlPos    .toString());
470         assertEquals("id:3-5=6-7; ", xmlLcPos .toString());
471     }
472 
tracksFrag()473     @Test void tracksFrag() {
474         // https://github.com/jhy/jsoup/issues/2068
475         String html = "<h1 id=1>One</h1>\n<h2 id=2>Two</h2><h10>Ten</h10>";
476         Document shellDoc = Document.createShell("");
477 
478         List<Node> nodes = TrackingHtmlParser.parseFragmentInput(html, shellDoc.body(), shellDoc.baseUri());
479         StringBuilder track = new StringBuilder();
480 
481         // nodes is the top level nodes - want to descend to check all tracked OK
482         nodes.forEach(node -> node.nodeStream().forEach(descend -> {
483             accumulatePositions(descend, track);
484             accumulateAttributePositions(descend, track);
485         }));
486 
487         assertEquals("h1:0-9~12-17; id:4-6=7-8; #text:9-12; #text:17-18; h2:18-27~30-35; id:22-24=25-26; #text:27-30; h10:35-40~43-49; #text:40-43; ", track.toString());
488     }
489 
updateKeyMaintainsRangeLc()490     @Test void updateKeyMaintainsRangeLc() {
491         String html = "<p xsi:CLASS=On>One</p>";
492         Document doc = Jsoup.parse(html, TrackingHtmlParser);
493         Element p = doc.expectFirst("p");
494         Attribute attr = p.attribute("xsi:class");
495         assertNotNull(attr);
496 
497         String expectedRange = "1,4:3-1,13:12=1,14:13-1,16:15";
498         assertEquals(expectedRange, attr.sourceRange().toString());
499         attr.setKey("class");
500         assertEquals(expectedRange, attr.sourceRange().toString());
501         assertEquals("class=\"On\"", attr.html());
502     }
503 
updateKeyMaintainsRangeUc()504     @Test void updateKeyMaintainsRangeUc() {
505         String html = "<p xsi:CLASS=On>One</p>";
506         Document doc = Jsoup.parse(html, TrackingXmlParser);
507         Element p = doc.expectFirst("p");
508         Attribute attr = p.attribute("xsi:CLASS");
509         assertNotNull(attr);
510 
511         String expectedRange = "1,4:3-1,13:12=1,14:13-1,16:15";
512         assertEquals(expectedRange, attr.sourceRange().toString());
513         attr.setKey("class");
514         assertEquals(expectedRange, attr.sourceRange().toString());
515         assertEquals("class=\"On\"", attr.html());
516 
517         attr.setKey("CLASSY");
518         assertEquals(expectedRange, attr.sourceRange().toString());
519         assertEquals("CLASSY=\"On\"", attr.html());
520 
521         attr.setValue("To");
522         assertEquals(expectedRange, attr.sourceRange().toString());
523         assertEquals("CLASSY=\"To\"", attr.html());
524 
525         assertEquals("<p CLASSY=\"To\">One</p>", p.outerHtml());
526 
527         p.attr("CLASSY", "Tree");
528         assertEquals(expectedRange, attr.sourceRange().toString());
529         assertEquals("CLASSY=\"To\"", attr.html()); // changes in this direction do not get to the attribute as it's not connected that way
530 
531         Attribute attr2 = p.attribute("CLASSY");
532         assertEquals("CLASSY=\"Tree\"", attr2.html());
533         assertEquals(expectedRange, attr2.sourceRange().toString());
534     }
535 
accumulateAttributePositions(Node node, StringBuilder sb)536     static void accumulateAttributePositions(Node node, StringBuilder sb) {
537         if (node instanceof LeafNode) return; // leafnode pseudo attributes are not tracked
538         for (Attribute attribute : node.attributes()) {
539             accumulatePositions(attribute, sb);
540         }
541     }
542 
accumulatePositions(Attribute attr, StringBuilder sb)543     static void accumulatePositions(Attribute attr, StringBuilder sb) {
544         Range.AttributeRange range = attr.sourceRange();
545 
546         sb
547             .append(attr.getKey())
548             .append(':')
549             .append(range.nameRange().startPos())
550             .append('-')
551             .append(range.nameRange().endPos())
552 
553             .append('=')
554             .append(range.valueRange().startPos())
555             .append('-')
556             .append(range.valueRange().endPos());
557 
558         sb.append("; ");
559     }
560 }