package org.jsoup.nodes;

import org.jsoup.SerializationException;
import org.jsoup.helper.Validate;
import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jspecify.annotations.Nullable;

import java.io.IOException;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;

import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.SharedConstants.AttrRangeKey;
import static org.jsoup.nodes.Range.AttributeRange.UntrackedAttr;

/**
 * The attributes of an Element.
 * <p>
 * During parsing, attributes with the same name in an element are deduplicated, according to the configured parser's
 * attribute case-sensitive setting. It is possible to have duplicate attributes subsequently if
 * {@link #add(String, String)} vs {@link #put(String, String)} is used.
 * </p>
 * <p>
 * Attribute name and value comparisons are generally <b>case sensitive</b>. By default for HTML, attribute names are
 * normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by
 * name.
 * </p>
 *
 * @author Jonathan Hedley, [email protected]
 */
public class Attributes implements Iterable<Attribute>, Cloneable {
    // Indicates an internal key. Can't be set via HTML. (It could be set via accessor, but not too worried about
    // that. Suppressed from list, iter.)
    static final char InternalPrefix = '/';

    // The Attributes object is only created on the first use of an attribute; the Element will just have a null
    // Attribute slot otherwise
    protected static final String dataPrefix = "data-";
    private static final int InitialCapacity = 3; // sampling found mean count when attrs present = 1.49; 1.08 overall. 2.6:1 don't have any attrs.

    // manages the key/val arrays
    private static final int GrowthFactor = 2;
    static final int NotFound = -1;
    private static final String EmptyString = "";

    // the number of instance fields is kept as low as possible giving an object size of 24 bytes
    private int size = 0; // number of slots used (not total capacity, which is keys.length)
    String[] keys = new String[InitialCapacity];
    Object[] vals = new Object[InitialCapacity]; // Genericish: all non-internal attribute values must be Strings and are cast on access.
    // todo - make keys iterable without creating Attribute objects

    /**
     Ensures the key / value arrays can hold at least {@code minNewSize} entries, growing both arrays if required.
     @param minNewSize the required minimum capacity; must not be less than the current size
     */
    private void checkCapacity(int minNewSize) {
        Validate.isTrue(minNewSize >= size);
        int curCap = keys.length;
        if (curCap >= minNewSize)
            return;
        // arrays smaller than the initial capacity (e.g. trimmed by clone()) jump straight to the initial capacity
        int newCap = curCap >= InitialCapacity ? size * GrowthFactor : InitialCapacity;
        if (minNewSize > newCap)
            newCap = minNewSize;

        keys = Arrays.copyOf(keys, newCap);
        vals = Arrays.copyOf(vals, newCap);
    }

    /**
     Finds the slot index of the first attribute with this exact (case-sensitive) key.
     @param key the key to search for (not null)
     @return the index, or {@link #NotFound}
     */
    int indexOfKey(String key) {
        Validate.notNull(key);
        for (int i = 0; i < size; i++) {
            if (key.equals(keys[i]))
                return i;
        }
        return NotFound;
    }

    /**
     Finds the slot index of the first attribute whose key matches, ignoring case.
     @param key the key to search for (not null)
     @return the index, or {@link #NotFound}
     */
    private int indexOfKeyIgnoreCase(String key) {
        Validate.notNull(key);
        for (int i = 0; i < size; i++) {
            if (key.equalsIgnoreCase(keys[i]))
                return i;
        }
        return NotFound;
    }

    // we track boolean attributes as null in values - they're just keys. so returns empty for consumers
    // casts to String, so only for non-internal attributes
    static String checkNotNull(@Nullable Object val) {
        return val == null ? EmptyString : (String) val;
    }

    /**
     Get an attribute value by key.
     @param key the (case-sensitive) attribute key
     @return the attribute value if set; or empty string if not set (or a boolean attribute).
     @see #hasKey(String)
     */
    public String get(String key) {
        int i = indexOfKey(key);
        return i == NotFound ? EmptyString : checkNotNull(vals[i]);
    }

    /**
     Get an Attribute by key. The Attribute will remain connected to these Attributes, so changes made via
     {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to these Attributes and
     their owning Element.
     @param key the (case-sensitive) attribute key
     @return the Attribute for this key, or null if not present.
     @since 1.17.2
     */
    @Nullable
    public Attribute attribute(String key) {
        int i = indexOfKey(key);
        return i == NotFound ? null : new Attribute(key, checkNotNull(vals[i]), this);
    }

    /**
     * Get an attribute's value by case-insensitive key
     * @param key the attribute name
     * @return the first matching attribute value if set; or empty string if not set (or a boolean attribute).
     */
    public String getIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        return i == NotFound ? EmptyString : checkNotNull(vals[i]);
    }

    /**
     * Adds a new attribute. Will produce duplicates if the key already exists.
     * @param key attribute key
     * @param value attribute value (which can be null, to add a boolean attribute)
     * @return these attributes, for chaining
     * @see Attributes#put(String, String)
     */
    public Attributes add(String key, @Nullable String value) {
        addObject(key, value);
        return this;
    }

    // appends a key / value pair without checking for an existing key; the value may be a non-String for internal keys
    private void addObject(String key, @Nullable Object value) {
        checkCapacity(size + 1);
        keys[size] = key;
        vals[size] = value;
        size++;
    }

    /**
     * Set a new attribute, or replace an existing one by key.
     * @param key case sensitive attribute key (not null)
     * @param value attribute value (which can be null, to set a true boolean attribute)
     * @return these attributes, for chaining
     */
    public Attributes put(String key, @Nullable String value) {
        Validate.notNull(key);
        int i = indexOfKey(key);
        if (i != NotFound)
            vals[i] = value;
        else
            add(key, value);
        return this;
    }

    /**
     Get the map holding any user-data associated with these Attributes. Will be created empty on first use. Held as
     an internal attribute, not a field member, to reduce the memory footprint of Attributes when not used. Can hold
     arbitrary objects; use for source ranges, connecting W3C nodes to Elements, etc.
     * @return the map holding user-data
     */
    Map<String, Object> userData() {
        final Map<String, Object> userData;
        int i = indexOfKey(SharedConstants.UserDataKey);
        if (i == NotFound) {
            userData = new HashMap<>();
            addObject(SharedConstants.UserDataKey, userData);
        } else {
            //noinspection unchecked
            userData = (Map<String, Object>) vals[i];
        }
        return userData;
    }

    /**
     Get an arbitrary user-data object by key.
     * @param key case-sensitive key to the object.
     * @return the object associated to this key, or {@code null} if not found.
     * @see #userData(String key, Object val)
     * @since 1.17.1
     */
    @Nullable
    public Object userData(String key) {
        Validate.notNull(key);
        if (!hasKey(SharedConstants.UserDataKey)) return null; // no user data exists; don't create the map just to read
        Map<String, Object> userData = userData();
        return userData.get(key);
    }

    /**
     Set an arbitrary user-data object by key. Will be treated as an internal attribute, so will not be emitted in HTML.
     * @param key case-sensitive key
     * @param value object value
     * @return these attributes
     * @see #userData(String key)
     * @since 1.17.1
     */
    public Attributes userData(String key, Object value) {
        Validate.notNull(key);
        userData().put(key, value);
        return this;
    }

    /**
     Sets an attribute, matching any existing key case-insensitively. If a match is found, its value is replaced and
     its key is updated to the supplied casing; otherwise the attribute is added.
     */
    void putIgnoreCase(String key, @Nullable String value) {
        int i = indexOfKeyIgnoreCase(key);
        if (i != NotFound) {
            vals[i] = value;
            if (!keys[i].equals(key)) // case changed, update
                keys[i] = key;
        }
        else
            add(key, value);
    }

    /**
     * Set a new boolean attribute. Removes the attribute if the value is false.
     * <p>When {@code value} is true, an existing key is matched <b>case insensitively</b> (and updated to this
     * casing). When {@code value} is false, the removal is by {@link #remove(String)}, which is case sensitive.</p>
     * @param key attribute key
     * @param value attribute value
     * @return these attributes, for chaining
     */
    public Attributes put(String key, boolean value) {
        // NOTE(review): set and remove differ in case sensitivity; kept as-is so existing callers see no change.
        if (value)
            putIgnoreCase(key, null);
        else
            remove(key);
        return this;
    }

    /**
     Set a new attribute, or replace an existing one by key. The supplied Attribute's parent is set to these Attributes.
     @param attribute attribute with case-sensitive key
     @return these attributes, for chaining
     */
    public Attributes put(Attribute attribute) {
        Validate.notNull(attribute);
        put(attribute.getKey(), attribute.getValue());
        attribute.parent = this;
        return this;
    }

    // removes and shifts up
    @SuppressWarnings("AssignmentToNull")
    private void remove(int index) {
        Validate.isFalse(index < 0 || index >= size);
        int shifted = size - index - 1;
        if (shifted > 0) {
            System.arraycopy(keys, index + 1, keys, index, shifted);
            System.arraycopy(vals, index + 1, vals, index, shifted);
        }
        size--;
        keys[size] = null; // release hold
        vals[size] = null;
    }

    /**
     Remove an attribute by key. <b>Case sensitive.</b>
     @param key attribute key to remove
     */
    public void remove(String key) {
        int i = indexOfKey(key);
        if (i != NotFound)
            remove(i);
    }

    /**
     Remove an attribute by key. <b>Case insensitive.</b>
     @param key attribute key to remove
     */
    public void removeIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        if (i != NotFound)
            remove(i);
    }

    /**
     Tests if these attributes contain an attribute with this key.
     @param key case-sensitive key to check for
     @return true if key exists, false otherwise
     */
    public boolean hasKey(String key) {
        return indexOfKey(key) != NotFound;
    }

    /**
     Tests if these attributes contain an attribute with this key.
     @param key case-insensitive key to check for
     @return true if key exists, false otherwise
     */
    public boolean hasKeyIgnoreCase(String key) {
        return indexOfKeyIgnoreCase(key) != NotFound;
    }

    /**
     * Check if these attributes contain an attribute with a value for this key.
     * @param key case-sensitive key to check for
     * @return true if key exists, and it has a value
     */
    public boolean hasDeclaredValueForKey(String key) {
        int i = indexOfKey(key);
        return i != NotFound && vals[i] != null;
    }

    /**
     * Check if these attributes contain an attribute with a value for this key.
     * @param key case-insensitive key to check for
     * @return true if key exists, and it has a value
     */
    public boolean hasDeclaredValueForKeyIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        return i != NotFound && vals[i] != null;
    }

    /**
     Get the number of attributes in this set, including any jsoup internal-only attributes. Internal attributes are
     excluded from the {@link #html()}, {@link #asList()}, and {@link #iterator()} methods.
     @return size
     */
    public int size() {
        return size;
        // todo - exclude internal attributes from this count - maintain size, count of internals
    }

    /**
     * Test if this Attributes list is empty (size==0).
     * @return true if there are no attributes (internal attributes are counted, as in {@link #size()})
     */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     Add all the attributes from the incoming set to this set. Internal attributes of the incoming set are not
     copied, as they are not exposed by its iterator.
     @param incoming attributes to add to these attributes.
     */
    public void addAll(Attributes incoming) {
        if (incoming.size() == 0)
            return;
        checkCapacity(size + incoming.size);

        boolean needsPut = size != 0; // if this set is empty, no need to check existing set, so can add() vs put()
        // (and save bashing on the indexOfKey()
        for (Attribute attr : incoming) {
            if (needsPut)
                put(attr);
            else
                add(attr.getKey(), attr.getValue());
        }
    }

    /**
     Get the source ranges (start to end position) in the original input source from which this attribute's <b>name</b>
     and <b>value</b> were parsed.
     <p>Position tracking must be enabled prior to parsing the content.</p>
     @param key the attribute name
     @return the ranges for the attribute's name and value, or {@code untracked} if the attribute does not exist or its range
     was not tracked.
     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
     @see Attribute#sourceRange()
     @see Node#sourceRange()
     @see Element#endSourceRange()
     @since 1.17.1
     */
    public Range.AttributeRange sourceRange(String key) {
        if (!hasKey(key)) return UntrackedAttr;
        Map<String, Range.AttributeRange> ranges = getRanges();
        if (ranges == null) return UntrackedAttr;
        Range.AttributeRange range = ranges.get(key);
        return range != null ? range : UntrackedAttr;
    }

    /** Get the Ranges, if tracking is enabled; null otherwise. */
    @Nullable Map<String, Range.AttributeRange> getRanges() {
        //noinspection unchecked
        return (Map<String, Range.AttributeRange>) userData(AttrRangeKey);
    }


    /**
     Get an iterator over the non-internal attributes. Supports {@link Iterator#remove()}; any other structural
     change to these Attributes during iteration causes a {@link ConcurrentModificationException}.
     @return an iterator of Attributes connected to this set
     */
    @Override
    public Iterator<Attribute> iterator() {
        return new Iterator<Attribute>() {
            int expectedSize = size;
            int cursor = 0; // index of the next slot to examine
            int lastReturned = NotFound; // index of the slot handed out by the latest next(), if still removable

            @Override
            public boolean hasNext() {
                checkModified();
                while (cursor < size && isInternalKey(keys[cursor])) { // skip over internal keys
                    cursor++;
                }
                return cursor < size;
            }

            @Override
            public Attribute next() {
                // go via hasNext() so internal keys are skipped even when the caller did not call hasNext() first;
                // their values are not Strings, so the cast below would fail on them
                if (!hasNext()) throw new NoSuchElementException();
                final Attribute attr = new Attribute(keys[cursor], (String) vals[cursor], Attributes.this);
                lastReturned = cursor;
                cursor++;
                return attr;
            }

            private void checkModified() {
                if (size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating.");
            }

            @Override
            public void remove() {
                if (lastReturned == NotFound) throw new IllegalStateException("next() must be called before remove()");
                checkModified();
                // remove the slot actually returned: hasNext() may have moved the cursor on past internal keys
                Attributes.this.remove(lastReturned);
                cursor--; // everything after the removed slot shifted down by one
                expectedSize--;
                lastReturned = NotFound;
            }
        };
    }

    /**
     Get the attributes as a List, for iteration. Internal attributes are not included.
     @return an unmodifiable List of the attributes, as at the time of the call.
     */
    public List<Attribute> asList() {
        ArrayList<Attribute> list = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            if (isInternalKey(keys[i]))
                continue; // skip internal keys
            Attribute attr = new Attribute(keys[i], (String) vals[i], Attributes.this);
            list.add(attr);
        }
        return Collections.unmodifiableList(list);
    }

    /**
     * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
     * starting with {@code data-}.
     * @return map of custom data attributes.
     */
    public Map<String, String> dataset() {
        return new Dataset(this);
    }

    /**
     Get the HTML representation of these attributes.
     @return HTML
     */
    public String html() {
        StringBuilder sb = StringUtil.borrowBuilder();
        try {
            html(sb, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used
        } catch (IOException e) { // ought never happen
            throw new SerializationException(e);
        }
        return StringUtil.releaseBuilder(sb);
    }

    /**
     Appends each non-internal attribute, preceded by a space, to the accumulator. Attributes whose key has no valid
     form for the output syntax are omitted.
     */
    final void html(final Appendable accum, final Document.OutputSettings out) throws IOException {
        final int sz = size;
        for (int i = 0; i < sz; i++) {
            if (isInternalKey(keys[i]))
                continue;
            final String key = Attribute.getValidKey(keys[i], out.syntax());
            if (key != null)
                Attribute.htmlNoValidate(key, (String) vals[i], accum.append(' '), out);
        }
    }

    @Override
    public String toString() {
        return html();
    }

    /**
     * Checks if these attributes are equal to another set of attributes, by comparing the two sets. Note that the order
     * of the attributes does not impact this equality (as per the Map interface equals()).
     * @param o attributes to compare with
     * @return if both sets of attributes have the same content
     */
    @Override
    public boolean equals(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Attributes that = (Attributes) o;
        if (size != that.size) return false;
        for (int i = 0; i < size; i++) {
            String key = keys[i];
            int thatI = that.indexOfKey(key);
            if (thatI == NotFound)
                return false;
            Object val = vals[i];
            Object thatVal = that.vals[thatI];
            if (val == null) {
                if (thatVal != null)
                    return false;
            } else if (!val.equals(thatVal))
                return false;
        }
        return true;
    }

    /**
     * Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes. Like
     * {@link #equals(Object)}, the result does not depend on the order of the attributes, nor on any unused capacity
     * in the backing arrays (so a {@link #clone()} hashes the same as its source).
     * @return calculated hashcode
     */
    @Override
    public int hashCode() {
        int result = size;
        for (int i = 0; i < size; i++) {
            int keyHash = keys[i] == null ? 0 : keys[i].hashCode();
            int valHash = vals[i] == null ? 0 : vals[i].hashCode();
            result += 31 * keyHash + valHash; // addition is commutative, so slot order is irrelevant
        }
        return result;
    }

    /**
     Creates a copy of these Attributes. The key and value arrays are copied (trimmed to size), and the user-data map,
     if present, is copied into a new map so that user data set on the clone does not alter the original. The objects
     held in the user-data map are themselves shared (a shallow copy).
     @return the cloned Attributes
     */
    @Override
    public Attributes clone() {
        Attributes clone;
        try {
            clone = (Attributes) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
        clone.size = size;
        clone.keys = Arrays.copyOf(keys, size);
        clone.vals = Arrays.copyOf(vals, size);

        // the user-data map is mutable; without its own copy the clone would write through to the original's map
        int i = clone.indexOfKey(SharedConstants.UserDataKey);
        if (i != NotFound) {
            @SuppressWarnings("unchecked")
            Map<String, Object> userData = (Map<String, Object>) clone.vals[i];
            clone.vals[i] = new HashMap<>(userData);
        }
        return clone;
    }

    /**
     * Internal method. Lowercases all (non-internal) keys.
     */
    public void normalize() {
        for (int i = 0; i < size; i++) {
            if (!isInternalKey(keys[i]))
                keys[i] = lowerCase(keys[i]);
        }
    }

    /**
     * Internal method. Removes duplicate attribute by name. Settings for case sensitivity of key names. The first
     * occurrence of each key is kept.
     * @param settings case sensitivity
     * @return number of removed dupes
     */
    public int deduplicate(ParseSettings settings) {
        if (isEmpty())
            return 0;
        boolean preserve = settings.preserveAttributeCase();
        int dupes = 0;
        for (int i = 0; i < size; i++) {
            String key = keys[i];
            for (int j = i + 1; j < size; j++) { // size shrinks as dupes are removed, so is re-read each pass
                boolean same = preserve ? key.equals(keys[j]) : key.equalsIgnoreCase(keys[j]);
                if (same) {
                    dupes++;
                    remove(j);
                    j--; // the following entries shifted down into slot j; re-test it
                }
            }
        }
        return dupes;
    }

    /**
     A Map view over the {@code data-} prefixed attributes of the backing Attributes. Map keys are the attribute
     keys with the prefix removed. Writes and removals pass through to the backing Attributes.
     */
    private static class Dataset extends AbstractMap<String, String> {
        private final Attributes attributes;

        private Dataset(Attributes attributes) {
            this.attributes = attributes;
        }

        @Override
        public Set<Entry<String, String>> entrySet() {
            return new EntrySet();
        }

        @Override
        public String put(String key, String value) {
            String dataKey = dataKey(key);
            String oldValue = attributes.hasKey(dataKey) ? attributes.get(dataKey) : null;
            attributes.put(dataKey, value);
            return oldValue;
        }

        private class EntrySet extends AbstractSet<Map.Entry<String, String>> {

            @Override
            public Iterator<Map.Entry<String, String>> iterator() {
                return new DatasetIterator();
            }

            @Override
            public int size() {
                int count = 0;
                DatasetIterator iter = new DatasetIterator();
                while (iter.hasNext()) {
                    iter.advance(); // consume without building a Map.Entry
                    count++;
                }
                return count;
            }
        }

        /**
         Iterates the data attributes by slot index. {@code hasNext()} is idempotent, {@code next()} throws
         {@link NoSuchElementException} when exhausted, and {@code remove()} removes the entry last returned and
         allows iteration to continue.
         */
        private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
            private int cursor = 0; // index of the next slot to examine
            private int lastReturned = NotFound; // slot of the entry handed out by the latest next(), if removable
            private int expectedSize = attributes.size;
            private @Nullable Attribute pending; // the data attribute found at cursor by hasNext(), not yet returned

            @Override
            public boolean hasNext() {
                checkModified();
                if (pending != null)
                    return true;
                while (cursor < attributes.size) {
                    String key = attributes.keys[cursor];
                    if (!isInternalKey(key)) { // internal values are not Strings, and are never data attributes
                        Attribute candidate = new Attribute(key, (String) attributes.vals[cursor], attributes);
                        if (candidate.isDataAttribute()) {
                            pending = candidate;
                            return true;
                        }
                    }
                    cursor++;
                }
                return false;
            }

            @Override
            public Entry<String, String> next() {
                Attribute attr = advance();
                return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue());
            }

            @Override
            public void remove() {
                if (lastReturned == NotFound) throw new IllegalStateException("next() must be called before remove()");
                checkModified();
                attributes.remove(lastReturned);
                cursor--; // everything after the removed slot shifted down by one; pending (if any) stays valid
                expectedSize--;
                lastReturned = NotFound;
            }

            // moves past the next data attribute and returns it (with its full, prefixed key)
            private Attribute advance() {
                if (!hasNext()) throw new NoSuchElementException();
                Attribute attr = pending;
                pending = null;
                lastReturned = cursor;
                cursor++;
                return attr;
            }

            private void checkModified() {
                if (attributes.size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating.");
            }
        }
    }

    // prefixes the key with "data-"
    private static String dataKey(String key) {
        return dataPrefix + key;
    }

    // prefixes the key with the internal marker, so that it is hidden from iteration and HTML output
    static String internalKey(String key) {
        return InternalPrefix + key;
    }

    // an internal key is non-null, longer than one character, and starts with the internal prefix
    static boolean isInternalKey(String key) {
        return key != null && key.length() > 1 && key.charAt(0) == InternalPrefix;
    }
}