xref: /aosp_15_r20/external/jsoup/src/main/java/org/jsoup/nodes/Attributes.java (revision 6da8f8c4bc310ad659121b84dd089062417a2ce2)
1 package org.jsoup.nodes;
2 
3 import org.jsoup.SerializationException;
4 import org.jsoup.helper.Validate;
5 import org.jsoup.internal.SharedConstants;
6 import org.jsoup.internal.StringUtil;
7 import org.jsoup.parser.ParseSettings;
8 import org.jspecify.annotations.Nullable;
9 
10 import java.io.IOException;
11 import java.util.AbstractMap;
12 import java.util.AbstractSet;
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.Collections;
16 import java.util.ConcurrentModificationException;
17 import java.util.HashMap;
18 import java.util.Iterator;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.NoSuchElementException;
22 import java.util.Set;
23 
24 import static org.jsoup.internal.Normalizer.lowerCase;
25 import static org.jsoup.internal.SharedConstants.AttrRangeKey;
26 import static org.jsoup.nodes.Range.AttributeRange.UntrackedAttr;
27 
28 /**
29  * The attributes of an Element.
30  * <p>
31  * During parsing, attributes with the same name in an element are deduplicated, according to the configured parser's
32  * attribute case-sensitive setting. It is possible to have duplicate attributes subsequently if
33  * {@link #add(String, String)} vs {@link #put(String, String)} is used.
34  * </p>
35  * <p>
36  * Attribute name and value comparisons are generally <b>case sensitive</b>. By default for HTML, attribute names are
37  * normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by
38  * name.
39  * </p>
40  *
41  * @author Jonathan Hedley, [email protected]
42  */
43 public class Attributes implements Iterable<Attribute>, Cloneable {
44     // Prefix character marking an internal key. Can't be set via HTML. (It could be set via accessor, but not too
45     // worried about that.) Internal keys are skipped by the list, iterator, and html output.
46     static final char InternalPrefix = '/';
47 
48     // The Attributes object is only created on the first use of an attribute; the Element will just have a null
49     // Attribute slot otherwise
50     protected static final String dataPrefix = "data-";
51     private static final int InitialCapacity = 3; // sampling found mean count when attrs present = 1.49; 1.08 overall. 2.6:1 don't have any attrs.
52 
53     // growth policy and sentinels used when managing the key/val arrays
54     private static final int GrowthFactor = 2;
55     static final int NotFound = -1;
56     private static final String EmptyString = "";
57 
58     // the number of instance fields is kept as low as possible giving an object size of 24 bytes
59     private int size = 0; // number of slots used (not total capacity, which is keys.length)
60     String[] keys = new String[InitialCapacity];
61     Object[] vals = new Object[InitialCapacity]; // Genericish: all non-internal attribute values must be Strings and are cast on access.
62     // todo - make keys iterable without creating Attribute objects
63 
64     // check there's room for more
checkCapacity(int minNewSize)65     private void checkCapacity(int minNewSize) {
66         Validate.isTrue(minNewSize >= size);
67         int curCap = keys.length;
68         if (curCap >= minNewSize)
69             return;
70         int newCap = curCap >= InitialCapacity ? size * GrowthFactor : InitialCapacity;
71         if (minNewSize > newCap)
72             newCap = minNewSize;
73 
74         keys = Arrays.copyOf(keys, newCap);
75         vals = Arrays.copyOf(vals, newCap);
76     }
77 
indexOfKey(String key)78     int indexOfKey(String key) {
79         Validate.notNull(key);
80         for (int i = 0; i < size; i++) {
81             if (key.equals(keys[i]))
82                 return i;
83         }
84         return NotFound;
85     }
86 
indexOfKeyIgnoreCase(String key)87     private int indexOfKeyIgnoreCase(String key) {
88         Validate.notNull(key);
89         for (int i = 0; i < size; i++) {
90             if (key.equalsIgnoreCase(keys[i]))
91                 return i;
92         }
93         return NotFound;
94     }
95 
96     // we track boolean attributes as null in values - they're just keys. so returns empty for consumers
97     // casts to String, so only for non-internal attributes
checkNotNull(@ullable Object val)98     static String checkNotNull(@Nullable Object val) {
99         return val == null ? EmptyString : (String) val;
100     }
101 
102     /**
103      Get an attribute value by key.
104      @param key the (case-sensitive) attribute key
105      @return the attribute value if set; or empty string if not set (or a boolean attribute).
106      @see #hasKey(String)
107      */
get(String key)108     public String get(String key) {
109         int i = indexOfKey(key);
110         return i == NotFound ? EmptyString : checkNotNull(vals[i]);
111     }
112 
113     /**
114      Get an Attribute by key. The Attribute will remain connected to these Attributes, so changes made via
115      {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to these Attributes and
116      their owning Element.
117      @param key the (case-sensitive) attribute key
118      @return the Attribute for this key, or null if not present.
119      @since 1.17.2
120      */
attribute(String key)121     public Attribute attribute(String key) {
122         int i = indexOfKey(key);
123         return i == NotFound ? null : new Attribute(key, checkNotNull(vals[i]), this);
124     }
125 
126     /**
127      * Get an attribute's value by case-insensitive key
128      * @param key the attribute name
129      * @return the first matching attribute value if set; or empty string if not set (ora boolean attribute).
130      */
getIgnoreCase(String key)131     public String getIgnoreCase(String key) {
132         int i = indexOfKeyIgnoreCase(key);
133         return i == NotFound ? EmptyString : checkNotNull(vals[i]);
134     }
135 
136     /**
137      * Adds a new attribute. Will produce duplicates if the key already exists.
138      * @see Attributes#put(String, String)
139      */
add(String key, @Nullable String value)140     public Attributes add(String key, @Nullable String value) {
141         addObject(key, value);
142         return this;
143     }
144 
addObject(String key, @Nullable Object value)145     private void addObject(String key, @Nullable Object value) {
146         checkCapacity(size + 1);
147         keys[size] = key;
148         vals[size] = value;
149         size++;
150     }
151 
152     /**
153      * Set a new attribute, or replace an existing one by key.
154      * @param key case sensitive attribute key (not null)
155      * @param value attribute value (which can be null, to set a true boolean attribute)
156      * @return these attributes, for chaining
157      */
put(String key, @Nullable String value)158     public Attributes put(String key, @Nullable String value) {
159         Validate.notNull(key);
160         int i = indexOfKey(key);
161         if (i != NotFound)
162             vals[i] = value;
163         else
164             add(key, value);
165         return this;
166     }
167 
168     /**
169      Get the map holding any user-data associated with these Attributes. Will be created empty on first use. Held as
170      an internal attribute, not a field member, to reduce the memory footprint of Attributes when not used. Can hold
171      arbitrary objects; use for source ranges, connecting W3C nodes to Elements, etc.
172      * @return the map holding user-data
173      */
userData()174     Map<String, Object> userData() {
175         final Map<String, Object> userData;
176         int i = indexOfKey(SharedConstants.UserDataKey);
177         if (i == NotFound) {
178             userData = new HashMap<>();
179             addObject(SharedConstants.UserDataKey, userData);
180         } else {
181             //noinspection unchecked
182             userData = (Map<String, Object>) vals[i];
183         }
184         return userData;
185     }
186 
187     /**
188      Get an arbitrary user-data object by key.
189      * @param key case-sensitive key to the object.
190      * @return the object associated to this key, or {@code null} if not found.
191      * @see #userData(String key, Object val)
192      * @since 1.17.1
193      */
194     @Nullable
userData(String key)195     public Object userData(String key) {
196         Validate.notNull(key);
197         if (!hasKey(SharedConstants.UserDataKey)) return null; // no user data exists
198         Map<String, Object> userData = userData();
199         return userData.get(key);
200     }
201 
202     /**
203      Set an arbitrary user-data object by key. Will be treated as an internal attribute, so will not be emitted in HTML.
204      * @param key case-sensitive key
205      * @param value object value
206      * @return these attributes
207      * @see #userData(String key)
208      * @since 1.17.1
209      */
userData(String key, Object value)210     public Attributes userData(String key, Object value) {
211         Validate.notNull(key);
212         userData().put(key, value);
213         return this;
214     }
215 
putIgnoreCase(String key, @Nullable String value)216     void putIgnoreCase(String key, @Nullable String value) {
217         int i = indexOfKeyIgnoreCase(key);
218         if (i != NotFound) {
219             vals[i] = value;
220             if (!keys[i].equals(key)) // case changed, update
221                 keys[i] = key;
222         }
223         else
224             add(key, value);
225     }
226 
227     /**
228      * Set a new boolean attribute. Removes the attribute if the value is false.
229      * @param key case <b>insensitive</b> attribute key
230      * @param value attribute value
231      * @return these attributes, for chaining
232      */
put(String key, boolean value)233     public Attributes put(String key, boolean value) {
234         if (value)
235             putIgnoreCase(key, null);
236         else
237             remove(key);
238         return this;
239     }
240 
241     /**
242      Set a new attribute, or replace an existing one by key.
243      @param attribute attribute with case-sensitive key
244      @return these attributes, for chaining
245      */
put(Attribute attribute)246     public Attributes put(Attribute attribute) {
247         Validate.notNull(attribute);
248         put(attribute.getKey(), attribute.getValue());
249         attribute.parent = this;
250         return this;
251     }
252 
253     // removes and shifts up
254     @SuppressWarnings("AssignmentToNull")
remove(int index)255     private void remove(int index) {
256         Validate.isFalse(index >= size);
257         int shifted = size - index - 1;
258         if (shifted > 0) {
259             System.arraycopy(keys, index + 1, keys, index, shifted);
260             System.arraycopy(vals, index + 1, vals, index, shifted);
261         }
262         size--;
263         keys[size] = null; // release hold
264         vals[size] = null;
265     }
266 
267     /**
268      Remove an attribute by key. <b>Case sensitive.</b>
269      @param key attribute key to remove
270      */
remove(String key)271     public void remove(String key) {
272         int i = indexOfKey(key);
273         if (i != NotFound)
274             remove(i);
275     }
276 
277     /**
278      Remove an attribute by key. <b>Case insensitive.</b>
279      @param key attribute key to remove
280      */
removeIgnoreCase(String key)281     public void removeIgnoreCase(String key) {
282         int i = indexOfKeyIgnoreCase(key);
283         if (i != NotFound)
284             remove(i);
285     }
286 
287     /**
288      Tests if these attributes contain an attribute with this key.
289      @param key case-sensitive key to check for
290      @return true if key exists, false otherwise
291      */
hasKey(String key)292     public boolean hasKey(String key) {
293         return indexOfKey(key) != NotFound;
294     }
295 
296     /**
297      Tests if these attributes contain an attribute with this key.
298      @param key key to check for
299      @return true if key exists, false otherwise
300      */
hasKeyIgnoreCase(String key)301     public boolean hasKeyIgnoreCase(String key) {
302         return indexOfKeyIgnoreCase(key) != NotFound;
303     }
304 
305     /**
306      * Check if these attributes contain an attribute with a value for this key.
307      * @param key key to check for
308      * @return true if key exists, and it has a value
309      */
hasDeclaredValueForKey(String key)310     public boolean hasDeclaredValueForKey(String key) {
311         int i = indexOfKey(key);
312         return i != NotFound && vals[i] != null;
313     }
314 
315     /**
316      * Check if these attributes contain an attribute with a value for this key.
317      * @param key case-insensitive key to check for
318      * @return true if key exists, and it has a value
319      */
hasDeclaredValueForKeyIgnoreCase(String key)320     public boolean hasDeclaredValueForKeyIgnoreCase(String key) {
321         int i = indexOfKeyIgnoreCase(key);
322         return i != NotFound && vals[i] != null;
323     }
324 
325     /**
326      Get the number of attributes in this set, including any jsoup internal-only attributes. Internal attributes are
327      excluded from the {@link #html()}, {@link #asList()}, and {@link #iterator()} methods.
328      @return size
329      */
size()330     public int size() {
331         return size;
332         // todo - exclude internal attributes from this count - maintain size, count of internals
333     }
334 
335     /**
336      * Test if this Attributes list is empty (size==0).
337      */
isEmpty()338     public boolean isEmpty() {
339         return size == 0;
340     }
341 
342     /**
343      Add all the attributes from the incoming set to this set.
344      @param incoming attributes to add to these attributes.
345      */
addAll(Attributes incoming)346     public void addAll(Attributes incoming) {
347         if (incoming.size() == 0)
348             return;
349         checkCapacity(size + incoming.size);
350 
351         boolean needsPut = size != 0; // if this set is empty, no need to check existing set, so can add() vs put()
352         // (and save bashing on the indexOfKey()
353         for (Attribute attr : incoming) {
354             if (needsPut)
355                 put(attr);
356             else
357                 add(attr.getKey(), attr.getValue());
358         }
359     }
360 
361     /**
362      Get the source ranges (start to end position) in the original input source from which this attribute's <b>name</b>
363      and <b>value</b> were parsed.
364      <p>Position tracking must be enabled prior to parsing the content.</p>
365      @param key the attribute name
366      @return the ranges for the attribute's name and value, or {@code untracked} if the attribute does not exist or its range
367      was not tracked.
368      @see org.jsoup.parser.Parser#setTrackPosition(boolean)
369      @see Attribute#sourceRange()
370      @see Node#sourceRange()
371      @see Element#endSourceRange()
372      @since 1.17.1
373      */
sourceRange(String key)374     public Range.AttributeRange sourceRange(String key) {
375         if (!hasKey(key)) return UntrackedAttr;
376         Map<String, Range.AttributeRange> ranges = getRanges();
377         if (ranges == null) return Range.AttributeRange.UntrackedAttr;
378         Range.AttributeRange range = ranges.get(key);
379         return range != null ? range : Range.AttributeRange.UntrackedAttr;
380     }
381 
382     /** Get the Ranges, if tracking is enabled; null otherwise. */
getRanges()383     @Nullable Map<String, Range.AttributeRange> getRanges() {
384         //noinspection unchecked
385         return (Map<String, Range.AttributeRange>) userData(AttrRangeKey);
386     }
387 
388 
389     @Override
iterator()390     public Iterator<Attribute> iterator() {
391         return new Iterator<Attribute>() {
392             int expectedSize = size;
393             int i = 0;
394 
395             @Override
396             public boolean hasNext() {
397                 checkModified();
398                 while (i < size) {
399                     if (isInternalKey(keys[i])) // skip over internal keys
400                         i++;
401                     else
402                         break;
403                 }
404 
405                 return i < size;
406             }
407 
408             @Override
409             public Attribute next() {
410                 checkModified();
411                 if (i >= size) throw new NoSuchElementException();
412                 final Attribute attr = new Attribute(keys[i], (String) vals[i], Attributes.this);
413                 i++;
414                 return attr;
415             }
416 
417             private void checkModified() {
418                 if (size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating.");
419             }
420 
421             @Override
422             public void remove() {
423                 Attributes.this.remove(--i); // next() advanced, so rewind
424                 expectedSize--;
425             }
426         };
427     }
428 
429     /**
430      Get the attributes as a List, for iteration.
431      @return a view of the attributes as an unmodifiable List.
432      */
asList()433     public List<Attribute> asList() {
434         ArrayList<Attribute> list = new ArrayList<>(size);
435         for (int i = 0; i < size; i++) {
436             if (isInternalKey(keys[i]))
437                 continue; // skip internal keys
438             Attribute attr = new Attribute(keys[i], (String) vals[i], Attributes.this);
439             list.add(attr);
440         }
441         return Collections.unmodifiableList(list);
442     }
443 
444     /**
445      * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
446      * starting with {@code data-}.
447      * @return map of custom data attributes.
448      */
dataset()449     public Map<String, String> dataset() {
450         return new Dataset(this);
451     }
452 
453     /**
454      Get the HTML representation of these attributes.
455      @return HTML
456      */
html()457     public String html() {
458         StringBuilder sb = StringUtil.borrowBuilder();
459         try {
460             html(sb, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used
461         } catch (IOException e) { // ought never happen
462             throw new SerializationException(e);
463         }
464         return StringUtil.releaseBuilder(sb);
465     }
466 
html(final Appendable accum, final Document.OutputSettings out)467     final void html(final Appendable accum, final Document.OutputSettings out) throws IOException {
468         final int sz = size;
469         for (int i = 0; i < sz; i++) {
470             if (isInternalKey(keys[i]))
471                 continue;
472             final String key = Attribute.getValidKey(keys[i], out.syntax());
473             if (key != null)
474                 Attribute.htmlNoValidate(key, (String) vals[i], accum.append(' '), out);
475         }
476     }
477 
478     @Override
toString()479     public String toString() {
480         return html();
481     }
482 
483     /**
484      * Checks if these attributes are equal to another set of attributes, by comparing the two sets. Note that the order
485      * of the attributes does not impact this equality (as per the Map interface equals()).
486      * @param o attributes to compare with
487      * @return if both sets of attributes have the same content
488      */
489     @Override
equals(@ullable Object o)490     public boolean equals(@Nullable Object o) {
491         if (this == o) return true;
492         if (o == null || getClass() != o.getClass()) return false;
493 
494         Attributes that = (Attributes) o;
495         if (size != that.size) return false;
496         for (int i = 0; i < size; i++) {
497             String key = keys[i];
498             int thatI = that.indexOfKey(key);
499             if (thatI == NotFound)
500                 return false;
501             Object val = vals[i];
502             Object thatVal = that.vals[thatI];
503             if (val == null) {
504                 if (thatVal != null)
505                     return false;
506             } else if (!val.equals(thatVal))
507                 return false;
508         }
509         return true;
510     }
511 
512     /**
513      * Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes.
514      * @return calculated hashcode
515      */
516     @Override
hashCode()517     public int hashCode() {
518         int result = size;
519         result = 31 * result + Arrays.hashCode(keys);
520         result = 31 * result + Arrays.hashCode(vals);
521         return result;
522     }
523 
524     @Override
clone()525     public Attributes clone() {
526         Attributes clone;
527         try {
528             clone = (Attributes) super.clone();
529         } catch (CloneNotSupportedException e) {
530             throw new RuntimeException(e);
531         }
532         clone.size = size;
533         clone.keys = Arrays.copyOf(keys, size);
534         clone.vals = Arrays.copyOf(vals, size);
535         return clone;
536     }
537 
538     /**
539      * Internal method. Lowercases all (non-internal) keys.
540      */
normalize()541     public void normalize() {
542         for (int i = 0; i < size; i++) {
543             if (!isInternalKey(keys[i]))
544                 keys[i] = lowerCase(keys[i]);
545         }
546     }
547 
548     /**
549      * Internal method. Removes duplicate attribute by name. Settings for case sensitivity of key names.
550      * @param settings case sensitivity
551      * @return number of removed dupes
552      */
deduplicate(ParseSettings settings)553     public int deduplicate(ParseSettings settings) {
554         if (isEmpty())
555             return 0;
556         boolean preserve = settings.preserveAttributeCase();
557         int dupes = 0;
558         OUTER: for (int i = 0; i < keys.length; i++) {
559             for (int j = i + 1; j < keys.length; j++) {
560                 if (keys[j] == null)
561                     continue OUTER; // keys.length doesn't shrink when removing, so re-test
562                 if ((preserve && keys[i].equals(keys[j])) || (!preserve && keys[i].equalsIgnoreCase(keys[j]))) {
563                     dupes++;
564                     remove(j);
565                     j--;
566                 }
567             }
568         }
569         return dupes;
570     }
571 
572     private static class Dataset extends AbstractMap<String, String> {
573         private final Attributes attributes;
574 
Dataset(Attributes attributes)575         private Dataset(Attributes attributes) {
576             this.attributes = attributes;
577         }
578 
579         @Override
entrySet()580         public Set<Entry<String, String>> entrySet() {
581             return new EntrySet();
582         }
583 
584         @Override
put(String key, String value)585         public String put(String key, String value) {
586             String dataKey = dataKey(key);
587             String oldValue = attributes.hasKey(dataKey) ? attributes.get(dataKey) : null;
588             attributes.put(dataKey, value);
589             return oldValue;
590         }
591 
592         private class EntrySet extends AbstractSet<Map.Entry<String, String>> {
593 
594             @Override
iterator()595             public Iterator<Map.Entry<String, String>> iterator() {
596                 return new DatasetIterator();
597             }
598 
599             @Override
size()600             public int size() {
601                 int count = 0;
602                 Iterator iter = new DatasetIterator();
603                 while (iter.hasNext())
604                     count++;
605                 return count;
606             }
607         }
608 
609         private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
610             private Iterator<Attribute> attrIter = attributes.iterator();
611             private Attribute attr;
hasNext()612             public boolean hasNext() {
613                 while (attrIter.hasNext()) {
614                     attr = attrIter.next();
615                     if (attr.isDataAttribute()) return true;
616                 }
617                 return false;
618             }
619 
next()620             public Entry<String, String> next() {
621                 return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue());
622             }
623 
remove()624             public void remove() {
625                 attributes.remove(attr.getKey());
626             }
627         }
628     }
629 
dataKey(String key)630     private static String dataKey(String key) {
631         return dataPrefix + key;
632     }
633 
internalKey(String key)634     static String internalKey(String key) {
635         return InternalPrefix + key;
636     }
637 
isInternalKey(String key)638     static boolean isInternalKey(String key) {
639         return key != null && key.length() > 1 && key.charAt(0) == InternalPrefix;
640     }
641 }
642