xref: /aosp_15_r20/external/jsoup/src/main/java/org/jsoup/parser/Tag.java (revision 6da8f8c4bc310ad659121b84dd089062417a2ce2)
1 package org.jsoup.parser;
2 
3 import org.jsoup.helper.Validate;
4 import org.jsoup.internal.Normalizer;
5 
6 import java.util.HashMap;
7 import java.util.Map;
8 import java.util.function.Consumer;
9 
10 /**
11  * Tag capabilities.
12  *
13  * @author Jonathan Hedley, [email protected]
14  */
15 public class Tag implements Cloneable {
16     private static final Map<String, Tag> Tags = new HashMap<>(); // map of known tags
17 
18     private String tagName;
19     private final String normalName; // always the lower case version of this tag, regardless of case preservation mode
20     private String namespace;
21     private boolean isBlock = true; // block
22     private boolean formatAsBlock = true; // should be formatted as a block
23     private boolean empty = false; // can hold nothing; e.g. img
24     private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
25     private boolean preserveWhitespace = false; // for pre, textarea, script etc
26     private boolean formList = false; // a control that appears in forms: input, textarea, output etc
27     private boolean formSubmit = false; // a control that can be submitted in a form: input etc
28 
Tag(String tagName, String namespace)29     private Tag(String tagName, String namespace) {
30         this.tagName = tagName;
31         normalName = Normalizer.lowerCase(tagName);
32         this.namespace = namespace;
33     }
34 
35     /**
36      * Get this tag's name.
37      *
38      * @return the tag's name
39      */
getName()40     public String getName() {
41         return tagName;
42     }
43 
44     /**
45      * Get this tag's normalized (lowercased) name.
46      * @return the tag's normal name.
47      */
normalName()48     public String normalName() {
49         return normalName;
50     }
51 
namespace()52     public String namespace() {
53         return namespace;
54     }
55 
56     /**
57      * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
58      * <p>
59      * Pre-defined tags (p, div etc) will be ==, but unknown tags are not registered and will only .equals().
60      * </p>
61      *
62      * @param tagName Name of tag, e.g. "p". Case-insensitive.
63      * @param namespace the namespace for the tag.
64      * @param settings used to control tag name sensitivity
65      * @return The tag, either defined or new generic.
66      */
valueOf(String tagName, String namespace, ParseSettings settings)67     public static Tag valueOf(String tagName, String namespace, ParseSettings settings) {
68         Validate.notEmpty(tagName);
69         Validate.notNull(namespace);
70         Tag tag = Tags.get(tagName);
71         if (tag != null && tag.namespace.equals(namespace))
72             return tag;
73 
74         tagName = settings.normalizeTag(tagName); // the name we'll use
75         Validate.notEmpty(tagName);
76         String normalName = Normalizer.lowerCase(tagName); // the lower-case name to get tag settings off
77         tag = Tags.get(normalName);
78         if (tag != null && tag.namespace.equals(namespace)) {
79             if (settings.preserveTagCase() && !tagName.equals(normalName)) {
80                 tag = tag.clone(); // get a new version vs the static one, so name update doesn't reset all
81                 tag.tagName = tagName;
82             }
83             return tag;
84         }
85 
86         // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
87         tag = new Tag(tagName, namespace);
88         tag.isBlock = false;
89 
90         return tag;
91     }
92 
93     /**
94      * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
95      * <p>
96      * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
97      * </p>
98      *
99      * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
100      * @return The tag, either defined or new generic.
101      * @see #valueOf(String tagName, String namespace, ParseSettings settings)
102      */
valueOf(String tagName)103     public static Tag valueOf(String tagName) {
104         return valueOf(tagName, Parser.NamespaceHtml, ParseSettings.preserveCase);
105     }
106 
107     /**
108      * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
109      * <p>
110      * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
111      * </p>
112      *
113      * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
114      * @param settings used to control tag name sensitivity
115      * @return The tag, either defined or new generic.
116      * @see #valueOf(String tagName, String namespace, ParseSettings settings)
117      */
valueOf(String tagName, ParseSettings settings)118     public static Tag valueOf(String tagName, ParseSettings settings) {
119         return valueOf(tagName, Parser.NamespaceHtml, settings);
120     }
121 
122     /**
123      * Gets if this is a block tag.
124      *
125      * @return if block tag
126      */
isBlock()127     public boolean isBlock() {
128         return isBlock;
129     }
130 
131     /**
132      * Gets if this tag should be formatted as a block (or as inline)
133      *
134      * @return if should be formatted as block or inline
135      */
formatAsBlock()136     public boolean formatAsBlock() {
137         return formatAsBlock;
138     }
139 
140     /**
141      * Gets if this tag is an inline tag.
142      *
143      * @return if this tag is an inline tag.
144      */
isInline()145     public boolean isInline() {
146         return !isBlock;
147     }
148 
149     /**
150      * Get if this is an empty tag
151      *
152      * @return if this is an empty tag
153      */
isEmpty()154     public boolean isEmpty() {
155         return empty;
156     }
157 
158     /**
159      * Get if this tag is self-closing.
160      *
161      * @return if this tag should be output as self-closing.
162      */
isSelfClosing()163     public boolean isSelfClosing() {
164         return empty || selfClosing;
165     }
166 
167     /**
168      * Get if this is a pre-defined tag, or was auto created on parsing.
169      *
170      * @return if a known tag
171      */
isKnownTag()172     public boolean isKnownTag() {
173         return Tags.containsKey(tagName);
174     }
175 
176     /**
177      * Check if this tagname is a known tag.
178      *
179      * @param tagName name of tag
180      * @return if known HTML tag
181      */
isKnownTag(String tagName)182     public static boolean isKnownTag(String tagName) {
183         return Tags.containsKey(tagName);
184     }
185 
186     /**
187      * Get if this tag should preserve whitespace within child text nodes.
188      *
189      * @return if preserve whitespace
190      */
preserveWhitespace()191     public boolean preserveWhitespace() {
192         return preserveWhitespace;
193     }
194 
195     /**
196      * Get if this tag represents a control associated with a form. E.g. input, textarea, output
197      * @return if associated with a form
198      */
isFormListed()199     public boolean isFormListed() {
200         return formList;
201     }
202 
203     /**
204      * Get if this tag represents an element that should be submitted with a form. E.g. input, option
205      * @return if submittable with a form
206      */
isFormSubmittable()207     public boolean isFormSubmittable() {
208         return formSubmit;
209     }
210 
setSelfClosing()211     Tag setSelfClosing() {
212         selfClosing = true;
213         return this;
214     }
215 
216     @Override
equals(Object o)217     public boolean equals(Object o) {
218         if (this == o) return true;
219         if (!(o instanceof Tag)) return false;
220 
221         Tag tag = (Tag) o;
222 
223         if (!tagName.equals(tag.tagName)) return false;
224         if (empty != tag.empty) return false;
225         if (formatAsBlock != tag.formatAsBlock) return false;
226         if (isBlock != tag.isBlock) return false;
227         if (preserveWhitespace != tag.preserveWhitespace) return false;
228         if (selfClosing != tag.selfClosing) return false;
229         if (formList != tag.formList) return false;
230         return formSubmit == tag.formSubmit;
231     }
232 
233     @Override
hashCode()234     public int hashCode() {
235         int result = tagName.hashCode();
236         result = 31 * result + (isBlock ? 1 : 0);
237         result = 31 * result + (formatAsBlock ? 1 : 0);
238         result = 31 * result + (empty ? 1 : 0);
239         result = 31 * result + (selfClosing ? 1 : 0);
240         result = 31 * result + (preserveWhitespace ? 1 : 0);
241         result = 31 * result + (formList ? 1 : 0);
242         result = 31 * result + (formSubmit ? 1 : 0);
243         return result;
244     }
245 
246     @Override
toString()247     public String toString() {
248         return tagName;
249     }
250 
251     @Override
clone()252     protected Tag clone() {
253         try {
254             return (Tag) super.clone();
255         } catch (CloneNotSupportedException e) {
256             throw new RuntimeException(e);
257         }
258     }
259 
260     // internal static initialisers:
261     // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
262     private static final String[] blockTags = {
263             "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
264             "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
265             "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
266             "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
267             "td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
268             "svg", "math", "center", "template",
269             "dir", "applet", "marquee", "listing" // deprecated but still known / special handling
270     };
271     private static final String[] inlineTags = {
272             "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
273             "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "rtc", "a", "img", "br", "wbr", "map", "q",
274             "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
275             "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
276             "summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
277             "data", "bdi", "s", "strike", "nobr",
278             "rb", // deprecated but still known / special handling
279             "text", // in SVG NS
280             "mi", "mo", "msup", "mn", "mtext" // in MathML NS, to ensure inline
281     };
282     private static final String[] emptyTags = {
283             "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
284             "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track"
285     };
286     // todo - rework this to format contents as inline; and update html emitter in Element. Same output, just neater.
287     private static final String[] formatAsInlineTags = {
288             "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style",
289             "ins", "del", "s"
290     };
291     private static final String[] preserveWhitespaceTags = {
292             "pre", "plaintext", "title", "textarea"
293             // script is not here as it is a data node, which always preserve whitespace
294     };
295     // todo: I think we just need submit tags, and can scrub listed
296     private static final String[] formListedTags = {
297             "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
298     };
299     private static final String[] formSubmitTags = {
300             "input", "keygen", "object", "select", "textarea"
301     };
302 
303     private static final Map<String, String[]> namespaces = new HashMap<>();
304     static {
namespaces.put(Parser.NamespaceMathml, new String[]{"math", "mi", "mo", "msup", "mn", "mtext"})305         namespaces.put(Parser.NamespaceMathml, new String[]{"math", "mi", "mo", "msup", "mn", "mtext"});
namespaces.put(Parser.NamespaceSvg, new String[]{"svg", "text"})306         namespaces.put(Parser.NamespaceSvg, new String[]{"svg", "text"});
307         // We don't need absolute coverage here as other cases will be inferred by the HtmlTreeBuilder
308     }
309 
setupTags(String[] tagNames, Consumer<Tag> tagModifier)310     private static void setupTags(String[] tagNames, Consumer<Tag> tagModifier) {
311         for (String tagName : tagNames) {
312             Tag tag = Tags.get(tagName);
313             if (tag == null) {
314                 tag = new Tag(tagName, Parser.NamespaceHtml);
315                 Tags.put(tag.tagName, tag);
316             }
317             tagModifier.accept(tag);
318         }
319     }
320 
321     static {
setupTags(blockTags, tag -> { tag.isBlock = true; tag.formatAsBlock = true; })322         setupTags(blockTags, tag -> {
323             tag.isBlock = true;
324             tag.formatAsBlock = true;
325         });
326 
setupTags(inlineTags, tag -> { tag.isBlock = false; tag.formatAsBlock = false; })327         setupTags(inlineTags, tag -> {
328             tag.isBlock = false;
329             tag.formatAsBlock = false;
330         });
331 
setupTags(emptyTags, tag -> tag.empty = true)332         setupTags(emptyTags, tag -> tag.empty = true);
setupTags(formatAsInlineTags, tag -> tag.formatAsBlock = false)333         setupTags(formatAsInlineTags, tag -> tag.formatAsBlock = false);
setupTags(preserveWhitespaceTags, tag -> tag.preserveWhitespace = true)334         setupTags(preserveWhitespaceTags, tag -> tag.preserveWhitespace = true);
setupTags(formListedTags, tag -> tag.formList = true)335         setupTags(formListedTags, tag -> tag.formList = true);
setupTags(formSubmitTags, tag -> tag.formSubmit = true)336         setupTags(formSubmitTags, tag -> tag.formSubmit = true);
337         for (Map.Entry<String, String[]> ns : namespaces.entrySet()) {
ns.getValue()338             setupTags(ns.getValue(), tag -> tag.namespace = ns.getKey());
339         }
340     }
341 }
342