1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"fmt"
9	"text/template/parse"
10)
11
// context describes the state an HTML parser must be in when it reaches the
// portion of HTML produced by evaluating a particular template node.
//
// The zero value of type context is the start context for a template that
// produces an HTML fragment as defined at
// https://www.w3.org/TR/html5/syntax.html#the-end
// where the context element is null.
//
// Within this file the jsBraceDepth and n fields are not part of a context's
// identity: eq does not compare them, and String and mangle do not include
// them in their output. Because jsBraceDepth is a slice, a context copied by
// value shares its backing array with the original.
type context struct {
	state   state   // high-level HTML parser state
	delim   delim   // delimiter that will end the current HTML attribute
	urlPart urlPart // part of a URL, used to pick an encoding strategy
	jsCtx   jsCtx   // whether a '/' starts a JS regexp literal or a division
	// jsBraceDepth contains the current depth, for each JS template literal
	// string interpolation expression, of braces we've seen. This is used to
	// determine if the next } will close a JS template literal string
	// interpolation expression or not.
	jsBraceDepth []int
	attr         attr       // current HTML attribute, when inside one
	element      element    // HTML element, when inside a start tag or special body
	n            parse.Node // for range break/continue
	err          *Error     // nil if no error is attached to the context
}
34
35func (c context) String() string {
36	var err error
37	if c.err != nil {
38		err = c.err
39	}
40	return fmt.Sprintf("{%v %v %v %v %v %v %v}", c.state, c.delim, c.urlPart, c.jsCtx, c.attr, c.element, err)
41}
42
43// eq reports whether two contexts are equal.
44func (c context) eq(d context) bool {
45	return c.state == d.state &&
46		c.delim == d.delim &&
47		c.urlPart == d.urlPart &&
48		c.jsCtx == d.jsCtx &&
49		c.attr == d.attr &&
50		c.element == d.element &&
51		c.err == d.err
52}
53
54// mangle produces an identifier that includes a suffix that distinguishes it
55// from template names mangled with different contexts.
56func (c context) mangle(templateName string) string {
57	// The mangled name for the default context is the input templateName.
58	if c.state == stateText {
59		return templateName
60	}
61	s := templateName + "$htmltemplate_" + c.state.String()
62	if c.delim != delimNone {
63		s += "_" + c.delim.String()
64	}
65	if c.urlPart != urlPartNone {
66		s += "_" + c.urlPart.String()
67	}
68	if c.jsCtx != jsCtxRegexp {
69		s += "_" + c.jsCtx.String()
70	}
71	if c.attr != attrNone {
72		s += "_" + c.attr.String()
73	}
74	if c.element != elementNone {
75		s += "_" + c.element.String()
76	}
77	return s
78}
79
// state describes a high-level HTML parser state.
//
// It bounds the top of the element stack, and by extension the HTML insertion
// mode, but also contains state that does not correspond to anything in the
// HTML5 parsing algorithm because a single token production in the HTML
// grammar may contain embedded actions in a template. For instance, the quoted
// HTML attribute produced by
//
//	<div title="Hello {{.World}}">
//
// is a single token in HTML's grammar but in a template spans several nodes.
type state uint8

//go:generate stringer -type state

// The values below are assigned by iota, so declaration order determines each
// state's numeric value. stateText is declared first and is therefore the
// zero value, which is what makes the zero context a text context.
// NOTE(review): the String method used elsewhere in this file is presumably
// produced by the stringer directive above; regenerate it after changing
// this list.
const (
	// stateText is parsed character data. An HTML parser is in
	// this state when its parse position is outside an HTML tag,
	// directive, comment, and special element body.
	stateText state = iota
	// stateTag occurs before an HTML attribute or the end of a tag.
	stateTag
	// stateAttrName occurs inside an attribute name.
	// It occurs between the ^'s in ` ^name^ = value`.
	stateAttrName
	// stateAfterName occurs after an attr name has ended but before any
	// equals sign. It occurs between the ^'s in ` name^ ^= value`.
	stateAfterName
	// stateBeforeValue occurs after the equals sign but before the value.
	// It occurs between the ^'s in ` name =^ ^value`.
	stateBeforeValue
	// stateHTMLCmt occurs inside an <!-- HTML comment -->.
	stateHTMLCmt
	// stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
	// as described at https://www.w3.org/TR/html5/syntax.html#elements-0
	stateRCDATA
	// stateAttr occurs inside an HTML attribute whose content is text.
	stateAttr
	// stateURL occurs inside an HTML attribute whose content is a URL.
	stateURL
	// stateSrcset occurs inside an HTML srcset attribute.
	stateSrcset
	// stateJS occurs inside an event handler or script element.
	stateJS
	// stateJSDqStr occurs inside a JavaScript double quoted string.
	stateJSDqStr
	// stateJSSqStr occurs inside a JavaScript single quoted string.
	stateJSSqStr
	// stateJSTmplLit occurs inside a JavaScript back quoted string
	// (a template literal).
	stateJSTmplLit
	// stateJSRegexp occurs inside a JavaScript regexp literal.
	stateJSRegexp
	// stateJSBlockCmt occurs inside a JavaScript /* block comment */.
	stateJSBlockCmt
	// stateJSLineCmt occurs inside a JavaScript // line comment.
	stateJSLineCmt
	// stateJSHTMLOpenCmt occurs inside a JavaScript <!-- HTML-like comment.
	stateJSHTMLOpenCmt
	// stateJSHTMLCloseCmt occurs inside a JavaScript --> HTML-like comment.
	stateJSHTMLCloseCmt
	// stateCSS occurs inside a <style> element or style attribute.
	stateCSS
	// stateCSSDqStr occurs inside a CSS double quoted string.
	stateCSSDqStr
	// stateCSSSqStr occurs inside a CSS single quoted string.
	stateCSSSqStr
	// stateCSSDqURL occurs inside a CSS double quoted url("...").
	stateCSSDqURL
	// stateCSSSqURL occurs inside a CSS single quoted url('...').
	stateCSSSqURL
	// stateCSSURL occurs inside a CSS unquoted url(...).
	stateCSSURL
	// stateCSSBlockCmt occurs inside a CSS /* block comment */.
	stateCSSBlockCmt
	// stateCSSLineCmt occurs inside a CSS // line comment.
	stateCSSLineCmt
	// stateError is an infectious error state outside any valid
	// HTML/CSS/JS construct.
	stateError
	// stateDead marks unreachable code after a {{break}} or {{continue}}.
	stateDead
)
162
163// isComment is true for any state that contains content meant for template
164// authors & maintainers, not for end-users or machines.
165func isComment(s state) bool {
166	switch s {
167	case stateHTMLCmt, stateJSBlockCmt, stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt, stateCSSBlockCmt, stateCSSLineCmt:
168		return true
169	}
170	return false
171}
172
173// isInTag return whether s occurs solely inside an HTML tag.
174func isInTag(s state) bool {
175	switch s {
176	case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr:
177		return true
178	}
179	return false
180}
181
182// isInScriptLiteral returns true if s is one of the literal states within a
183// <script> tag, and as such occurrences of "<!--", "<script", and "</script"
184// need to be treated specially.
185func isInScriptLiteral(s state) bool {
186	// Ignore the comment states (stateJSBlockCmt, stateJSLineCmt,
187	// stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) because their content is already
188	// omitted from the output.
189	switch s {
190	case stateJSDqStr, stateJSSqStr, stateJSTmplLit, stateJSRegexp:
191		return true
192	}
193	return false
194}
195
// delim is the delimiter that will end the current HTML attribute.
type delim uint8

//go:generate stringer -type delim

// Values are assigned by iota; delimNone is the zero value, and mangle adds
// no delimiter suffix to a mangled template name for it.
const (
	// delimNone occurs outside any attribute.
	delimNone delim = iota
	// delimDoubleQuote occurs when a double quote (") closes the attribute.
	delimDoubleQuote
	// delimSingleQuote occurs when a single quote (') closes the attribute.
	delimSingleQuote
	// delimSpaceOrTagEnd occurs when a space or right angle bracket (>)
	// closes the attribute, that is, when the attribute value is unquoted.
	delimSpaceOrTagEnd
)
212
// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
// encoding strategies.
type urlPart uint8

//go:generate stringer -type urlPart

// Values are assigned by iota; urlPartNone is the zero value, and mangle adds
// no URL-part suffix to a mangled template name for it.
const (
	// urlPartNone occurs when not in a URL, or possibly at the start:
	// ^ in "^http://auth/path?k=v#frag".
	urlPartNone urlPart = iota
	// urlPartPreQuery occurs in the scheme, authority, or path; between the
	// ^s in "h^ttp://auth/path^?k=v#frag".
	urlPartPreQuery
	// urlPartQueryOrFrag occurs in the query or fragment portion, between
	// the ^s in "http://auth/path?^k=v#frag^".
	urlPartQueryOrFrag
	// urlPartUnknown occurs due to joining of contexts both before and
	// after the query separator.
	urlPartUnknown
)
233
// jsCtx determines whether a '/' starts a regular expression literal or a
// division operator.
type jsCtx uint8

//go:generate stringer -type jsCtx

// Values are assigned by iota; jsCtxRegexp is the zero value, and mangle adds
// no JS-context suffix to a mangled template name for it.
const (
	// jsCtxRegexp occurs where a '/' would start a regexp literal.
	jsCtxRegexp jsCtx = iota
	// jsCtxDivOp occurs where a '/' would start a division operator.
	jsCtxDivOp
	// jsCtxUnknown occurs where a '/' is ambiguous due to context joining.
	jsCtxUnknown
)
248
// element identifies the HTML element when inside a start tag or special body.
// Certain HTML elements (for example <script> and <style>) have bodies that
// are treated differently from stateText so the element type is necessary to
// transition into the correct context at the end of a tag and to identify the
// end delimiter for the body.
type element uint8

//go:generate stringer -type element

// Values are assigned by iota; elementNone is the zero value, and mangle adds
// no element suffix to a mangled template name for it.
const (
	// elementNone occurs outside a special tag or special element body.
	elementNone element = iota
	// elementScript corresponds to the raw text <script> element
	// with JS MIME type or no type attribute.
	elementScript
	// elementStyle corresponds to the raw text <style> element.
	elementStyle
	// elementTextarea corresponds to the RCDATA <textarea> element.
	elementTextarea
	// elementTitle corresponds to the RCDATA <title> element.
	elementTitle
)
271
//go:generate stringer -type attr

// attr identifies the current HTML attribute when inside the attribute,
// that is, starting from stateAttrName until stateTag/stateText (exclusive).
type attr uint8

// Values are assigned by iota; attrNone is the zero value, and mangle adds
// no attribute suffix to a mangled template name for it.
const (
	// attrNone corresponds to a normal attribute or no attribute.
	attrNone attr = iota
	// attrScript corresponds to an event handler attribute.
	attrScript
	// attrScriptType corresponds to the type attribute of a <script>
	// HTML element.
	attrScriptType
	// attrStyle corresponds to the style attribute whose value is CSS.
	attrStyle
	// attrURL corresponds to an attribute whose value is a URL.
	attrURL
	// attrSrcset corresponds to a srcset attribute.
	attrSrcset
)
292