1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"fmt"
10	"strings"
11	"unicode/utf8"
12)
13
14// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
15func htmlNospaceEscaper(args ...any) string {
16	s, t := stringify(args...)
17	if s == "" {
18		return filterFailsafe
19	}
20	if t == contentTypeHTML {
21		return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
22	}
23	return htmlReplacer(s, htmlNospaceReplacementTable, false)
24}
25
26// attrEscaper escapes for inclusion in quoted attribute values.
27func attrEscaper(args ...any) string {
28	s, t := stringify(args...)
29	if t == contentTypeHTML {
30		return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
31	}
32	return htmlReplacer(s, htmlReplacementTable, true)
33}
34
35// rcdataEscaper escapes for inclusion in an RCDATA element body.
36func rcdataEscaper(args ...any) string {
37	s, t := stringify(args...)
38	if t == contentTypeHTML {
39		return htmlReplacer(s, htmlNormReplacementTable, true)
40	}
41	return htmlReplacer(s, htmlReplacementTable, true)
42}
43
44// htmlEscaper escapes for inclusion in HTML text.
45func htmlEscaper(args ...any) string {
46	s, t := stringify(args...)
47	if t == contentTypeHTML {
48		return s
49	}
50	return htmlReplacer(s, htmlReplacementTable, true)
51}
52
// htmlReplacementTable contains the runes that need to be escaped
// inside a quoted attribute value or in a text node.
// The table is indexed by rune value; htmlReplacer leaves a rune unchanged
// when its entry is empty or when it lies beyond the end of the table.
var htmlReplacementTable = []string{
	// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
	// says of U+0000 NULL: "Parse error. Append a U+FFFD REPLACEMENT
	// CHARACTER character to the current attribute's value."
	// and similarly
	// https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
	0: "\uFFFD",
	// Each special character maps to a character reference, never to
	// itself: an identity mapping would let the character through unescaped.
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
70
// htmlNormReplacementTable mirrors htmlReplacementTable except that it has
// no entry for '&', so entities already present in the input are not
// encoded a second time.
var htmlNormReplacementTable = []string{
	'\x00': "\uFFFD",
	'"':    "&#34;",
	'\'':   "&#39;",
	'+':    "&#43;",
	'<':    "&lt;",
	'>':    "&gt;",
}
81
// htmlNospaceReplacementTable lists the replacements for runes that must be
// escaped inside an unquoted attribute value.
// The escaped set is the union of the HTML special characters and the
// characters reported by running the following page in browsers:
//
//	<div id=d></div>
//	<script>(function () {
//	var a = [], d = document.getElementById("d"), i, c, s;
//	for (i = 0; i < 0x10000; ++i) {
//	  c = String.fromCharCode(i);
//	  d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
//	  s = d.getElementsByTagName("SPAN")[0];
//	  if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
//	}
//	document.write(a.join(", "));
//	})()</script>
var htmlNospaceReplacementTable = []string{
	'\x00': "&#xfffd;",
	'\t':   "&#9;",
	'\n':   "&#10;",
	'\v':   "&#11;",
	'\f':   "&#12;",
	'\r':   "&#13;",
	' ':    "&#32;",
	'"':    "&#34;",
	'&':    "&amp;",
	'\'':   "&#39;",
	'+':    "&#43;",
	'<':    "&lt;",
	'=':    "&#61;",
	'>':    "&gt;",
	// The backtick is a parse error in the attribute value (unquoted)
	// and before attribute value states, and IE treats it as a quoting
	// character.
	'`': "&#96;",
}
119
// htmlNospaceNormReplacementTable mirrors htmlNospaceReplacementTable except
// that it has no entry for '&', so entities already present in the input are
// not encoded a second time.
var htmlNospaceNormReplacementTable = []string{
	'\x00': "&#xfffd;",
	'\t':   "&#9;",
	'\n':   "&#10;",
	'\v':   "&#11;",
	'\f':   "&#12;",
	'\r':   "&#13;",
	' ':    "&#32;",
	'"':    "&#34;",
	'\'':   "&#39;",
	'+':    "&#43;",
	'<':    "&lt;",
	'=':    "&#61;",
	'>':    "&gt;",
	// The backtick is a parse error in the attribute value (unquoted)
	// and before attribute value states, and IE treats it as a quoting
	// character.
	'`': "&#96;",
}
141
// htmlReplacer returns s with every rune that has a non-empty entry in
// replacementTable substituted by that entry. When badRunes is true, runes
// outside the table pass through untouched; when it is false, those in
// U+FDD0..U+FDEF or U+FFF0..U+FFFF are rewritten as hexadecimal character
// references. If nothing needs replacing, s itself is returned.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	var out strings.Builder
	flushed := 0 // s[:flushed] has already been copied or replaced in out
	// Decode by hand rather than with 'for range s': on a decoding error
	// the number of input bytes consumed differs from the encoded length
	// of the resulting rune, and the offsets must track the input exactly.
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRuneInString(s[pos:])
		next := pos + size
		switch {
		case int(r) < len(replacementTable):
			if esc := replacementTable[r]; esc != "" {
				if flushed == 0 {
					out.Grow(len(s))
				}
				out.WriteString(s[flushed:pos])
				out.WriteString(esc)
				flushed = next
			}
		case badRunes:
			// Bad runes are allowed through unescaped.
		case 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff:
			// IE does not allow these ranges in unquoted attrs.
			if flushed == 0 {
				out.Grow(len(s))
			}
			fmt.Fprintf(&out, "%s&#x%x;", s[flushed:pos], r)
			flushed = next
		}
		pos = next
	}
	if flushed == 0 {
		return s
	}
	out.WriteString(s[flushed:])
	return out.String()
}
178
// stripTags takes a snippet of HTML and returns only the text content.
// For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
//
// The input is walked with the package's transition functions, starting
// from the zero context. If every chunk was consumed while in stateText or
// stateRCDATA, the original string is returned as is.
func stripTags(html string) string {
	var b strings.Builder
	// s is the input as bytes, c the current context, i the offset of the
	// next unconsumed byte. allText is cleared the first time a chunk is
	// consumed while in a state other than stateText or stateRCDATA.
	s, c, i, allText := []byte(html), context{}, 0, true
	// Using the transition funcs helps us avoid mangling
	// `<div title="1>2">` or `I <3 Ponies!`.
	for i != len(s) {
		if c.delim == delimNone {
			st := c.state
			// Use RCDATA instead of parsing into JS or CSS styles.
			if c.element != elementNone && !isInTag(st) {
				st = stateRCDATA
			}
			// d is the context after the transition; nread is how many
			// bytes of s[i:] the transition consumed.
			d, nread := transitionFunc[st](c, s[i:])
			i1 := i + nread
			if c.state == stateText || c.state == stateRCDATA {
				// Emit text up to the start of the tag or comment.
				j := i1
				if d.state != c.state {
					// The state changed, so scan the consumed chunk
					// backwards and cut the emitted text at its last '<'.
					for j1 := j - 1; j1 >= i; j1-- {
						if s[j1] == '<' {
							j = j1
							break
						}
					}
				}
				b.Write(s[i:j])
			} else {
				allText = false
			}
			c, i = d, i1
			continue
		}
		// c.delim is set: skip ahead to the first byte that is listed in
		// delimEnds for this delimiter.
		i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
		if i1 < i {
			// IndexAny returned -1: no terminator in the rest of the input.
			break
		}
		if c.delim != delimSpaceOrTagEnd {
			// Consume any quote.
			i1++
		}
		// Continue in stateTag, keeping only the element from the old context.
		c, i = context{state: stateTag, element: c.element}, i1
	}
	if allText {
		return html
	} else if c.state == stateText || c.state == stateRCDATA {
		// Append whatever has not been consumed yet (nothing when the loop
		// ran to the end of the input).
		b.Write(s[i:])
	}
	return b.String()
}
230
231// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
232// a known-safe HTML attribute.
233func htmlNameFilter(args ...any) string {
234	s, t := stringify(args...)
235	if t == contentTypeHTMLAttr {
236		return s
237	}
238	if len(s) == 0 {
239		// Avoid violation of structure preservation.
240		// <input checked {{.K}}={{.V}}>.
241		// Without this, if .K is empty then .V is the value of
242		// checked, but otherwise .V is the value of the attribute
243		// named .K.
244		return filterFailsafe
245	}
246	s = strings.ToLower(s)
247	if t := attrType(s); t != contentTypePlain {
248		// TODO: Split attr and element name part filters so we can recognize known attributes.
249		return filterFailsafe
250	}
251	for _, r := range s {
252		switch {
253		case '0' <= r && r <= '9':
254		case 'a' <= r && r <= 'z':
255		default:
256			return filterFailsafe
257		}
258	}
259	return s
260}
261
// commentEscaper returns the empty string regardless of input.
// Comment content does not correspond to any parsed structure or
// human-readable content, so the simplest and most secure policy is to drop
// content interpolated into comments.
// This approach is equally valid whether or not static comment content is
// removed from the template.
//
// args is never read; the variadic parameter gives this function the same
// signature as the other escapers in this file.
func commentEscaper(args ...any) string {
	return ""
}
271