1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"bytes"
9	"fmt"
10	"strings"
11	"unicode"
12	"unicode/utf8"
13)
14
15// endsWithCSSKeyword reports whether b ends with an ident that
16// case-insensitively matches the lower-case kw.
17func endsWithCSSKeyword(b []byte, kw string) bool {
18	i := len(b) - len(kw)
19	if i < 0 {
20		// Too short.
21		return false
22	}
23	if i != 0 {
24		r, _ := utf8.DecodeLastRune(b[:i])
25		if isCSSNmchar(r) {
26			// Too long.
27			return false
28		}
29	}
30	// Many CSS keywords, such as "!important" can have characters encoded,
31	// but the URI production does not allow that according to
32	// https://www.w3.org/TR/css3-syntax/#TOK-URI
33	// This does not attempt to recognize encoded keywords. For example,
34	// given "\75\72\6c" and "url" this return false.
35	return string(bytes.ToLower(b[i:])) == kw
36}
37
// isCSSNmchar reports whether r may appear anywhere in a CSS identifier.
//
// It follows the CSS3 nmchar production, except that multi-rune escape
// sequences are not considered.
// https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		// ASCII letters and digits.
		return true
	case r == '-', r == '_':
		return true
	case 0x80 <= r && r <= 0xd7ff,
		0xe000 <= r && r <= 0xfffd,
		0x10000 <= r && r <= 0x10ffff:
		// Non-ASCII ranges.
		return true
	}
	return false
}
53
54// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
55// If there is no change, it returns the input, otherwise it returns a slice
56// backed by a new array.
57// https://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
58func decodeCSS(s []byte) []byte {
59	i := bytes.IndexByte(s, '\\')
60	if i == -1 {
61		return s
62	}
63	// The UTF-8 sequence for a codepoint is never longer than 1 + the
64	// number hex digits need to represent that codepoint, so len(s) is an
65	// upper bound on the output length.
66	b := make([]byte, 0, len(s))
67	for len(s) != 0 {
68		i := bytes.IndexByte(s, '\\')
69		if i == -1 {
70			i = len(s)
71		}
72		b, s = append(b, s[:i]...), s[i:]
73		if len(s) < 2 {
74			break
75		}
76		// https://www.w3.org/TR/css3-syntax/#SUBTOK-escape
77		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
78		if isHex(s[1]) {
79			// https://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
80			//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
81			j := 2
82			for j < len(s) && j < 7 && isHex(s[j]) {
83				j++
84			}
85			r := hexDecode(s[1:j])
86			if r > unicode.MaxRune {
87				r, j = r/16, j-1
88			}
89			n := utf8.EncodeRune(b[len(b):cap(b)], r)
90			// The optional space at the end allows a hex
91			// sequence to be followed by a literal hex.
92			// string(decodeCSS([]byte(`\A B`))) == "\nB"
93			b, s = b[:len(b)+n], skipCSSSpace(s[j:])
94		} else {
95			// `\\` decodes to `\` and `\"` to `"`.
96			_, n := utf8.DecodeRune(s[1:])
97			b, s = append(b, s[1:1+n]...), s[1+n:]
98		}
99	}
100	return b
101}
102
// isHex reports whether c is an ASCII hexadecimal digit, in either case.
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		return true
	}
	return false
}
107
// hexDecode decodes a short sequence of hex digits: "10" -> 16.
// It panics if s contains a byte that is not a hex digit.
func hexDecode(s []byte) rune {
	var n rune
	for _, c := range s {
		var digit byte
		switch {
		case '0' <= c && c <= '9':
			digit = c - '0'
		case 'a' <= c && c <= 'f':
			digit = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			digit = c - 'A' + 10
		default:
			panic(fmt.Sprintf("Bad hex digit in %q", s))
		}
		// Make room for four more bits and fold the digit in.
		n = n<<4 | rune(digit)
	}
	return n
}
126
// skipCSSSpace returns the suffix of c that remains after skipping at most
// one leading CSS space. A CRLF pair counts as a single space.
func skipCSSSpace(c []byte) []byte {
	if len(c) == 0 {
		return c
	}
	// wc ::= #x9 | #xA | #xC | #xD | #x20
	skip := 0
	switch c[0] {
	case '\t', '\n', '\f', ' ':
		skip = 1
	case '\r':
		// This differs from CSS3's wc production because it contains a
		// probable spec error whereby wc contains all the single byte
		// sequences in nl (newline) but not CRLF.
		skip = 1
		if len(c) >= 2 && c[1] == '\n' {
			skip = 2
		}
	}
	return c[skip:]
}
147
// isCSSSpace reports whether b is one of the CSS space characters listed in
// the wc production: tab, line feed, form feed, carriage return or space.
func isCSSSpace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
156
157// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
158func cssEscaper(args ...any) string {
159	s, _ := stringify(args...)
160	var b strings.Builder
161	r, w, written := rune(0), 0, 0
162	for i := 0; i < len(s); i += w {
163		// See comment in htmlEscaper.
164		r, w = utf8.DecodeRuneInString(s[i:])
165		var repl string
166		switch {
167		case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
168			repl = cssReplacementTable[r]
169		default:
170			continue
171		}
172		if written == 0 {
173			b.Grow(len(s))
174		}
175		b.WriteString(s[written:i])
176		b.WriteString(repl)
177		written = i + w
178		if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
179			b.WriteByte(' ')
180		}
181	}
182	if written == 0 {
183		return s
184	}
185	b.WriteString(s[written:])
186	return b.String()
187}
188
// cssReplacementTable maps a code point to the CSS escape cssEscaper emits
// for it. An empty entry means the character is left unescaped. Apart from
// the backslash, each escape is a backslash followed by the lower-case hex
// value of the code point.
var cssReplacementTable = []string{
	0x00: `\0`, // NUL
	0x09: `\9`, // '\t'
	0x0a: `\a`, // '\n'
	0x0c: `\c`, // '\f'
	0x0d: `\d`, // '\r'
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	0x22: `\22`, // '"'
	0x26: `\26`, // '&'
	0x27: `\27`, // '\''
	0x28: `\28`, // '('
	0x29: `\29`, // ')'
	0x2b: `\2b`, // '+'
	0x2f: `\2f`, // '/'
	0x3a: `\3a`, // ':'
	0x3b: `\3b`, // ';'
	0x3c: `\3c`, // '<'
	0x3e: `\3e`, // '>'
	0x5c: `\\`,  // '\\'
	0x7b: `\7b`, // '{'
	0x7d: `\7d`, // '}'
}
212
// Lower-case substrings that cssValueFilter looks for among the identifier
// characters of a value; a value containing either one is rejected.
var (
	expressionBytes = []byte("expression")
	mozBindingBytes = []byte("mozbinding")
)
215
216// cssValueFilter allows innocuous CSS values in the output including CSS
217// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
218// (inherit, blue), and colors (#888).
219// It filters out unsafe values, such as those that affect token boundaries,
220// and anything that might execute scripts.
221func cssValueFilter(args ...any) string {
222	s, t := stringify(args...)
223	if t == contentTypeCSS {
224		return s
225	}
226	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
227
228	// CSS3 error handling is specified as honoring string boundaries per
229	// https://www.w3.org/TR/css3-syntax/#error-handling :
230	//     Malformed declarations. User agents must handle unexpected
231	//     tokens encountered while parsing a declaration by reading until
232	//     the end of the declaration, while observing the rules for
233	//     matching pairs of (), [], {}, "", and '', and correctly handling
234	//     escapes. For example, a malformed declaration may be missing a
235	//     property, colon (:) or value.
236	// So we need to make sure that values do not have mismatched bracket
237	// or quote characters to prevent the browser from restarting parsing
238	// inside a string that might embed JavaScript source.
239	for i, c := range b {
240		switch c {
241		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}', '<', '>':
242			return filterFailsafe
243		case '-':
244			// Disallow <!-- or -->.
245			// -- should not appear in valid identifiers.
246			if i != 0 && b[i-1] == '-' {
247				return filterFailsafe
248			}
249		default:
250			if c < utf8.RuneSelf && isCSSNmchar(rune(c)) {
251				id = append(id, c)
252			}
253		}
254	}
255	id = bytes.ToLower(id)
256	if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) {
257		return filterFailsafe
258	}
259	return string(b)
260}
261