1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package template
6
7import (
8	"fmt"
9	"strings"
10)
11
12// urlFilter returns its input unless it contains an unsafe scheme in which
13// case it defangs the entire URL.
14//
15// Schemes that cause unintended side effects that are irreversible without user
16// interaction are considered unsafe. For example, clicking on a "javascript:"
17// link can immediately trigger JavaScript code execution.
18//
19// This filter conservatively assumes that all schemes other than the following
20// are unsafe:
21//   - http:   Navigates to a new website, and may open a new window or tab.
22//     These side effects can be reversed by navigating back to the
23//     previous website, or closing the window or tab. No irreversible
24//     changes will take place without further user interaction with
25//     the new website.
26//   - https:  Same as http.
27//   - mailto: Opens an email program and starts a new draft. This side effect
28//     is not irreversible until the user explicitly clicks send; it
29//     can be undone by closing the email program.
30//
31// To allow URLs containing other schemes to bypass this filter, developers must
32// explicitly indicate that such a URL is expected and safe by encapsulating it
33// in a template.URL value.
34func urlFilter(args ...any) string {
35	s, t := stringify(args...)
36	if t == contentTypeURL {
37		return s
38	}
39	if !isSafeURL(s) {
40		return "#" + filterFailsafe
41	}
42	return s
43}
44
45// isSafeURL is true if s is a relative URL or if URL has a protocol in
46// (http, https, mailto).
// isSafeURL is true if s is a relative URL or if URL has a protocol in
// (http, https, mailto).
func isSafeURL(s string) bool {
	protocol, _, ok := strings.Cut(s, ":")
	if !ok || strings.Contains(protocol, "/") {
		// No colon before the first slash: the URL is relative (or the
		// colon is part of a path), so there is no scheme to vet.
		return true
	}
	switch {
	case strings.EqualFold(protocol, "http"),
		strings.EqualFold(protocol, "https"),
		strings.EqualFold(protocol, "mailto"):
		return true
	}
	return false
}
55
56// urlEscaper produces an output that can be embedded in a URL query.
57// The output can be embedded in an HTML attribute without further escaping.
func urlEscaper(args ...any) string {
	// norm=false: reserved characters are percent-encoded too, making the
	// result safe for a URL query component.
	return urlProcessor(false, args...)
}
61
62// urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
63// string or parenthesis delimited url(...).
64// The normalizer does not encode all HTML specials. Specifically, it does not
65// encode '&' so correct embedding in an HTML attribute requires escaping of
// '&' to '&amp;'.
func urlNormalizer(args ...any) string {
	// norm=true: RFC 3986 reserved characters and valid %xx escapes are
	// kept intact; only bytes invalid in a URL are encoded.
	return urlProcessor(true, args...)
}
70
71// urlProcessor normalizes (when norm is true) or escapes its input to produce
72// a valid hierarchical or opaque URL part.
73func urlProcessor(norm bool, args ...any) string {
74	s, t := stringify(args...)
75	if t == contentTypeURL {
76		norm = true
77	}
78	var b strings.Builder
79	if processURLOnto(s, norm, &b) {
80		return b.String()
81	}
82	return s
83}
84
// processURLOnto appends a normalized URL corresponding to its input to b
// and reports whether the appended content differs from s.
//
// When norm is true the input is only normalized: reserved characters and
// valid percent-escapes pass through unchanged. When norm is false reserved
// characters are percent-encoded as well.
func processURLOnto(s string, norm bool, b *strings.Builder) bool {
	// A little slack beyond len(s): each escaped byte grows the output by
	// two bytes ("%xx" for one byte), so this avoids regrowth for a few
	// escapes.
	b.Grow(len(s) + 16)
	// written is the index just past the last byte already copied to b.
	// It stays 0 iff no byte needed escaping (every escape sets it to
	// i+1 >= 1), so "written != 0" reports whether output differs from s.
	written := 0
	// The byte loop below assumes that all URLs use UTF-8 as the
	// content-encoding. This is similar to the URI to IRI encoding scheme
	// defined in section 3.1 of  RFC 3987, and behaves the same as the
	// EcmaScript builtin encodeURIComponent.
	// It should not cause any misencoding of URLs in pages with
	// Content-type: text/html;charset=UTF-8.
	for i, n := 0, len(s); i < n; i++ {
		c := s[i]
		// Any case that does not `continue` falls out of the switch and
		// is percent-encoded below.
		switch c {
		// Single quote and parens are sub-delims in RFC 3986, but we
		// escape them so the output can be embedded in single
		// quoted attributes and unquoted CSS url(...) constructs.
		// Single quotes are reserved in URLs, but are only used in
		// the obsolete "mark" rule in an appendix in RFC 3986
		// so can be safely encoded.
		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
			// Reserved characters survive only in normalize mode.
			if norm {
				continue
			}
		// Unreserved according to RFC 3986 sec 2.3
		// "For consistency, percent-encoded octets in the ranges of
		// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
		// period (%2E), underscore (%5F), or tilde (%7E) should not be
		// created by URI producers
		case '-', '.', '_', '~':
			continue
		case '%':
			// When normalizing do not re-encode valid escapes.
			// A bare or malformed '%' is encoded to %25 below.
			if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
				continue
			}
		default:
			// Unreserved according to RFC 3986 sec 2.3
			if 'a' <= c && c <= 'z' {
				continue
			}
			if 'A' <= c && c <= 'Z' {
				continue
			}
			if '0' <= c && c <= '9' {
				continue
			}
		}
		// Flush the clean run preceding c, then emit c percent-encoded.
		b.WriteString(s[written:i])
		fmt.Fprintf(b, "%%%02x", c)
		written = i + 1
	}
	// Copy the trailing clean run (the whole of s when written == 0).
	b.WriteString(s[written:])
	return written != 0
}
140
141// Filters and normalizes srcset values which are comma separated
142// URLs followed by metadata.
143func srcsetFilterAndEscaper(args ...any) string {
144	s, t := stringify(args...)
145	switch t {
146	case contentTypeSrcset:
147		return s
148	case contentTypeURL:
149		// Normalizing gets rid of all HTML whitespace
150		// which separate the image URL from its metadata.
151		var b strings.Builder
152		if processURLOnto(s, true, &b) {
153			s = b.String()
154		}
155		// Additionally, commas separate one source from another.
156		return strings.ReplaceAll(s, ",", "%2c")
157	}
158
159	var b strings.Builder
160	written := 0
161	for i := 0; i < len(s); i++ {
162		if s[i] == ',' {
163			filterSrcsetElement(s, written, i, &b)
164			b.WriteString(",")
165			written = i + 1
166		}
167	}
168	filterSrcsetElement(s, written, len(s), &b)
169	return b.String()
170}
171
// htmlSpaceAndASCIIAlnumBytes is a 128-bit set packed 8 bits per byte,
// marking the ASCII whitespace and alphanumeric code points.
// Derived from https://play.golang.org/p/Dhmj7FORT5
const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"

// isHTMLSpace is true iff c is a whitespace character per
// https://infra.spec.whatwg.org/#ascii-whitespace
func isHTMLSpace(c byte) bool {
	// Whitespace code points are all <= 0x20; anything above cannot match.
	if c > 0x20 {
		return false
	}
	return htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)) != 0
}

// isHTMLSpaceOrASCIIAlnum is true iff c is an ASCII whitespace character
// per the WHATWG definition above, or an ASCII letter or digit.
func isHTMLSpaceOrASCIIAlnum(c byte) bool {
	// Only the low 128 code points are represented in the bit table.
	if c >= 0x80 {
		return false
	}
	return htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7)) != 0
}
184
185func filterSrcsetElement(s string, left int, right int, b *strings.Builder) {
186	start := left
187	for start < right && isHTMLSpace(s[start]) {
188		start++
189	}
190	end := right
191	for i := start; i < right; i++ {
192		if isHTMLSpace(s[i]) {
193			end = i
194			break
195		}
196	}
197	if url := s[start:end]; isSafeURL(url) {
198		// If image metadata is only spaces or alnums then
199		// we don't need to URL normalize it.
200		metadataOk := true
201		for i := end; i < right; i++ {
202			if !isHTMLSpaceOrASCIIAlnum(s[i]) {
203				metadataOk = false
204				break
205			}
206		}
207		if metadataOk {
208			b.WriteString(s[left:start])
209			processURLOnto(url, true, b)
210			b.WriteString(s[end:right])
211			return
212		}
213	}
214	b.WriteString("#")
215	b.WriteString(filterFailsafe)
216}
217