1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package httpguts
6
7import (
8	"net"
9	"strings"
10	"unicode/utf8"
11
12	"golang.org/x/net/idna"
13)
14
// isTokenTable is indexed by byte value; an entry is true when that
// byte is one of the characters permitted in an HTTP token:
//
//	"!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
//	"^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
var isTokenTable = [256]bool{
	// Punctuation.
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true, '*': true, '+': true,
	'-': true, '.': true, '^': true, '_': true, '`': true, '|': true, '~': true,

	// Digits.
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
	'8': true, '9': true,

	// Uppercase letters.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	// Lowercase letters.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}
94
95func IsTokenRune(r rune) bool {
96	return r < utf8.RuneSelf && isTokenTable[byte(r)]
97}
98
99// HeaderValuesContainsToken reports whether any string in values
100// contains the provided token, ASCII case-insensitively.
101func HeaderValuesContainsToken(values []string, token string) bool {
102	for _, v := range values {
103		if headerValueContainsToken(v, token) {
104			return true
105		}
106	}
107	return false
108}
109
// isOWS reports whether b is an optional whitespace byte (a space or
// a horizontal tab), as defined by RFC 7230 section 3.2.3.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
113
114// trimOWS returns x with all optional whitespace removes from the
115// beginning and end.
116func trimOWS(x string) string {
117	// TODO: consider using strings.Trim(x, " \t") instead,
118	// if and when it's fast enough. See issue 10292.
119	// But this ASCII-only code will probably always beat UTF-8
120	// aware code.
121	for len(x) > 0 && isOWS(x[0]) {
122		x = x[1:]
123	}
124	for len(x) > 0 && isOWS(x[len(x)-1]) {
125		x = x[:len(x)-1]
126	}
127	return x
128}
129
130// headerValueContainsToken reports whether v (assumed to be a
131// 0#element, in the ABNF extension described in RFC 7230 section 7)
132// contains token amongst its comma-separated tokens, ASCII
133// case-insensitively.
134func headerValueContainsToken(v string, token string) bool {
135	for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {
136		if tokenEqual(trimOWS(v[:comma]), token) {
137			return true
138		}
139		v = v[comma+1:]
140	}
141	return tokenEqual(trimOWS(v), token)
142}
143
// lowerASCII maps the ASCII uppercase letters 'A' through 'Z' to
// their lowercase counterparts and returns every other byte unchanged.
func lowerASCII(b byte) byte {
	const caseOffset = 'a' - 'A'
	if b < 'A' || b > 'Z' {
		return b
	}
	return b + caseOffset
}
151
152// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
153func tokenEqual(t1, t2 string) bool {
154	if len(t1) != len(t2) {
155		return false
156	}
157	for i, b := range t1 {
158		if b >= utf8.RuneSelf {
159			// No UTF-8 or non-ASCII allowed in tokens.
160			return false
161		}
162		if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
163			return false
164		}
165	}
166	return true
167}
168
// isLWS reports whether b is a linear white space byte, following
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS            = [CRLF] 1*( SP | HT )
//
// Only the SP and HT bytes themselves are recognized.
func isLWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
174
// isCTL reports whether b is a control byte, following
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL            = <any US-ASCII control character
//	                 (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	const (
		lastLowCTL = 0x1f // octets 0 - 31
		del        = 0x7f // DEL is a CTL too
	)
	return b == del || b <= lastLowCTL
}
184
185// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
186// HTTP/2 imposes the additional restriction that uppercase ASCII
187// letters are not allowed.
188//
189// RFC 7230 says:
190//
191//	header-field   = field-name ":" OWS field-value OWS
192//	field-name     = token
193//	token          = 1*tchar
194//	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
195//	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
196func ValidHeaderFieldName(v string) bool {
197	if len(v) == 0 {
198		return false
199	}
200	for i := 0; i < len(v); i++ {
201		if !isTokenTable[v[i]] {
202			return false
203		}
204	}
205	return true
206}
207
208// ValidHostHeader reports whether h is a valid host header.
209func ValidHostHeader(h string) bool {
210	// The latest spec is actually this:
211	//
212	// http://tools.ietf.org/html/rfc7230#section-5.4
213	//     Host = uri-host [ ":" port ]
214	//
215	// Where uri-host is:
216	//     http://tools.ietf.org/html/rfc3986#section-3.2.2
217	//
218	// But we're going to be much more lenient for now and just
219	// search for any byte that's not a valid byte in any of those
220	// expressions.
221	for i := 0; i < len(h); i++ {
222		if !validHostByte[h[i]] {
223			return false
224		}
225	}
226	return true
227}
228
// validHostByte is indexed by byte value; an entry is true when that
// byte is accepted in a host header. See the ValidHostHeader comment.
var validHostByte = [256]bool{
	// Digits.
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
	'8': true, '9': true,

	// Lowercase letters.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,

	// Uppercase letters.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	// unreserved
	'-': true, '.': true, '_': true, '~': true,

	// sub-delims
	'!': true, '$': true, '&': true, '\'': true, '(': true, ')': true, '*': true, '+': true,
	',': true, ';': true, '=': true,

	'%': true, // pct-encoded (and used in IPv6 zones)
	':': true, // IPv6address + Host expression's optional port
	'[': true,
	']': true,
}
264
265// ValidHeaderFieldValue reports whether v is a valid "field-value" according to
266// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
267//
268//	message-header = field-name ":" [ field-value ]
269//	field-value    = *( field-content | LWS )
270//	field-content  = <the OCTETs making up the field-value
271//	                 and consisting of either *TEXT or combinations
272//	                 of token, separators, and quoted-string>
273//
274// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
275//
276//	TEXT           = <any OCTET except CTLs,
277//	                  but including LWS>
278//	LWS            = [CRLF] 1*( SP | HT )
279//	CTL            = <any US-ASCII control character
280//	                 (octets 0 - 31) and DEL (127)>
281//
282// RFC 7230 says:
283//
284//	field-value    = *( field-content / obs-fold )
285//	obj-fold       =  N/A to http2, and deprecated
286//	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
287//	field-vchar    = VCHAR / obs-text
288//	obs-text       = %x80-FF
289//	VCHAR          = "any visible [USASCII] character"
290//
291// http2 further says: "Similarly, HTTP/2 allows header field values
292// that are not valid. While most of the values that can be encoded
293// will not alter header field parsing, carriage return (CR, ASCII
294// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
295// 0x0) might be exploited by an attacker if they are translated
296// verbatim. Any request or response that contains a character not
297// permitted in a header field value MUST be treated as malformed
298// (Section 8.1.2.6). Valid characters are defined by the
299// field-content ABNF rule in Section 3.2 of [RFC7230]."
300//
301// This function does not (yet?) properly handle the rejection of
302// strings that begin or end with SP or HTAB.
303func ValidHeaderFieldValue(v string) bool {
304	for i := 0; i < len(v); i++ {
305		b := v[i]
306		if isCTL(b) && !isLWS(b) {
307			return false
308		}
309	}
310	return true
311}
312
// isASCII reports whether every byte of s is below utf8.RuneSelf,
// that is, whether s consists solely of ASCII characters. The empty
// string is considered ASCII.
func isASCII(s string) bool {
	for _, c := range []byte(s) {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
321
322// PunycodeHostPort returns the IDNA Punycode version
323// of the provided "host" or "host:port" string.
324func PunycodeHostPort(v string) (string, error) {
325	if isASCII(v) {
326		return v, nil
327	}
328
329	host, port, err := net.SplitHostPort(v)
330	if err != nil {
331		// The input 'v' argument was just a "host" argument,
332		// without a port. This error should not be returned
333		// to the caller.
334		host = v
335		port = ""
336	}
337	host, err = idna.ToASCII(host)
338	if err != nil {
339		// Non-UTF-8? Not representable in Punycode, in any
340		// case.
341		return "", err
342	}
343	if port == "" {
344		return host, nil
345	}
346	return net.JoinHostPort(host, port), nil
347}
348