1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package url parses URLs and implements query escaping.
6package url
7
8// See RFC 3986. This package generally follows RFC 3986, except where
9// it deviates for compatibility reasons. When sending changes, first
10// search old issues for history on decisions. Unit tests should also
11// contain references to issue numbers with details.
12
13import (
14	"errors"
15	"fmt"
16	"path"
17	"slices"
18	"strconv"
19	"strings"
20	_ "unsafe" // for linkname
21)
22
// Error reports an error and the operation and URL that caused it.
type Error struct {
	Op  string // operation that failed, e.g. "parse"
	URL string // URL text the operation was applied to
	Err error  // underlying cause
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect it.
func (e *Error) Unwrap() error {
	return e.Err
}

// Error formats the error as: Op "URL": Err.
func (e *Error) Error() string {
	return fmt.Sprintf("%s %q: %s", e.Op, e.URL, e.Err)
}

// Timeout reports whether the underlying error has a Timeout method
// and that method returns true.
func (e *Error) Timeout() bool {
	if te, ok := e.Err.(interface{ Timeout() bool }); ok {
		return te.Timeout()
	}
	return false
}

// Temporary reports whether the underlying error has a Temporary method
// and that method returns true.
func (e *Error) Temporary() bool {
	if te, ok := e.Err.(interface{ Temporary() bool }); ok {
		return te.Temporary()
	}
	return false
}
46
// upperhex maps a nibble value (0-15) to its uppercase hexadecimal digit.
// escape indexes it to build %XX sequences.
const upperhex = "0123456789ABCDEF"
48
// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func ishex(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	// Setting bit 0x20 folds 'A'-'F' onto 'a'-'f', so a single range
	// check covers both letter cases.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'f'
}
60
// unhex returns the numeric value (0-15) of the hexadecimal digit c.
// It returns 0 when c is not a hexadecimal digit.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
72
// encoding selects which part of a URL is being escaped or unescaped.
// The escaping rules differ between parts.
type encoding int

// The modes are numbered from 1, so the zero value of encoding names no mode.
const (
	encodePath           encoding = iota + 1 // whole path
	encodePathSegment                        // single path segment
	encodeHost                               // host (and port)
	encodeZone                               // IPv6 zone identifier inside a host
	encodeUserPassword                       // username or password in userinfo
	encodeQueryComponent                     // query key or value
	encodeFragment                           // fragment
)
84
// EscapeError is returned when a string contains a malformed or disallowed
// percent-escape. Its value is the offending escape text.
type EscapeError string

// Error returns the message with the offending escape quoted.
func (e EscapeError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid URL escape " + quoted
}
90
// InvalidHostError is returned when a host contains a byte that is not
// allowed there. Its value is the offending character.
type InvalidHostError string

// Error returns the message with the offending character quoted.
func (e InvalidHostError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid character " + quoted + " in host name"
}
96
// shouldEscape reports whether the byte c must be percent-escaped when it
// appears in the part of a URL selected by mode, according to RFC 3986.
//
// ASCII letters and digits are never escaped, in any mode. Any byte that is
// not explicitly allowed for the given mode by the checks below is escaped.
//
// Please be informed that for now shouldEscape does not check all
// reserved characters correctly. See golang.org/issue/5684.
func shouldEscape(c byte, mode encoding) bool {
	// §2.3 Unreserved characters (alphanum)
	if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
		return false
	}

	if mode == encodeHost || mode == encodeZone {
		// §3.2.2 Host allows
		//	sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
		// as part of reg-name.
		// We add : because we include :port as part of host.
		// We add [ ] because we include [ipv6]:port as part of host.
		// We add < > because they're the only characters left that
		// we could possibly allow, and Parse will reject them if we
		// escape them (because hosts can't use %-encoding for
		// ASCII bytes).
		// The list below also lets the double quote through unescaped.
		switch c {
		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
			return false
		}
	}

	switch c {
	case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
		return false

	case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
		// Different sections of the URL allow a few of
		// the reserved characters to appear unescaped.
		// Host and zone modes match none of the cases below, so a
		// reserved character that reaches this point in those modes
		// falls through to the final "return true".
		switch mode {
		case encodePath: // §3.3
			// The RFC allows : @ & = + $ but saves / ; , for assigning
			// meaning to individual path segments. This package
			// only manipulates the path as a whole, so we allow those
			// last three as well. That leaves only ? to escape.
			return c == '?'

		case encodePathSegment: // §3.3
			// The RFC allows : @ & = + $ but saves / ; , for assigning
			// meaning to individual path segments.
			return c == '/' || c == ';' || c == ',' || c == '?'

		case encodeUserPassword: // §3.2.1
			// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
			// userinfo, so we must escape only '@', '/', and '?'.
			// The parsing of userinfo treats ':' as special so we must escape
			// that too.
			return c == '@' || c == '/' || c == '?' || c == ':'

		case encodeQueryComponent: // §3.4
			// The RFC reserves (so we must escape) everything.
			return true

		case encodeFragment: // §4.1
			// The RFC text is silent but the grammar allows
			// everything, so escape nothing.
			return false
		}
	}

	if mode == encodeFragment {
		// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
		// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
		// need to be escaped. To minimize potential breakage, we apply two restrictions:
		// (1) we always escape sub-delims outside of the fragment, and (2) we always
		// escape single quote to avoid breaking callers that had previously assumed that
		// single quotes would be escaped. See issue #19917.
		switch c {
		case '!', '(', ')', '*':
			return false
		}
	}

	// Everything else must be escaped.
	return true
}
178
// QueryUnescape does the inverse transformation of [QueryEscape],
// converting each 3-byte encoded substring of the form "%AB" into the
// hex-decoded byte 0xAB and each '+' into a space.
// It returns an error (of type [EscapeError]) if any % is not followed
// by two hexadecimal digits.
func QueryUnescape(s string) (string, error) {
	return unescape(s, encodeQueryComponent)
}
187
// PathUnescape does the inverse transformation of [PathEscape],
// converting each 3-byte encoded substring of the form "%AB" into the
// hex-decoded byte 0xAB. It returns an error (of type [EscapeError]) if
// any % is not followed by two hexadecimal digits.
//
// PathUnescape is identical to [QueryUnescape] except that it does not
// unescape '+' to ' ' (space).
func PathUnescape(s string) (string, error) {
	return unescape(s, encodePathSegment)
}
198
// unescape unescapes a string; the mode specifies
// which section of the URL string is being unescaped.
//
// It works in two passes. The first pass validates s and counts the
// percent-escapes; the second pass builds the decoded string. If s
// contains nothing to decode, s itself is returned.
//
// On failure the returned string is empty and the error is either an
// [EscapeError] (malformed or disallowed %-escape) or, in host and zone
// modes, an [InvalidHostError] (ASCII byte not permitted in a host).
func unescape(s string, mode encoding) (string, error) {
	// Count %, check that they're well-formed.
	n := 0
	hasPlus := false
	for i := 0; i < len(s); {
		switch s[i] {
		case '%':
			n++
			if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
				// Report at most the three bytes starting at the '%'.
				s = s[i:]
				if len(s) > 3 {
					s = s[:3]
				}
				return "", EscapeError(s)
			}
			// Per https://tools.ietf.org/html/rfc3986#page-21
			// in the host component %-encoding can only be used
			// for non-ASCII bytes.
			// But https://tools.ietf.org/html/rfc6874#section-2
			// introduces %25 being allowed to escape a percent sign
			// in IPv6 scoped-address literals. Yay.
			// A first hex digit below 8 means the decoded byte is ASCII.
			if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
				return "", EscapeError(s[i : i+3])
			}
			if mode == encodeZone {
				// RFC 6874 says basically "anything goes" for zone identifiers
				// and that even non-ASCII can be redundantly escaped,
				// but it seems prudent to restrict %-escaped bytes here to those
				// that are valid host name bytes in their unescaped form.
				// That is, you can use escaping in the zone identifier but not
				// to introduce bytes you couldn't just write directly.
				// But Windows puts spaces here! Yay.
				v := unhex(s[i+1])<<4 | unhex(s[i+2])
				if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) {
					return "", EscapeError(s[i : i+3])
				}
			}
			i += 3
		case '+':
			// A '+' only needs rewriting (to a space) in query components.
			hasPlus = mode == encodeQueryComponent
			i++
		default:
			// In host and zone modes, reject ASCII bytes that would have
			// needed escaping; bytes >= 0x80 are passed through.
			if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
				return "", InvalidHostError(s[i : i+1])
			}
			i++
		}
	}

	// Nothing to decode: return the input unchanged.
	if n == 0 && !hasPlus {
		return s, nil
	}

	var t strings.Builder
	// Each of the n escapes shrinks from three bytes to one.
	t.Grow(len(s) - 2*n)
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case '%':
			// The first pass verified that two hex digits follow.
			t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
			i += 2
		case '+':
			if mode == encodeQueryComponent {
				t.WriteByte(' ')
			} else {
				t.WriteByte('+')
			}
		default:
			t.WriteByte(s[i])
		}
	}
	return t.String(), nil
}
273
// QueryEscape escapes the string so it can be safely placed
// inside a [URL] query. Spaces are encoded as '+'; every other byte
// that needs escaping is replaced by a %XX sequence.
func QueryEscape(s string) string {
	return escape(s, encodeQueryComponent)
}
279
// PathEscape escapes the string so it can be safely placed inside a [URL] path segment,
// replacing special characters (including /) with %XX sequences as needed.
// Unlike [QueryEscape], it encodes a space as %20 rather than '+'.
func PathEscape(s string) string {
	return escape(s, encodePathSegment)
}
285
286func escape(s string, mode encoding) string {
287	spaceCount, hexCount := 0, 0
288	for i := 0; i < len(s); i++ {
289		c := s[i]
290		if shouldEscape(c, mode) {
291			if c == ' ' && mode == encodeQueryComponent {
292				spaceCount++
293			} else {
294				hexCount++
295			}
296		}
297	}
298
299	if spaceCount == 0 && hexCount == 0 {
300		return s
301	}
302
303	var buf [64]byte
304	var t []byte
305
306	required := len(s) + 2*hexCount
307	if required <= len(buf) {
308		t = buf[:required]
309	} else {
310		t = make([]byte, required)
311	}
312
313	if hexCount == 0 {
314		copy(t, s)
315		for i := 0; i < len(s); i++ {
316			if s[i] == ' ' {
317				t[i] = '+'
318			}
319		}
320		return string(t)
321	}
322
323	j := 0
324	for i := 0; i < len(s); i++ {
325		switch c := s[i]; {
326		case c == ' ' && mode == encodeQueryComponent:
327			t[j] = '+'
328			j++
329		case shouldEscape(c, mode):
330			t[j] = '%'
331			t[j+1] = upperhex[c>>4]
332			t[j+2] = upperhex[c&15]
333			j += 3
334		default:
335			t[j] = s[i]
336			j++
337		}
338	}
339	return string(t)
340}
341
// A URL represents a parsed URL (technically, a URI reference).
//
// The general form represented is:
//
//	[scheme:][//[userinfo@]host][/]path[?query][#fragment]
//
// URLs that do not start with a slash after the scheme are interpreted as:
//
//	scheme:opaque[?query][#fragment]
//
// The Host field contains the host and port subcomponents of the URL.
// When the port is present, it is separated from the host with a colon.
// When the host is an IPv6 address, it must be enclosed in square brackets:
// "[fe80::1]:80". The [net.JoinHostPort] function combines a host and port
// into a string suitable for the Host field, adding square brackets to
// the host when necessary.
//
// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
// A consequence is that it is impossible to tell which slashes in the Path were
// slashes in the raw URL and which were %2f. This distinction is rarely important,
// but when it is, the code should use the [URL.EscapedPath] method, which preserves
// the original encoding of Path.
//
// The RawPath field is an optional field which is only set when the default
// encoding of Path is different from the escaped path. See the EscapedPath method
// for more details.
//
// The Fragment and RawFragment fields have the same relationship as Path
// and RawPath; see the EscapedFragment method.
//
// URL's String method uses the EscapedPath method to obtain the path.
type URL struct {
	Scheme      string    // scheme without the trailing ':' (Parse lowercases it)
	Opaque      string    // encoded opaque data
	User        *Userinfo // username and password information
	Host        string    // host or host:port (see Hostname and Port methods)
	Path        string    // path (relative paths may omit leading slash)
	RawPath     string    // encoded path hint (see EscapedPath method)
	OmitHost    bool      // do not emit empty host (authority)
	ForceQuery  bool      // append a query ('?') even if RawQuery is empty
	RawQuery    string    // encoded query values, without '?'
	Fragment    string    // fragment for references, without '#'
	RawFragment string    // encoded fragment hint (see EscapedFragment method)
}
383
384// User returns a [Userinfo] containing the provided username
385// and no password set.
386func User(username string) *Userinfo {
387	return &Userinfo{username, "", false}
388}
389
390// UserPassword returns a [Userinfo] containing the provided username
391// and password.
392//
393// This functionality should only be used with legacy web sites.
394// RFC 2396 warns that interpreting Userinfo this way
395// “is NOT RECOMMENDED, because the passing of authentication
396// information in clear text (such as URI) has proven to be a
397// security risk in almost every case where it has been used.”
398func UserPassword(username, password string) *Userinfo {
399	return &Userinfo{username, password, true}
400}
401
// The Userinfo type is an immutable encapsulation of username and
// password details for a [URL]. An existing Userinfo value is guaranteed
// to have a username set (potentially empty, as allowed by RFC 2396),
// and optionally a password.
type Userinfo struct {
	username    string
	password    string
	passwordSet bool // distinguishes an empty password from no password
}

// Username returns the username. It returns "" for a nil receiver.
func (u *Userinfo) Username() string {
	if u != nil {
		return u.username
	}
	return ""
}

// Password returns the password and whether a password is set.
// It returns "", false for a nil receiver.
func (u *Userinfo) Password() (string, bool) {
	if u != nil {
		return u.password, u.passwordSet
	}
	return "", false
}
427
428// String returns the encoded userinfo information in the standard form
429// of "username[:password]".
430func (u *Userinfo) String() string {
431	if u == nil {
432		return ""
433	}
434	s := escape(u.username, encodeUserPassword)
435	if u.passwordSet {
436		s += ":" + escape(u.password, encodeUserPassword)
437	}
438	return s
439}
440
// getScheme splits rawURL into scheme and remainder when rawURL has the
// form scheme:path, where scheme matches [a-zA-Z][a-zA-Z0-9+.-]*.
//
// If rawURL has no valid scheme, it returns "", rawURL, nil.
// If rawURL starts with ':', it returns a "missing protocol scheme" error.
func getScheme(rawURL string) (scheme, path string, err error) {
	for i := 0; i < len(rawURL); i++ {
		c := rawURL[i]
		if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
			continue
		}
		if '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.' {
			// These may appear in a scheme, but not as its first byte.
			if i == 0 {
				return "", rawURL, nil
			}
			continue
		}
		if c == ':' && i > 0 {
			return rawURL[:i], rawURL[i+1:], nil
		}
		if c == ':' {
			return "", "", errors.New("missing protocol scheme")
		}
		// Any other byte cannot be part of a scheme.
		return "", rawURL, nil
	}
	return "", rawURL, nil
}
467
468// Parse parses a raw url into a [URL] structure.
469//
470// The url may be relative (a path, without a host) or absolute
471// (starting with a scheme). Trying to parse a hostname and path
472// without a scheme is invalid but may not necessarily return an
473// error, due to parsing ambiguities.
474func Parse(rawURL string) (*URL, error) {
475	// Cut off #frag
476	u, frag, _ := strings.Cut(rawURL, "#")
477	url, err := parse(u, false)
478	if err != nil {
479		return nil, &Error{"parse", u, err}
480	}
481	if frag == "" {
482		return url, nil
483	}
484	if err = url.setFragment(frag); err != nil {
485		return nil, &Error{"parse", rawURL, err}
486	}
487	return url, nil
488}
489
490// ParseRequestURI parses a raw url into a [URL] structure. It assumes that
491// url was received in an HTTP request, so the url is interpreted
492// only as an absolute URI or an absolute path.
493// The string url is assumed not to have a #fragment suffix.
494// (Web browsers strip #fragment before sending the URL to a web server.)
495func ParseRequestURI(rawURL string) (*URL, error) {
496	url, err := parse(rawURL, true)
497	if err != nil {
498		return nil, &Error{"parse", rawURL, err}
499	}
500	return url, nil
501}
502
// parse parses a URL from a string in one of two contexts. If
// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
// in which case only absolute URLs or path-absolute relative URLs are allowed.
// If viaRequest is false, all forms of relative URLs are allowed.
//
// rawURL must not contain a fragment; callers strip it beforehand.
// The returned errors are plain errors; callers wrap them in *Error.
func parse(rawURL string, viaRequest bool) (*URL, error) {
	var rest string
	var err error

	if stringContainsCTLByte(rawURL) {
		return nil, errors.New("net/url: invalid control character in URL")
	}

	if rawURL == "" && viaRequest {
		return nil, errors.New("empty url")
	}
	url := new(URL)

	// A lone "*" is stored verbatim as the path.
	if rawURL == "*" {
		url.Path = "*"
		return url, nil
	}

	// Split off possible leading "http:", "mailto:", etc.
	// Cannot contain escaped characters.
	if url.Scheme, rest, err = getScheme(rawURL); err != nil {
		return nil, err
	}
	url.Scheme = strings.ToLower(url.Scheme)

	// A single '?' at the very end means "empty query, but keep the '?'";
	// otherwise everything after the first '?' is the raw query.
	if strings.HasSuffix(rest, "?") && strings.Count(rest, "?") == 1 {
		url.ForceQuery = true
		rest = rest[:len(rest)-1]
	} else {
		rest, url.RawQuery, _ = strings.Cut(rest, "?")
	}

	if !strings.HasPrefix(rest, "/") {
		if url.Scheme != "" {
			// We consider rootless paths per RFC 3986 as opaque.
			url.Opaque = rest
			return url, nil
		}
		if viaRequest {
			return nil, errors.New("invalid URI for request")
		}

		// Avoid confusion with malformed schemes, like cache_object:foo/bar.
		// See golang.org/issue/16822.
		//
		// RFC 3986, §3.3:
		// In addition, a URI reference (Section 4.1) may be a relative-path reference,
		// in which case the first path segment cannot contain a colon (":") character.
		if segment, _, _ := strings.Cut(rest, "/"); strings.Contains(segment, ":") {
			// First path segment has colon. Not allowed in relative URL.
			return nil, errors.New("first path segment in URL cannot contain colon")
		}
	}

	// An authority follows a leading "//". Without a scheme it is only
	// parsed as an authority for non-request URLs that do not start
	// with "///".
	if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
		var authority string
		authority, rest = rest[2:], ""
		// The authority ends at the next '/', which begins the path.
		if i := strings.Index(authority, "/"); i >= 0 {
			authority, rest = authority[:i], authority[i:]
		}
		url.User, url.Host, err = parseAuthority(authority)
		if err != nil {
			return nil, err
		}
	} else if url.Scheme != "" && strings.HasPrefix(rest, "/") {
		// OmitHost is set to true when rawURL has an empty host (authority).
		// See golang.org/issue/46059.
		url.OmitHost = true
	}

	// Set Path and, optionally, RawPath.
	// RawPath is a hint of the encoding of Path. We don't want to set it if
	// the default escaping of Path is equivalent, to help make sure that people
	// don't rely on it in general.
	if err := url.setPath(rest); err != nil {
		return nil, err
	}
	return url, nil
}
586
587func parseAuthority(authority string) (user *Userinfo, host string, err error) {
588	i := strings.LastIndex(authority, "@")
589	if i < 0 {
590		host, err = parseHost(authority)
591	} else {
592		host, err = parseHost(authority[i+1:])
593	}
594	if err != nil {
595		return nil, "", err
596	}
597	if i < 0 {
598		return nil, host, nil
599	}
600	userinfo := authority[:i]
601	if !validUserinfo(userinfo) {
602		return nil, "", errors.New("net/url: invalid userinfo")
603	}
604	if !strings.Contains(userinfo, ":") {
605		if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
606			return nil, "", err
607		}
608		user = User(userinfo)
609	} else {
610		username, password, _ := strings.Cut(userinfo, ":")
611		if username, err = unescape(username, encodeUserPassword); err != nil {
612			return nil, "", err
613		}
614		if password, err = unescape(password, encodeUserPassword); err != nil {
615			return nil, "", err
616		}
617		user = UserPassword(username, password)
618	}
619	return user, host, nil
620}
621
622// parseHost parses host as an authority without user
623// information. That is, as host[:port].
624func parseHost(host string) (string, error) {
625	if strings.HasPrefix(host, "[") {
626		// Parse an IP-Literal in RFC 3986 and RFC 6874.
627		// E.g., "[fe80::1]", "[fe80::1%25en0]", "[fe80::1]:80".
628		i := strings.LastIndex(host, "]")
629		if i < 0 {
630			return "", errors.New("missing ']' in host")
631		}
632		colonPort := host[i+1:]
633		if !validOptionalPort(colonPort) {
634			return "", fmt.Errorf("invalid port %q after host", colonPort)
635		}
636
637		// RFC 6874 defines that %25 (%-encoded percent) introduces
638		// the zone identifier, and the zone identifier can use basically
639		// any %-encoding it likes. That's different from the host, which
640		// can only %-encode non-ASCII bytes.
641		// We do impose some restrictions on the zone, to avoid stupidity
642		// like newlines.
643		zone := strings.Index(host[:i], "%25")
644		if zone >= 0 {
645			host1, err := unescape(host[:zone], encodeHost)
646			if err != nil {
647				return "", err
648			}
649			host2, err := unescape(host[zone:i], encodeZone)
650			if err != nil {
651				return "", err
652			}
653			host3, err := unescape(host[i:], encodeHost)
654			if err != nil {
655				return "", err
656			}
657			return host1 + host2 + host3, nil
658		}
659	} else if i := strings.LastIndex(host, ":"); i != -1 {
660		colonPort := host[i:]
661		if !validOptionalPort(colonPort) {
662			return "", fmt.Errorf("invalid port %q after host", colonPort)
663		}
664	}
665
666	var err error
667	if host, err = unescape(host, encodeHost); err != nil {
668		return "", err
669	}
670	return host, nil
671}
672
// setPath sets the Path and RawPath fields of the URL based on the provided
// escaped path p. It maintains the invariant that RawPath is only specified
// when it differs from the default encoding of the path.
// For example:
//   - setPath("/foo/bar")   will set Path="/foo/bar" and RawPath=""
//   - setPath("/foo%2fbar") will set Path="/foo/bar" and RawPath="/foo%2fbar"
//
// setPath will return an error only if the provided path contains an invalid
// escaping. In that case neither field is modified.
//
// setPath should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - github.com/sagernet/sing
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname badSetPath net/url.(*URL).setPath
func (u *URL) setPath(p string) error {
	path, err := unescape(p, encodePath)
	if err != nil {
		return err
	}
	u.Path = path
	// Re-escape the decoded path; if that reproduces p exactly, the raw
	// form carries no extra information and need not be kept.
	if escp := escape(path, encodePath); p == escp {
		// Default encoding is fine.
		u.RawPath = ""
	} else {
		u.RawPath = p
	}
	return nil
}
705
// badSetPath is a body-less declaration that exists only as the local name
// in the go:linkname directive on (*URL).setPath, because we cannot
// linkname methods directly.
func badSetPath(*URL, string) error
708
709// EscapedPath returns the escaped form of u.Path.
710// In general there are multiple possible escaped forms of any path.
711// EscapedPath returns u.RawPath when it is a valid escaping of u.Path.
712// Otherwise EscapedPath ignores u.RawPath and computes an escaped
713// form on its own.
714// The [URL.String] and [URL.RequestURI] methods use EscapedPath to construct
715// their results.
716// In general, code should call EscapedPath instead of
717// reading u.RawPath directly.
718func (u *URL) EscapedPath() string {
719	if u.RawPath != "" && validEncoded(u.RawPath, encodePath) {
720		p, err := unescape(u.RawPath, encodePath)
721		if err == nil && p == u.Path {
722			return u.RawPath
723		}
724	}
725	if u.Path == "*" {
726		return "*" // don't escape (Issue 11202)
727	}
728	return escape(u.Path, encodePath)
729}
730
731// validEncoded reports whether s is a valid encoded path or fragment,
732// according to mode.
733// It must not contain any bytes that require escaping during encoding.
734func validEncoded(s string, mode encoding) bool {
735	for i := 0; i < len(s); i++ {
736		// RFC 3986, Appendix A.
737		// pchar = unreserved / pct-encoded / sub-delims / ":" / "@".
738		// shouldEscape is not quite compliant with the RFC,
739		// so we check the sub-delims ourselves and let
740		// shouldEscape handle the others.
741		switch s[i] {
742		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '@':
743			// ok
744		case '[', ']':
745			// ok - not specified in RFC 3986 but left alone by modern browsers
746		case '%':
747			// ok - percent encoded, will decode
748		default:
749			if shouldEscape(s[i], mode) {
750				return false
751			}
752		}
753	}
754	return true
755}
756
757// setFragment is like setPath but for Fragment/RawFragment.
758func (u *URL) setFragment(f string) error {
759	frag, err := unescape(f, encodeFragment)
760	if err != nil {
761		return err
762	}
763	u.Fragment = frag
764	if escf := escape(frag, encodeFragment); f == escf {
765		// Default encoding is fine.
766		u.RawFragment = ""
767	} else {
768		u.RawFragment = f
769	}
770	return nil
771}
772
773// EscapedFragment returns the escaped form of u.Fragment.
774// In general there are multiple possible escaped forms of any fragment.
775// EscapedFragment returns u.RawFragment when it is a valid escaping of u.Fragment.
776// Otherwise EscapedFragment ignores u.RawFragment and computes an escaped
777// form on its own.
778// The [URL.String] method uses EscapedFragment to construct its result.
779// In general, code should call EscapedFragment instead of
780// reading u.RawFragment directly.
781func (u *URL) EscapedFragment() string {
782	if u.RawFragment != "" && validEncoded(u.RawFragment, encodeFragment) {
783		f, err := unescape(u.RawFragment, encodeFragment)
784		if err == nil && f == u.Fragment {
785			return u.RawFragment
786		}
787	}
788	return escape(u.Fragment, encodeFragment)
789}
790
// validOptionalPort reports whether port is either an empty string
// or matches /^:\d*$/
func validOptionalPort(port string) bool {
	switch {
	case len(port) == 0:
		return true
	case port[0] != ':':
		return false
	}
	// Everything after the colon must be an ASCII digit.
	for i := 1; i < len(port); i++ {
		if c := port[i]; c < '0' || '9' < c {
			return false
		}
	}
	return true
}
807
// String reassembles the [URL] into a valid URL string.
// The general form of the result is one of:
//
//	scheme:opaque?query#fragment
//	scheme://userinfo@host/path?query#fragment
//
// If u.Opaque is non-empty, String uses the first form;
// otherwise it uses the second form.
// Any non-ASCII characters in host are escaped.
// To obtain the path, String uses u.EscapedPath().
//
// In the second form, the following rules apply:
//   - if u.Scheme is empty, scheme: is omitted.
//   - if u.User is nil, userinfo@ is omitted.
//   - if u.Host is empty, host/ is omitted.
//   - if u.Scheme and u.Host are empty and u.User is nil,
//     the entire scheme://userinfo@host/ is omitted.
//   - if u.Host is non-empty and u.Path begins with a /,
//     the form host/path does not add its own /.
//   - if u.RawQuery is empty, ?query is omitted.
//   - if u.Fragment is empty, #fragment is omitted.
func (u *URL) String() string {
	var buf strings.Builder

	// Estimate the output size from the unescaped field lengths plus
	// every separator that might be written, and reserve it up front.
	// Escaping may still make the result longer than this estimate.
	n := len(u.Scheme)
	if u.Opaque != "" {
		n += len(u.Opaque)
	} else {
		if !u.OmitHost && (u.Scheme != "" || u.Host != "" || u.User != nil) {
			// Username and Password accept a nil receiver, so this is
			// safe even when u.User is nil.
			username := u.User.Username()
			password, _ := u.User.Password()
			n += len(username) + len(password) + len(u.Host)
		}
		n += len(u.Path)
	}
	n += len(u.RawQuery) + len(u.RawFragment)
	n += len(":" + "//" + "//" + ":" + "@" + "/" + "./" + "?" + "#")
	buf.Grow(n)

	if u.Scheme != "" {
		buf.WriteString(u.Scheme)
		buf.WriteByte(':')
	}
	if u.Opaque != "" {
		buf.WriteString(u.Opaque)
	} else {
		if u.Scheme != "" || u.Host != "" || u.User != nil {
			if u.OmitHost && u.Host == "" && u.User == nil {
				// omit empty host
			} else {
				if u.Host != "" || u.Path != "" || u.User != nil {
					buf.WriteString("//")
				}
				if ui := u.User; ui != nil {
					buf.WriteString(ui.String())
					buf.WriteByte('@')
				}
				if h := u.Host; h != "" {
					buf.WriteString(escape(h, encodeHost))
				}
			}
		}
		path := u.EscapedPath()
		// Separate a host from a path that lacks its own leading slash.
		if path != "" && path[0] != '/' && u.Host != "" {
			buf.WriteByte('/')
		}
		// Nothing written so far means the path opens the result.
		if buf.Len() == 0 {
			// RFC 3986 §4.2
			// A path segment that contains a colon character (e.g., "this:that")
			// cannot be used as the first segment of a relative-path reference, as
			// it would be mistaken for a scheme name. Such a segment must be
			// preceded by a dot-segment (e.g., "./this:that") to make a relative-
			// path reference.
			if segment, _, _ := strings.Cut(path, "/"); strings.Contains(segment, ":") {
				buf.WriteString("./")
			}
		}
		buf.WriteString(path)
	}
	if u.ForceQuery || u.RawQuery != "" {
		buf.WriteByte('?')
		buf.WriteString(u.RawQuery)
	}
	if u.Fragment != "" {
		buf.WriteByte('#')
		buf.WriteString(u.EscapedFragment())
	}
	return buf.String()
}
897
898// Redacted is like [URL.String] but replaces any password with "xxxxx".
899// Only the password in u.User is redacted.
900func (u *URL) Redacted() string {
901	if u == nil {
902		return ""
903	}
904
905	ru := *u
906	if _, has := ru.User.Password(); has {
907		ru.User = UserPassword(ru.User.Username(), "xxxxx")
908	}
909	return ru.String()
910}
911
// Values maps a string key to a list of values.
// It is typically used for query parameters and form values.
// Unlike in the http.Header map, the keys in a Values map
// are case-sensitive.
type Values map[string][]string

// Get gets the first value associated with the given key.
// If there are no values associated with the key, Get returns
// the empty string. To access multiple values, use the map
// directly.
func (v Values) Get(key string) string {
	if vs := v[key]; len(vs) > 0 {
		return vs[0]
	}
	return ""
}

// Set sets the key to value. It replaces any existing
// values.
func (v Values) Set(key, value string) {
	v[key] = []string{value}
}

// Add adds the value to key. It appends to any existing
// values associated with key.
func (v Values) Add(key, value string) {
	existing := v[key]
	v[key] = append(existing, value)
}

// Del deletes the values associated with key.
func (v Values) Del(key string) {
	delete(v, key)
}

// Has checks whether a given key is set.
// A key mapped to an empty list still counts as set.
func (v Values) Has(key string) bool {
	_, present := v[key]
	return present
}
952
953// ParseQuery parses the URL-encoded query string and returns
954// a map listing the values specified for each key.
955// ParseQuery always returns a non-nil map containing all the
956// valid query parameters found; err describes the first decoding error
957// encountered, if any.
958//
959// Query is expected to be a list of key=value settings separated by ampersands.
960// A setting without an equals sign is interpreted as a key set to an empty
961// value.
962// Settings containing a non-URL-encoded semicolon are considered invalid.
963func ParseQuery(query string) (Values, error) {
964	m := make(Values)
965	err := parseQuery(m, query)
966	return m, err
967}
968
969func parseQuery(m Values, query string) (err error) {
970	for query != "" {
971		var key string
972		key, query, _ = strings.Cut(query, "&")
973		if strings.Contains(key, ";") {
974			err = fmt.Errorf("invalid semicolon separator in query")
975			continue
976		}
977		if key == "" {
978			continue
979		}
980		key, value, _ := strings.Cut(key, "=")
981		key, err1 := QueryUnescape(key)
982		if err1 != nil {
983			if err == nil {
984				err = err1
985			}
986			continue
987		}
988		value, err1 = QueryUnescape(value)
989		if err1 != nil {
990			if err == nil {
991				err = err1
992			}
993			continue
994		}
995		m[key] = append(m[key], value)
996	}
997	return err
998}
999
1000// Encode encodes the values into “URL encoded” form
1001// ("bar=baz&foo=quux") sorted by key.
1002func (v Values) Encode() string {
1003	if len(v) == 0 {
1004		return ""
1005	}
1006	var buf strings.Builder
1007	keys := make([]string, 0, len(v))
1008	for k := range v {
1009		keys = append(keys, k)
1010	}
1011	slices.Sort(keys)
1012	for _, k := range keys {
1013		vs := v[k]
1014		keyEscaped := QueryEscape(k)
1015		for _, v := range vs {
1016			if buf.Len() > 0 {
1017				buf.WriteByte('&')
1018			}
1019			buf.WriteString(keyEscaped)
1020			buf.WriteByte('=')
1021			buf.WriteString(QueryEscape(v))
1022		}
1023	}
1024	return buf.String()
1025}
1026
// resolvePath merges ref onto base and then removes the "." and ".."
// segments from the merged path, per RFC 3986.
//
// An empty ref leaves base as the path to clean, a ref that starts
// with '/' replaces base entirely, and any other ref replaces
// everything after the last '/' of base. The result is "" only when
// the merged path is empty; otherwise it starts with '/'.
func resolvePath(base, ref string) string {
	var merged string
	switch {
	case ref == "":
		merged = base
	case ref[0] == '/':
		merged = ref
	default:
		merged = base[:strings.LastIndex(base, "/")+1] + ref
	}
	if merged == "" {
		return ""
	}

	var (
		seg string
		out strings.Builder
	)
	// The result always starts with '/', so emit it up front.
	out.WriteByte('/')
	// needSlash reports whether the next regular segment must be
	// preceded by a separator.
	needSlash := false
	rest := merged
	for more := true; more; {
		seg, rest, more = strings.Cut(rest, "/")
		switch seg {
		case ".":
			// Dropped, but anything that follows still needs a separator.
			needSlash = true
		case "..":
			// Remove the last segment written so far, ignoring the
			// leading '/' that was written up front.
			written := out.String()[1:]
			out.Reset()
			out.WriteByte('/')
			if cut := strings.LastIndexByte(written, '/'); cut >= 0 {
				out.WriteString(written[:cut])
			} else {
				needSlash = false
			}
		default:
			if needSlash {
				out.WriteByte('/')
			}
			out.WriteString(seg)
			needSlash = true
		}
	}

	// A path ending in "." or ".." resolves to a directory.
	if seg == "." || seg == ".." {
		out.WriteByte('/')
	}

	// The up-front '/' may have been followed by another one; keep
	// only a single leading slash.
	cleaned := out.String()
	if len(cleaned) > 1 && cleaned[1] == '/' {
		cleaned = cleaned[1:]
	}
	return cleaned
}
1092
1093// IsAbs reports whether the [URL] is absolute.
1094// Absolute means that it has a non-empty scheme.
1095func (u *URL) IsAbs() bool {
1096	return u.Scheme != ""
1097}
1098
1099// Parse parses a [URL] in the context of the receiver. The provided URL
1100// may be relative or absolute. Parse returns nil, err on parse
1101// failure, otherwise its return value is the same as [URL.ResolveReference].
1102func (u *URL) Parse(ref string) (*URL, error) {
1103	refURL, err := Parse(ref)
1104	if err != nil {
1105		return nil, err
1106	}
1107	return u.ResolveReference(refURL), nil
1108}
1109
1110// ResolveReference resolves a URI reference to an absolute URI from
1111// an absolute base URI u, per RFC 3986 Section 5.2. The URI reference
1112// may be relative or absolute. ResolveReference always returns a new
1113// [URL] instance, even if the returned URL is identical to either the
1114// base or reference. If ref is an absolute URL, then ResolveReference
1115// ignores base and returns a copy of ref.
1116func (u *URL) ResolveReference(ref *URL) *URL {
1117	url := *ref
1118	if ref.Scheme == "" {
1119		url.Scheme = u.Scheme
1120	}
1121	if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
1122		// The "absoluteURI" or "net_path" cases.
1123		// We can ignore the error from setPath since we know we provided a
1124		// validly-escaped path.
1125		url.setPath(resolvePath(ref.EscapedPath(), ""))
1126		return &url
1127	}
1128	if ref.Opaque != "" {
1129		url.User = nil
1130		url.Host = ""
1131		url.Path = ""
1132		return &url
1133	}
1134	if ref.Path == "" && !ref.ForceQuery && ref.RawQuery == "" {
1135		url.RawQuery = u.RawQuery
1136		if ref.Fragment == "" {
1137			url.Fragment = u.Fragment
1138			url.RawFragment = u.RawFragment
1139		}
1140	}
1141	if ref.Path == "" && u.Opaque != "" {
1142		url.Opaque = u.Opaque
1143		url.User = nil
1144		url.Host = ""
1145		url.Path = ""
1146		return &url
1147	}
1148	// The "abs_path" or "rel_path" cases.
1149	url.Host = u.Host
1150	url.User = u.User
1151	url.setPath(resolvePath(u.EscapedPath(), ref.EscapedPath()))
1152	return &url
1153}
1154
1155// Query parses RawQuery and returns the corresponding values.
1156// It silently discards malformed value pairs.
1157// To check errors use [ParseQuery].
1158func (u *URL) Query() Values {
1159	v, _ := ParseQuery(u.RawQuery)
1160	return v
1161}
1162
1163// RequestURI returns the encoded path?query or opaque?query
1164// string that would be used in an HTTP request for u.
1165func (u *URL) RequestURI() string {
1166	result := u.Opaque
1167	if result == "" {
1168		result = u.EscapedPath()
1169		if result == "" {
1170			result = "/"
1171		}
1172	} else {
1173		if strings.HasPrefix(result, "//") {
1174			result = u.Scheme + ":" + result
1175		}
1176	}
1177	if u.ForceQuery || u.RawQuery != "" {
1178		result += "?" + u.RawQuery
1179	}
1180	return result
1181}
1182
1183// Hostname returns u.Host, stripping any valid port number if present.
1184//
1185// If the result is enclosed in square brackets, as literal IPv6 addresses are,
1186// the square brackets are removed from the result.
1187func (u *URL) Hostname() string {
1188	host, _ := splitHostPort(u.Host)
1189	return host
1190}
1191
1192// Port returns the port part of u.Host, without the leading colon.
1193//
1194// If u.Host doesn't contain a valid numeric port, Port returns an empty string.
1195func (u *URL) Port() string {
1196	_, port := splitHostPort(u.Host)
1197	return port
1198}
1199
1200// splitHostPort separates host and port. If the port is not valid, it returns
1201// the entire input as host, and it doesn't check the validity of the host.
1202// Unlike net.SplitHostPort, but per RFC 3986, it requires ports to be numeric.
1203func splitHostPort(hostPort string) (host, port string) {
1204	host = hostPort
1205
1206	colon := strings.LastIndexByte(host, ':')
1207	if colon != -1 && validOptionalPort(host[colon:]) {
1208		host, port = host[:colon], host[colon+1:]
1209	}
1210
1211	if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") {
1212		host = host[1 : len(host)-1]
1213	}
1214
1215	return
1216}
1217
1218// Marshaling interface implementations.
1219// Would like to implement MarshalText/UnmarshalText but that will change the JSON representation of URLs.
1220
1221func (u *URL) MarshalBinary() (text []byte, err error) {
1222	return []byte(u.String()), nil
1223}
1224
1225func (u *URL) UnmarshalBinary(text []byte) error {
1226	u1, err := Parse(string(text))
1227	if err != nil {
1228		return err
1229	}
1230	*u = *u1
1231	return nil
1232}
1233
1234// JoinPath returns a new [URL] with the provided path elements joined to
1235// any existing path and the resulting path cleaned of any ./ or ../ elements.
1236// Any sequences of multiple / characters will be reduced to a single /.
1237func (u *URL) JoinPath(elem ...string) *URL {
1238	elem = append([]string{u.EscapedPath()}, elem...)
1239	var p string
1240	if !strings.HasPrefix(elem[0], "/") {
1241		// Return a relative path if u is relative,
1242		// but ensure that it contains no ../ elements.
1243		elem[0] = "/" + elem[0]
1244		p = path.Join(elem...)[1:]
1245	} else {
1246		p = path.Join(elem...)
1247	}
1248	// path.Join will remove any trailing slashes.
1249	// Preserve at least one.
1250	if strings.HasSuffix(elem[len(elem)-1], "/") && !strings.HasSuffix(p, "/") {
1251		p += "/"
1252	}
1253	url := *u
1254	url.setPath(p)
1255	return &url
1256}
1257
// validUserinfo reports whether s is a valid userinfo string per RFC 3986
// Section 3.2.1:
//
//	userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
//	unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
//	sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
//	              / "*" / "+" / "," / ";" / "="
//
// '%' is accepted wherever it appears: pct-encoded sequences are not
// validated here, the caller does that via func unescape. '@' is
// accepted as well.
func validUserinfo(s string) bool {
	for _, c := range s {
		switch c {
		case '-', '.', '_', '~':
			// unreserved punctuation
		case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
			// sub-delims
		case ':', '%', '@':
			// separator, escape introducer, and '@'
		default:
			isLetter := ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
			isDigit := '0' <= c && c <= '9'
			if !isLetter && !isDigit {
				return false
			}
		}
	}
	return true
}
1288
// stringContainsCTLByte reports whether s contains any ASCII control
// character, meaning a byte below 0x20 (space) or the byte 0x7f (DEL).
func stringContainsCTLByte(s string) bool {
	for _, b := range []byte(s) {
		if b == 0x7f || b < ' ' {
			return true
		}
	}
	return false
}
1299
1300// JoinPath returns a [URL] string with the provided path elements joined to
1301// the existing path of base and the resulting path cleaned of any ./ or ../ elements.
1302func JoinPath(base string, elem ...string) (result string, err error) {
1303	url, err := Parse(base)
1304	if err != nil {
1305		return
1306	}
1307	result = url.JoinPath(elem...).String()
1308	return
1309}
1310