1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package httpproxy provides support for HTTP proxy determination
6// based on environment variables, as provided by net/http's
7// ProxyFromEnvironment function.
8//
9// The API is not subject to the Go 1 compatibility promise and may change at
10// any time.
11package httpproxy
12
13import (
14	"errors"
15	"fmt"
16	"net"
17	"net/url"
18	"os"
19	"strings"
20	"unicode/utf8"
21
22	"golang.org/x/net/idna"
23)
24
// Config holds configuration for HTTP proxy settings. See
// FromEnvironment for details.
//
// ProxyFunc works on a copy of the Config, so later changes to these
// fields do not affect proxy functions that were already created.
type Config struct {
	// HTTPProxy represents the value of the HTTP_PROXY or
	// http_proxy environment variable. It will be used as the proxy
	// URL for HTTP requests unless overridden by NoProxy.
	HTTPProxy string

	// HTTPSProxy represents the HTTPS_PROXY or https_proxy
	// environment variable. It will be used as the proxy URL for
	// HTTPS requests unless overridden by NoProxy.
	HTTPSProxy string

	// NoProxy represents the NO_PROXY or no_proxy environment
	// variable. It specifies a string that contains comma-separated values
	// specifying hosts that should be excluded from proxying. Each value is
	// represented by an IP address prefix (1.2.3.4), an IP address prefix in
	// CIDR notation (1.2.3.4/8), a domain name, or a special DNS label (*).
	// An IP address prefix and domain name can also include a literal port
	// number (1.2.3.4:80).
	// A domain name matches that name and all subdomains. A domain name with
	// a leading "." matches subdomains only. For example "foo.com" matches
	// "foo.com" and "bar.foo.com"; ".y.com" matches "x.y.com" but not "y.com".
	// A leading "*." is treated like a leading ".".
	// A single asterisk (*) indicates that no proxying should be done.
	// Values are trimmed of surrounding white space and lowercased before
	// use. A best effort is made to parse the string and errors are
	// ignored.
	NoProxy string

	// CGI holds whether the current process is running
	// as a CGI handler (FromEnvironment infers this from the
	// presence of a REQUEST_METHOD environment variable).
	// When this is set, the function returned by ProxyFunc will
	// return an error when HTTPProxy applies, because a client could be
	// setting HTTP_PROXY maliciously. See https://golang.org/s/cgihttpproxy.
	CGI bool
}
61
// config holds the parsed configuration for HTTP proxy settings.
// ProxyFunc creates one from a copy of a Config and init fills in the
// parsed fields.
type config struct {
	// Config represents the original configuration as defined above.
	Config

	// httpsProxy is the parsed URL of the HTTPSProxy if defined.
	// It is nil when HTTPSProxy is empty or could not be parsed.
	httpsProxy *url.URL

	// httpProxy is the parsed URL of the HTTPProxy if defined.
	// It is nil when HTTPProxy is empty or could not be parsed.
	httpProxy *url.URL

	// ipMatchers represent all values in the NoProxy that are IP address
	// prefixes or an IP address in CIDR notation. useProxy consults them
	// only when the request host parses as an IP address.
	ipMatchers []matcher

	// domainMatchers represent all values in the NoProxy that are a domain
	// name or hostname & domain name. useProxy consults them for every
	// request host.
	//
	// A NoProxy value of "*" makes init store a single allMatch in both
	// ipMatchers and domainMatchers.
	domainMatchers []matcher
}
81
82// FromEnvironment returns a Config instance populated from the
83// environment variables HTTP_PROXY, HTTPS_PROXY and NO_PROXY (or the
84// lowercase versions thereof).
85//
86// The environment values may be either a complete URL or a
87// "host[:port]", in which case the "http" scheme is assumed. An error
88// is returned if the value is a different form.
89func FromEnvironment() *Config {
90	return &Config{
91		HTTPProxy:  getEnvAny("HTTP_PROXY", "http_proxy"),
92		HTTPSProxy: getEnvAny("HTTPS_PROXY", "https_proxy"),
93		NoProxy:    getEnvAny("NO_PROXY", "no_proxy"),
94		CGI:        os.Getenv("REQUEST_METHOD") != "",
95	}
96}
97
// getEnvAny returns the value of the first environment variable in names
// that is set to a non-empty string. It returns "" when names is empty or
// none of the variables has a non-empty value.
func getEnvAny(names ...string) string {
	for i := 0; i < len(names); i++ {
		value := os.Getenv(names[i])
		if value == "" {
			// Unset and empty are treated alike: try the next name.
			continue
		}
		return value
	}
	return ""
}
106
107// ProxyFunc returns a function that determines the proxy URL to use for
108// a given request URL. Changing the contents of cfg will not affect
109// proxy functions created earlier.
110//
111// A nil URL and nil error are returned if no proxy is defined in the
112// environment, or a proxy should not be used for the given request, as
113// defined by NO_PROXY.
114//
115// As a special case, if req.URL.Host is "localhost" or a loopback address
116// (with or without a port number), then a nil URL and nil error will be returned.
117func (cfg *Config) ProxyFunc() func(reqURL *url.URL) (*url.URL, error) {
118	// Preprocess the Config settings for more efficient evaluation.
119	cfg1 := &config{
120		Config: *cfg,
121	}
122	cfg1.init()
123	return cfg1.proxyForURL
124}
125
126func (cfg *config) proxyForURL(reqURL *url.URL) (*url.URL, error) {
127	var proxy *url.URL
128	if reqURL.Scheme == "https" {
129		proxy = cfg.httpsProxy
130	} else if reqURL.Scheme == "http" {
131		proxy = cfg.httpProxy
132		if proxy != nil && cfg.CGI {
133			return nil, errors.New("refusing to use HTTP_PROXY value in CGI environment; see golang.org/s/cgihttpproxy")
134		}
135	}
136	if proxy == nil {
137		return nil, nil
138	}
139	if !cfg.useProxy(canonicalAddr(reqURL)) {
140		return nil, nil
141	}
142
143	return proxy, nil
144}
145
// parseProxy turns a proxy setting into a URL.
//
// An empty setting yields a nil URL and nil error. A setting that does
// not parse, or that parses without a scheme or without a host (as
// "host:port" does), is retried with "http://" prepended, and the result
// of the retry is returned if it parses. Failing that, the outcome of
// parsing the original text is reported: an error if it did not parse,
// otherwise the URL as parsed.
func parseProxy(proxy string) (*url.URL, error) {
	if proxy == "" {
		return nil, nil
	}

	direct, directErr := url.Parse(proxy)
	incomplete := directErr != nil || direct.Scheme == "" || direct.Host == ""
	if incomplete {
		// The setting was bogus as written. See whether it makes sense
		// as a bare host; if not, complain about the original below.
		prefixed, prefixedErr := url.Parse("http://" + proxy)
		if prefixedErr == nil {
			return prefixed, nil
		}
	}

	if directErr != nil {
		return nil, fmt.Errorf("invalid proxy address %q: %v", proxy, directErr)
	}
	return direct, nil
}
165
166// useProxy reports whether requests to addr should use a proxy,
167// according to the NO_PROXY or no_proxy environment variable.
168// addr is always a canonicalAddr with a host and port.
169func (cfg *config) useProxy(addr string) bool {
170	if len(addr) == 0 {
171		return true
172	}
173	host, port, err := net.SplitHostPort(addr)
174	if err != nil {
175		return false
176	}
177	if host == "localhost" {
178		return false
179	}
180	ip := net.ParseIP(host)
181	if ip != nil {
182		if ip.IsLoopback() {
183			return false
184		}
185	}
186
187	addr = strings.ToLower(strings.TrimSpace(host))
188
189	if ip != nil {
190		for _, m := range cfg.ipMatchers {
191			if m.match(addr, port, ip) {
192				return false
193			}
194		}
195	}
196	for _, m := range cfg.domainMatchers {
197		if m.match(addr, port, ip) {
198			return false
199		}
200	}
201	return true
202}
203
204func (c *config) init() {
205	if parsed, err := parseProxy(c.HTTPProxy); err == nil {
206		c.httpProxy = parsed
207	}
208	if parsed, err := parseProxy(c.HTTPSProxy); err == nil {
209		c.httpsProxy = parsed
210	}
211
212	for _, p := range strings.Split(c.NoProxy, ",") {
213		p = strings.ToLower(strings.TrimSpace(p))
214		if len(p) == 0 {
215			continue
216		}
217
218		if p == "*" {
219			c.ipMatchers = []matcher{allMatch{}}
220			c.domainMatchers = []matcher{allMatch{}}
221			return
222		}
223
224		// IPv4/CIDR, IPv6/CIDR
225		if _, pnet, err := net.ParseCIDR(p); err == nil {
226			c.ipMatchers = append(c.ipMatchers, cidrMatch{cidr: pnet})
227			continue
228		}
229
230		// IPv4:port, [IPv6]:port
231		phost, pport, err := net.SplitHostPort(p)
232		if err == nil {
233			if len(phost) == 0 {
234				// There is no host part, likely the entry is malformed; ignore.
235				continue
236			}
237			if phost[0] == '[' && phost[len(phost)-1] == ']' {
238				phost = phost[1 : len(phost)-1]
239			}
240		} else {
241			phost = p
242		}
243		// IPv4, IPv6
244		if pip := net.ParseIP(phost); pip != nil {
245			c.ipMatchers = append(c.ipMatchers, ipMatch{ip: pip, port: pport})
246			continue
247		}
248
249		if len(phost) == 0 {
250			// There is no host part, likely the entry is malformed; ignore.
251			continue
252		}
253
254		// domain.com or domain.com:80
255		// foo.com matches bar.foo.com
256		// .domain.com or .domain.com:port
257		// *.domain.com or *.domain.com:port
258		if strings.HasPrefix(phost, "*.") {
259			phost = phost[1:]
260		}
261		matchHost := false
262		if phost[0] != '.' {
263			matchHost = true
264			phost = "." + phost
265		}
266		if v, err := idnaASCII(phost); err == nil {
267			phost = v
268		}
269		c.domainMatchers = append(c.domainMatchers, domainMatch{host: phost, port: pport, matchHost: matchHost})
270	}
271}
272
// portMap gives the default port, as a decimal string, for the URL
// schemes known to this package. canonicalAddr uses it when a URL has no
// explicit port; a scheme that is not listed yields "".
var portMap = map[string]string{
	"socks5": "1080",
	"https":  "443",
	"http":   "80",
}
278
279// canonicalAddr returns url.Host but always with a ":port" suffix
280func canonicalAddr(url *url.URL) string {
281	addr := url.Hostname()
282	if v, err := idnaASCII(addr); err == nil {
283		addr = v
284	}
285	port := url.Port()
286	if port == "" {
287		port = portMap[url.Scheme]
288	}
289	return net.JoinHostPort(addr, port)
290}
291
// hasPort reports whether s, a string of the form "host", "host:port" or
// "[ipv6::address]:port", includes a port. It does so by checking that
// the last ':' comes after the last ']'.
func hasPort(s string) bool {
	lastColon := strings.LastIndex(s, ":")
	lastBracket := strings.LastIndex(s, "]")
	return lastColon > lastBracket
}
295
296func idnaASCII(v string) (string, error) {
297	// TODO: Consider removing this check after verifying performance is okay.
298	// Right now punycode verification, length checks, context checks, and the
299	// permissible character tests are all omitted. It also prevents the ToASCII
300	// call from salvaging an invalid IDN, when possible. As a result it may be
301	// possible to have two IDNs that appear identical to the user where the
302	// ASCII-only version causes an error downstream whereas the non-ASCII
303	// version does not.
304	// Note that for correct ASCII IDNs ToASCII will only do considerably more
305	// work, but it will not cause an allocation.
306	if isASCII(v) {
307		return v, nil
308	}
309	return idna.Lookup.ToASCII(v)
310}
311
// isASCII reports whether every byte of s is below utf8.RuneSelf, that
// is, whether s contains ASCII characters only. The empty string counts
// as ASCII.
func isASCII(s string) bool {
	for _, b := range []byte(s) {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
320
// matcher represents the matching rule for a given value in the NO_PROXY list
type matcher interface {
	// match reports whether the rule applies to a request target.
	// useProxy passes the lowercased request host, the request port,
	// and the parsed IP address of the host, which is nil when the host
	// is not an IP address. When match returns true, useProxy reports
	// that the proxy must not be used for the request.
	match(host, port string, ip net.IP) bool
}
327
// allMatch is the rule stored for a NO_PROXY value of "*". It matches
// on all possible inputs.
type allMatch struct{}

// match reports true whatever the host, port and ip are.
func (allMatch) match(host, port string, ip net.IP) bool {
	return true
}
334
// cidrMatch is the rule for a NO_PROXY value in CIDR notation.
type cidrMatch struct {
	cidr *net.IPNet
}

// match reports whether ip lies within the rule's network. The host and
// port arguments are not used.
func (m cidrMatch) match(host, port string, ip net.IP) bool {
	network := m.cidr
	return network.Contains(ip)
}
342
// ipMatch is the rule for a NO_PROXY value that is a single IP address,
// optionally restricted to one port.
type ipMatch struct {
	ip   net.IP
	port string
}

// match reports whether ip equals the rule's address and, when the rule
// names a port, whether port equals it as well. The host argument is not
// used.
func (m ipMatch) match(host, port string, ip net.IP) bool {
	if !m.ip.Equal(ip) {
		return false
	}
	// An empty rule port stands for any port.
	return m.port == "" || m.port == port
}
354
// domainMatch is the rule for a NO_PROXY value that is a domain name.
type domainMatch struct {
	// host is the domain with a leading ".", so that subdomains can be
	// recognised with a suffix test.
	host string
	// port restricts the rule to one port; "" stands for any port.
	port string

	// matchHost reports whether the domain itself, without the leading
	// ".", matches in addition to its subdomains.
	matchHost bool
}

// match reports whether host is covered by the rule's domain and, when
// the rule names a port, whether port equals it.
//
// IP literals never match. They are not domain names, and comparing
// their text against a domain suffix is unsafe: the zone of an IPv6
// literal is free-form, so a host such as "::1%.example.com" would
// otherwise match the rule ".example.com" and bypass the proxy. A host
// is taken to be an IP literal when ip is non-nil or when it contains a
// ':', which no domain name does; the latter covers zoned literals that
// the caller could not parse.
func (m domainMatch) match(host, port string, ip net.IP) bool {
	if ip != nil || strings.IndexByte(host, ':') >= 0 {
		return false
	}
	if strings.HasSuffix(host, m.host) || (m.matchHost && host == m.host[1:]) {
		return m.port == "" || m.port == port
	}
	return false
}
368