1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package modfetch
6
7import (
8	"context"
9	"encoding/json"
10	"errors"
11	"fmt"
12	"io"
13	"io/fs"
14	"net/url"
15	"path"
16	pathpkg "path"
17	"path/filepath"
18	"strings"
19	"sync"
20	"time"
21
22	"cmd/go/internal/base"
23	"cmd/go/internal/cfg"
24	"cmd/go/internal/modfetch/codehost"
25	"cmd/go/internal/web"
26
27	"golang.org/x/mod/module"
28	"golang.org/x/mod/semver"
29)
30
// HelpGoproxy holds the "goproxy" documentation text: a short description of
// what a Go module proxy is and a pointer to the full GOPROXY protocol
// reference.
var HelpGoproxy = &base.Command{
	UsageLine: "goproxy",
	Short:     "module proxy protocol",
	Long: `
A Go module proxy is any web server that can respond to GET requests for
URLs of a specified form. The requests have no query parameters, so even
a site serving from a fixed file system (including a file:/// URL)
can be a module proxy.

For details on the GOPROXY protocol, see
https://golang.org/ref/mod#goproxy-protocol.
`,
}
44
// proxyOnce caches the result of parsing cfg.GOPROXY. proxyList populates
// list and err inside the embedded sync.Once, so the parse runs at most once
// and every later call observes the same result.
var proxyOnce struct {
	sync.Once
	// list is the parsed sequence of proxies, in the order they are tried.
	list []proxySpec
	// err is the error, if any, produced while parsing the list.
	err  error
}
50
// proxySpec describes one entry of the parsed GOPROXY list.
type proxySpec struct {
	// url is the proxy URL or one of "off", "direct", "noproxy".
	url string

	// fallBackOnError is true if a request should be attempted on the next proxy
	// in the list after any error from this proxy. If fallBackOnError is false,
	// the request will only be attempted on the next proxy if the error is
	// equivalent to fs.ErrNotExist, which is true for 404 and 410 responses.
	//
	// proxyList sets it when the entry is followed by "|" rather than ",".
	fallBackOnError bool
}
61
62func proxyList() ([]proxySpec, error) {
63	proxyOnce.Do(func() {
64		if cfg.GONOPROXY != "" && cfg.GOPROXY != "direct" {
65			proxyOnce.list = append(proxyOnce.list, proxySpec{url: "noproxy"})
66		}
67
68		goproxy := cfg.GOPROXY
69		for goproxy != "" {
70			var url string
71			fallBackOnError := false
72			if i := strings.IndexAny(goproxy, ",|"); i >= 0 {
73				url = goproxy[:i]
74				fallBackOnError = goproxy[i] == '|'
75				goproxy = goproxy[i+1:]
76			} else {
77				url = goproxy
78				goproxy = ""
79			}
80
81			url = strings.TrimSpace(url)
82			if url == "" {
83				continue
84			}
85			if url == "off" {
86				// "off" always fails hard, so can stop walking list.
87				proxyOnce.list = append(proxyOnce.list, proxySpec{url: "off"})
88				break
89			}
90			if url == "direct" {
91				proxyOnce.list = append(proxyOnce.list, proxySpec{url: "direct"})
92				// For now, "direct" is the end of the line. We may decide to add some
93				// sort of fallback behavior for them in the future, so ignore
94				// subsequent entries for forward-compatibility.
95				break
96			}
97
98			// Single-word tokens are reserved for built-in behaviors, and anything
99			// containing the string ":/" or matching an absolute file path must be a
100			// complete URL. For all other paths, implicitly add "https://".
101			if strings.ContainsAny(url, ".:/") && !strings.Contains(url, ":/") && !filepath.IsAbs(url) && !path.IsAbs(url) {
102				url = "https://" + url
103			}
104
105			// Check that newProxyRepo accepts the URL.
106			// It won't do anything with the path.
107			if _, err := newProxyRepo(url, "golang.org/x/text"); err != nil {
108				proxyOnce.err = err
109				return
110			}
111
112			proxyOnce.list = append(proxyOnce.list, proxySpec{
113				url:             url,
114				fallBackOnError: fallBackOnError,
115			})
116		}
117
118		if len(proxyOnce.list) == 0 ||
119			len(proxyOnce.list) == 1 && proxyOnce.list[0].url == "noproxy" {
120			// There were no proxies, other than the implicit "noproxy" added when
121			// GONOPROXY is set. This can happen if GOPROXY is a non-empty string
122			// like "," or " ".
123			proxyOnce.err = fmt.Errorf("GOPROXY list is not the empty string, but contains no entries")
124		}
125	})
126
127	return proxyOnce.list, proxyOnce.err
128}
129
130// TryProxies iterates f over each configured proxy (including "noproxy" and
131// "direct" if applicable) until f returns no error or until f returns an
132// error that is not equivalent to fs.ErrNotExist on a proxy configured
133// not to fall back on errors.
134//
135// TryProxies then returns that final error.
136//
137// If GOPROXY is set to "off", TryProxies invokes f once with the argument
138// "off".
139func TryProxies(f func(proxy string) error) error {
140	proxies, err := proxyList()
141	if err != nil {
142		return err
143	}
144	if len(proxies) == 0 {
145		panic("GOPROXY list is empty")
146	}
147
148	// We try to report the most helpful error to the user. "direct" and "noproxy"
149	// errors are best, followed by proxy errors other than ErrNotExist, followed
150	// by ErrNotExist.
151	//
152	// Note that errProxyOff, errNoproxy, and errUseProxy are equivalent to
153	// ErrNotExist. errUseProxy should only be returned if "noproxy" is the only
154	// proxy. errNoproxy should never be returned, since there should always be a
155	// more useful error from "noproxy" first.
156	const (
157		notExistRank = iota
158		proxyRank
159		directRank
160	)
161	var bestErr error
162	bestErrRank := notExistRank
163	for _, proxy := range proxies {
164		err := f(proxy.url)
165		if err == nil {
166			return nil
167		}
168		isNotExistErr := errors.Is(err, fs.ErrNotExist)
169
170		if proxy.url == "direct" || (proxy.url == "noproxy" && err != errUseProxy) {
171			bestErr = err
172			bestErrRank = directRank
173		} else if bestErrRank <= proxyRank && !isNotExistErr {
174			bestErr = err
175			bestErrRank = proxyRank
176		} else if bestErrRank == notExistRank {
177			bestErr = err
178		}
179
180		if !proxy.fallBackOnError && !isNotExistErr {
181			break
182		}
183	}
184	return bestErr
185}
186
// proxyRepo is the value newProxyRepo returns as a Repo. It serves a single
// module path by issuing requests below that module's URL on one proxy.
type proxyRepo struct {
	url          *url.URL // The combined module proxy URL joined with the module path.
	path         string   // The module path (unescaped).
	redactedBase string   // The base module proxy URL in [url.URL.Redacted] form.

	// listLatestOnce guards listLatest and listLatestErr, which cache the
	// outcome of picking the latest version out of the proxy's "@v/list"
	// response. Whichever of Versions or latest runs first fills them in.
	listLatestOnce sync.Once
	listLatest     *RevInfo
	listLatestErr  error
}
196
197func newProxyRepo(baseURL, path string) (Repo, error) {
198	// Parse the base proxy URL.
199	base, err := url.Parse(baseURL)
200	if err != nil {
201		return nil, err
202	}
203	redactedBase := base.Redacted()
204	switch base.Scheme {
205	case "http", "https":
206		// ok
207	case "file":
208		if *base != (url.URL{Scheme: base.Scheme, Path: base.Path, RawPath: base.RawPath}) {
209			return nil, fmt.Errorf("invalid file:// proxy URL with non-path elements: %s", redactedBase)
210		}
211	case "":
212		return nil, fmt.Errorf("invalid proxy URL missing scheme: %s", redactedBase)
213	default:
214		return nil, fmt.Errorf("invalid proxy URL scheme (must be https, http, file): %s", redactedBase)
215	}
216
217	// Append the module path to the URL.
218	url := base
219	enc, err := module.EscapePath(path)
220	if err != nil {
221		return nil, err
222	}
223	url.Path = strings.TrimSuffix(base.Path, "/") + "/" + enc
224	url.RawPath = strings.TrimSuffix(base.RawPath, "/") + "/" + pathEscape(enc)
225
226	return &proxyRepo{url, path, redactedBase, sync.Once{}, nil, nil}, nil
227}
228
// ModulePath returns the (unescaped) module path this repo was created for.
func (p *proxyRepo) ModulePath() string {
	return p.path
}
232
// errProxyReuse is the error returned by every call to (*proxyRepo).CheckReuse.
var errProxyReuse = fmt.Errorf("proxy does not support CheckReuse")
234
// CheckReuse always fails with errProxyReuse; ctx and old are not consulted.
func (p *proxyRepo) CheckReuse(ctx context.Context, old *codehost.Origin) error {
	return errProxyReuse
}
238
239// versionError returns err wrapped in a ModuleError for p.path.
240func (p *proxyRepo) versionError(version string, err error) error {
241	if version != "" && version != module.CanonicalVersion(version) {
242		return &module.ModuleError{
243			Path: p.path,
244			Err: &module.InvalidVersionError{
245				Version: version,
246				Pseudo:  module.IsPseudoVersion(version),
247				Err:     err,
248			},
249		}
250	}
251
252	return &module.ModuleError{
253		Path:    p.path,
254		Version: version,
255		Err:     err,
256	}
257}
258
259func (p *proxyRepo) getBytes(ctx context.Context, path string) ([]byte, error) {
260	body, redactedURL, err := p.getBody(ctx, path)
261	if err != nil {
262		return nil, err
263	}
264	defer body.Close()
265
266	b, err := io.ReadAll(body)
267	if err != nil {
268		// net/http doesn't add context to Body read errors, so add it here.
269		// (See https://go.dev/issue/52727.)
270		return b, &url.Error{Op: "read", URL: redactedURL, Err: err}
271	}
272	return b, nil
273}
274
275func (p *proxyRepo) getBody(ctx context.Context, path string) (r io.ReadCloser, redactedURL string, err error) {
276	fullPath := pathpkg.Join(p.url.Path, path)
277
278	target := *p.url
279	target.Path = fullPath
280	target.RawPath = pathpkg.Join(target.RawPath, pathEscape(path))
281
282	resp, err := web.Get(web.DefaultSecurity, &target)
283	if err != nil {
284		return nil, "", err
285	}
286	if err := resp.Err(); err != nil {
287		resp.Body.Close()
288		return nil, "", err
289	}
290	return resp.Body, resp.URL, nil
291}
292
293func (p *proxyRepo) Versions(ctx context.Context, prefix string) (*Versions, error) {
294	data, err := p.getBytes(ctx, "@v/list")
295	if err != nil {
296		p.listLatestOnce.Do(func() {
297			p.listLatest, p.listLatestErr = nil, p.versionError("", err)
298		})
299		return nil, p.versionError("", err)
300	}
301	var list []string
302	allLine := strings.Split(string(data), "\n")
303	for _, line := range allLine {
304		f := strings.Fields(line)
305		if len(f) >= 1 && semver.IsValid(f[0]) && strings.HasPrefix(f[0], prefix) && !module.IsPseudoVersion(f[0]) {
306			list = append(list, f[0])
307		}
308	}
309	p.listLatestOnce.Do(func() {
310		p.listLatest, p.listLatestErr = p.latestFromList(ctx, allLine)
311	})
312	semver.Sort(list)
313	return &Versions{List: list}, nil
314}
315
316func (p *proxyRepo) latest(ctx context.Context) (*RevInfo, error) {
317	p.listLatestOnce.Do(func() {
318		data, err := p.getBytes(ctx, "@v/list")
319		if err != nil {
320			p.listLatestErr = p.versionError("", err)
321			return
322		}
323		list := strings.Split(string(data), "\n")
324		p.listLatest, p.listLatestErr = p.latestFromList(ctx, list)
325	})
326	return p.listLatest, p.listLatestErr
327}
328
329func (p *proxyRepo) latestFromList(ctx context.Context, allLine []string) (*RevInfo, error) {
330	var (
331		bestTime    time.Time
332		bestVersion string
333	)
334	for _, line := range allLine {
335		f := strings.Fields(line)
336		if len(f) >= 1 && semver.IsValid(f[0]) {
337			// If the proxy includes timestamps, prefer the timestamp it reports.
338			// Otherwise, derive the timestamp from the pseudo-version.
339			var (
340				ft time.Time
341			)
342			if len(f) >= 2 {
343				ft, _ = time.Parse(time.RFC3339, f[1])
344			} else if module.IsPseudoVersion(f[0]) {
345				ft, _ = module.PseudoVersionTime(f[0])
346			} else {
347				// Repo.Latest promises that this method is only called where there are
348				// no tagged versions. Ignore any tagged versions that were added in the
349				// meantime.
350				continue
351			}
352			if bestTime.Before(ft) {
353				bestTime = ft
354				bestVersion = f[0]
355			}
356		}
357	}
358	if bestVersion == "" {
359		return nil, p.versionError("", codehost.ErrNoCommits)
360	}
361
362	// Call Stat to get all the other fields, including Origin information.
363	return p.Stat(ctx, bestVersion)
364}
365
366func (p *proxyRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
367	encRev, err := module.EscapeVersion(rev)
368	if err != nil {
369		return nil, p.versionError(rev, err)
370	}
371	data, err := p.getBytes(ctx, "@v/"+encRev+".info")
372	if err != nil {
373		return nil, p.versionError(rev, err)
374	}
375	info := new(RevInfo)
376	if err := json.Unmarshal(data, info); err != nil {
377		return nil, p.versionError(rev, fmt.Errorf("invalid response from proxy %q: %w", p.redactedBase, err))
378	}
379	if info.Version != rev && rev == module.CanonicalVersion(rev) && module.Check(p.path, rev) == nil {
380		// If we request a correct, appropriate version for the module path, the
381		// proxy must return either exactly that version or an error — not some
382		// arbitrary other version.
383		return nil, p.versionError(rev, fmt.Errorf("proxy returned info for version %s instead of requested version", info.Version))
384	}
385	return info, nil
386}
387
388func (p *proxyRepo) Latest(ctx context.Context) (*RevInfo, error) {
389	data, err := p.getBytes(ctx, "@latest")
390	if err != nil {
391		if !errors.Is(err, fs.ErrNotExist) {
392			return nil, p.versionError("", err)
393		}
394		return p.latest(ctx)
395	}
396	info := new(RevInfo)
397	if err := json.Unmarshal(data, info); err != nil {
398		return nil, p.versionError("", fmt.Errorf("invalid response from proxy %q: %w", p.redactedBase, err))
399	}
400	return info, nil
401}
402
403func (p *proxyRepo) GoMod(ctx context.Context, version string) ([]byte, error) {
404	if version != module.CanonicalVersion(version) {
405		return nil, p.versionError(version, fmt.Errorf("internal error: version passed to GoMod is not canonical"))
406	}
407
408	encVer, err := module.EscapeVersion(version)
409	if err != nil {
410		return nil, p.versionError(version, err)
411	}
412	data, err := p.getBytes(ctx, "@v/"+encVer+".mod")
413	if err != nil {
414		return nil, p.versionError(version, err)
415	}
416	return data, nil
417}
418
419func (p *proxyRepo) Zip(ctx context.Context, dst io.Writer, version string) error {
420	if version != module.CanonicalVersion(version) {
421		return p.versionError(version, fmt.Errorf("internal error: version passed to Zip is not canonical"))
422	}
423
424	encVer, err := module.EscapeVersion(version)
425	if err != nil {
426		return p.versionError(version, err)
427	}
428	path := "@v/" + encVer + ".zip"
429	body, redactedURL, err := p.getBody(ctx, path)
430	if err != nil {
431		return p.versionError(version, err)
432	}
433	defer body.Close()
434
435	lr := &io.LimitedReader{R: body, N: codehost.MaxZipFile + 1}
436	if _, err := io.Copy(dst, lr); err != nil {
437		// net/http doesn't add context to Body read errors, so add it here.
438		// (See https://go.dev/issue/52727.)
439		err = &url.Error{Op: "read", URL: redactedURL, Err: err}
440		return p.versionError(version, err)
441	}
442	if lr.N <= 0 {
443		return p.versionError(version, fmt.Errorf("downloaded zip file too large"))
444	}
445	return nil
446}
447
// pathEscape escapes s for use as a URL path while keeping every "/" as a
// literal separator. Characters such as ? and # (which really shouldn't
// appear anyway) are percent-encoded; "/" is never turned into %2F, because
// the proxy's REST API is designed so that / can be left as is.
func pathEscape(s string) string {
	// Escape each "/"-separated segment on its own, so the separators
	// themselves are never handed to url.PathEscape.
	segments := strings.Split(s, "/")
	for i, segment := range segments {
		segments[i] = url.PathEscape(segment)
	}
	return strings.Join(segments, "/")
}
454