1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package codehost defines the interface implemented by a code hosting source,
6// along with support code for use by implementations.
7package codehost
8
9import (
10	"bytes"
11	"context"
12	"crypto/sha256"
13	"fmt"
14	"io"
15	"io/fs"
16	"os"
17	"os/exec"
18	"path/filepath"
19	"strings"
20	"sync"
21	"time"
22
23	"cmd/go/internal/cfg"
24	"cmd/go/internal/lockedfile"
25	"cmd/go/internal/str"
26
27	"golang.org/x/mod/module"
28	"golang.org/x/mod/semver"
29)
30
// Downloaded size limits.
//
// NOTE(review): these are presumably byte counts meant to be passed as the
// maxSize argument of Repo.ReadFile and Repo.ReadZip — confirm against callers.
const (
	MaxGoMod   = 16 << 20  // maximum size of go.mod file
	MaxLICENSE = 16 << 20  // maximum size of LICENSE file
	MaxZipFile = 500 << 20 // maximum size of downloaded zip file
)
37
// A Repo represents a code hosting source.
// Typical implementations include local version control repositories,
// remote version control servers, and code hosting sites.
//
// A Repo must be safe for simultaneous use by multiple goroutines,
// and callers must not modify returned values, which may be cached and shared.
type Repo interface {
	// CheckReuse checks whether the old origin information
	// remains up to date. If so, whatever cached object it was
	// taken from can be reused.
	// The subdir gives the subdirectory name where the module root is expected to be found,
	// "" for the root or "sub/dir" for a subdirectory (no trailing slash).
	CheckReuse(ctx context.Context, old *Origin, subdir string) error

	// Tags lists all tags with the given prefix.
	Tags(ctx context.Context, prefix string) (*Tags, error)

	// Stat returns information about the revision rev.
	// A revision can be any identifier known to the underlying service:
	// commit hash, branch, tag, and so on.
	Stat(ctx context.Context, rev string) (*RevInfo, error)

	// Latest returns the latest revision on the default branch,
	// whatever that means in the underlying implementation.
	Latest(ctx context.Context) (*RevInfo, error)

	// ReadFile reads the given file in the file tree corresponding to revision rev.
	// It should refuse to read more than maxSize bytes.
	//
	// If the requested file does not exist it should return an error for which
	// os.IsNotExist(err) returns true.
	ReadFile(ctx context.Context, rev, file string, maxSize int64) (data []byte, err error)

	// ReadZip downloads a zip file for the subdir subdirectory
	// of the given revision.
	// It should refuse to read more than maxSize bytes.
	// It returns a ReadCloser for a streamed copy of the zip file;
	// the method takes no destination directory, so where (or whether)
	// the data is staged is left to the implementation.
	// All files in the zip file are expected to be
	// nested in a single top-level directory, whose name is not specified.
	ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error)

	// RecentTag returns the most recent tag on rev or one of its predecessors
	// with the given prefix. allowed may be used to filter out unwanted versions.
	RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error)

	// DescendsFrom reports whether rev or any of its ancestors has the given tag.
	//
	// DescendsFrom must return true for any tag returned by RecentTag for the
	// same revision.
	DescendsFrom(ctx context.Context, rev, tag string) (bool, error)
}
89
// An Origin describes the provenance of a given repo method result.
// It can be passed to CheckReuse (usually in a different go command invocation)
// to see whether the result remains up-to-date.
//
// Every field is tagged omitempty, so empty fields are left out of the
// JSON encoding.
type Origin struct {
	VCS    string `json:",omitempty"` // "git" etc
	URL    string `json:",omitempty"` // URL of repository
	Subdir string `json:",omitempty"` // subdirectory in repo

	Hash string `json:",omitempty"` // commit hash or ID

	// If TagSum is non-empty, then the resolution of this module version
	// depends on the set of tags present in the repo, specifically the tags
	// of the form TagPrefix + a valid semver version.
	// If the matching repo tags and their commit hashes still hash to TagSum,
	// the Origin is still valid (at least as far as the tags are concerned).
	// The exact checksum is up to the Repo implementation; see (*gitRepo).Tags.
	TagPrefix string `json:",omitempty"`
	TagSum    string `json:",omitempty"`

	// If Ref is non-empty, then the resolution of this module version
	// depends on Ref resolving to the revision identified by Hash.
	// If Ref still resolves to Hash, the Origin is still valid (at least as far as Ref is concerned).
	// For Git, the Ref is a full ref like "refs/heads/main" or "refs/tags/v1.2.3",
	// and the Hash is the Git object hash the ref maps to.
	// Other VCS might choose differently, but the idea is that Ref is the name
	// with a mutable meaning while Hash is a name with an immutable meaning.
	Ref string `json:",omitempty"`

	// If RepoSum is non-empty, then the resolution of this module version
	// failed due to the repo being available but the version not being present.
	// This depends on the entire state of the repo, which RepoSum summarizes.
	// For Git, this is a hash of all the refs and their hashes.
	RepoSum string `json:",omitempty"`
}
124
// A Tags describes the available tags in a code repository.
// It is the result type of Repo.Tags.
type Tags struct {
	Origin *Origin // provenance of this result (see Origin)
	List   []Tag   // the tags themselves
}
130
// A Tag describes a single tag in a code repository.
type Tag struct {
	Name string // name of the tag
	Hash string // content hash identifying tag's content, if available
}
136
137// isOriginTag reports whether tag should be preserved
138// in the Tags method's Origin calculation.
139// We can safely ignore tags that are not look like pseudo-versions,
140// because ../coderepo.go's (*codeRepo).Versions ignores them too.
141// We can also ignore non-semver tags, but we have to include semver
142// tags with extra suffixes, because the pseudo-version base finder uses them.
143func isOriginTag(tag string) bool {
144	// modfetch.(*codeRepo).Versions uses Canonical == tag,
145	// but pseudo-version calculation has a weaker condition that
146	// the canonical is a prefix of the tag.
147	// Include those too, so that if any new one appears, we'll invalidate the cache entry.
148	// This will lead to spurious invalidation of version list results,
149	// but tags of this form being created should be fairly rare
150	// (and invalidate pseudo-version results anyway).
151	c := semver.Canonical(tag)
152	return c != "" && strings.HasPrefix(tag, c) && !module.IsPseudoVersion(tag)
153}
154
// A RevInfo describes a single revision in a source code repository.
// It is the result type of Repo.Stat and Repo.Latest.
type RevInfo struct {
	Origin  *Origin   // provenance of this result (see Origin)
	Name    string    // complete ID in underlying repository
	Short   string    // shortened ID, for use in pseudo-version
	Version string    // version used in lookup
	Time    time.Time // commit time
	Tags    []string  // known tags for commit
}
164
// UnknownRevisionError is the revision-level counterpart of fs.ErrNotExist:
// it names a revision rather than a file, and its Is method makes it
// match fs.ErrNotExist.
type UnknownRevisionError struct {
	Rev string // the revision named in the error message
}

// Error returns "unknown revision " followed by the revision.
func (e *UnknownRevisionError) Error() string {
	const prefix = "unknown revision "
	return prefix + e.Rev
}

// Is reports whether target is fs.ErrNotExist.
func (UnknownRevisionError) Is(target error) bool {
	return fs.ErrNotExist == target
}
177
// noCommitsError is the concrete type behind ErrNoCommits.
type noCommitsError struct{}

// ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given
// repository or module contains no commits.
var ErrNoCommits error = noCommitsError{}

// Error returns the fixed message "no commits".
func (noCommitsError) Error() string {
	const msg = "no commits"
	return msg
}

// Is reports whether target is fs.ErrNotExist.
func (noCommitsError) Is(target error) bool {
	return fs.ErrNotExist == target
}
190
// AllHex reports whether the revision rev is entirely lower-case hexadecimal digits.
// The empty string contains no other bytes and is therefore reported as true.
func AllHex(rev string) bool {
	for _, b := range []byte(rev) {
		switch {
		case b >= '0' && b <= '9', b >= 'a' && b <= 'f':
			// Lower-case hex digit: keep scanning.
		default:
			return false
		}
	}
	return true
}
202
203// ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length
204// used in pseudo-versions (12 hex digits).
205func ShortenSHA1(rev string) string {
206	if AllHex(rev) && len(rev) == 40 {
207		return rev[:12]
208	}
209	return rev
210}
211
212// WorkDir returns the name of the cached work directory to use for the
213// given repository type and name.
214func WorkDir(ctx context.Context, typ, name string) (dir, lockfile string, err error) {
215	if cfg.GOMODCACHE == "" {
216		return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set")
217	}
218
219	// We name the work directory for the SHA256 hash of the type and name.
220	// We intentionally avoid the actual name both because of possible
221	// conflicts with valid file system paths and because we want to ensure
222	// that one checkout is never nested inside another. That nesting has
223	// led to security problems in the past.
224	if strings.Contains(typ, ":") {
225		return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon")
226	}
227	key := typ + ":" + name
228	dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key))))
229
230	xLog, buildX := cfg.BuildXWriter(ctx)
231	if buildX {
232		fmt.Fprintf(xLog, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name)
233	}
234	if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil {
235		return "", "", err
236	}
237
238	lockfile = dir + ".lock"
239	if buildX {
240		fmt.Fprintf(xLog, "# lock %s\n", lockfile)
241	}
242
243	unlock, err := lockedfile.MutexAt(lockfile).Lock()
244	if err != nil {
245		return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err)
246	}
247	defer unlock()
248
249	data, err := os.ReadFile(dir + ".info")
250	info, err2 := os.Stat(dir)
251	if err == nil && err2 == nil && info.IsDir() {
252		// Info file and directory both already exist: reuse.
253		have := strings.TrimSuffix(string(data), "\n")
254		if have != key {
255			return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key)
256		}
257		if buildX {
258			fmt.Fprintf(xLog, "# %s for %s %s\n", dir, typ, name)
259		}
260		return dir, lockfile, nil
261	}
262
263	// Info file or directory missing. Start from scratch.
264	if xLog != nil {
265		fmt.Fprintf(xLog, "mkdir -p %s # %s %s\n", dir, typ, name)
266	}
267	os.RemoveAll(dir)
268	if err := os.MkdirAll(dir, 0777); err != nil {
269		return "", "", err
270	}
271	if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil {
272		os.RemoveAll(dir)
273		return "", "", err
274	}
275	return dir, lockfile, nil
276}
277
// A RunError describes a command that did not run to successful completion.
// RunWithStdin returns one when the command it started reports an error.
type RunError struct {
	Cmd      string // description of the command; RunWithStdin uses the command line plus " in " and the directory
	Err      error  // underlying error from running the command
	Stderr   []byte // standard error output captured from the command
	HelpText string // optional extra text, appended to the message on its own line
}

// Error formats e as "Cmd: Err". If any standard error output remains after
// trimming trailing newlines, it follows after a colon, with every line
// indented by a tab. A non-empty HelpText is appended on a line of its own.
func (e *RunError) Error() string {
	var msg strings.Builder
	msg.WriteString(e.Cmd)
	msg.WriteString(": ")
	msg.WriteString(e.Err.Error())
	if out := bytes.TrimRight(e.Stderr, "\n"); len(out) > 0 {
		msg.WriteString(":\n\t")
		msg.WriteString(strings.ReplaceAll(string(out), "\n", "\n\t"))
	}
	if e.HelpText != "" {
		msg.WriteString("\n")
		msg.WriteString(e.HelpText)
	}
	return msg.String()
}
296
// dirLock maps a directory name to the *sync.Mutex that RunWithStdin holds
// while it runs a command in that directory, so that commands sharing a
// non-empty directory run one at a time.
var dirLock sync.Map
298
299// Run runs the command line in the given directory
300// (an empty dir means the current directory).
301// It returns the standard output and, for a non-zero exit,
302// a *RunError indicating the command, exit status, and standard error.
303// Standard error is unavailable for commands that exit successfully.
304func Run(ctx context.Context, dir string, cmdline ...any) ([]byte, error) {
305	return RunWithStdin(ctx, dir, nil, cmdline...)
306}
307
// bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell.
// See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html.
//
// It puts a backslash in front of each double quote, dollar sign, backquote,
// and backslash. RunWithStdin uses it when it prints an argument inside
// double quotes in its trace output.
var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`)
311
312func RunWithStdin(ctx context.Context, dir string, stdin io.Reader, cmdline ...any) ([]byte, error) {
313	if dir != "" {
314		muIface, ok := dirLock.Load(dir)
315		if !ok {
316			muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex))
317		}
318		mu := muIface.(*sync.Mutex)
319		mu.Lock()
320		defer mu.Unlock()
321	}
322
323	cmd := str.StringList(cmdline...)
324	if os.Getenv("TESTGOVCS") == "panic" {
325		panic(fmt.Sprintf("use of vcs: %v", cmd))
326	}
327	if xLog, ok := cfg.BuildXWriter(ctx); ok {
328		text := new(strings.Builder)
329		if dir != "" {
330			text.WriteString("cd ")
331			text.WriteString(dir)
332			text.WriteString("; ")
333		}
334		for i, arg := range cmd {
335			if i > 0 {
336				text.WriteByte(' ')
337			}
338			switch {
339			case strings.ContainsAny(arg, "'"):
340				// Quote args that could be mistaken for quoted args.
341				text.WriteByte('"')
342				text.WriteString(bashQuoter.Replace(arg))
343				text.WriteByte('"')
344			case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"):
345				// Quote args that contain special characters, glob patterns, or spaces.
346				text.WriteByte('\'')
347				text.WriteString(arg)
348				text.WriteByte('\'')
349			default:
350				text.WriteString(arg)
351			}
352		}
353		fmt.Fprintf(xLog, "%s\n", text)
354		start := time.Now()
355		defer func() {
356			fmt.Fprintf(xLog, "%.3fs # %s\n", time.Since(start).Seconds(), text)
357		}()
358	}
359	// TODO: Impose limits on command output size.
360	// TODO: Set environment to get English error messages.
361	var stderr bytes.Buffer
362	var stdout bytes.Buffer
363	c := exec.CommandContext(ctx, cmd[0], cmd[1:]...)
364	c.Cancel = func() error { return c.Process.Signal(os.Interrupt) }
365	c.Dir = dir
366	c.Stdin = stdin
367	c.Stderr = &stderr
368	c.Stdout = &stdout
369	// For Git commands, manually supply GIT_DIR so Git works with safe.bareRepository=explicit set. Noop for other commands.
370	c.Env = append(c.Environ(), "GIT_DIR="+dir)
371	err := c.Run()
372	if err != nil {
373		err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err}
374	}
375	return stdout.Bytes(), err
376}
377