xref: /aosp_15_r20/external/bazelbuild-rules_go/go/tools/builders/embedcfg.go (revision 9bb1b549b6a84214c53be0924760be030e66b93a)
1// Copyright 2021 The Bazel Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package main
16
17import (
18	"encoding/json"
19	"errors"
20	"fmt"
21	"io/ioutil"
22	"os"
23	"path"
24	"path/filepath"
25	"runtime"
26	"sort"
27	"strings"
28)
29
30// buildEmbedcfgFile writes an embedcfg file to be read by the compiler.
31// An embedcfg file can be used in Go 1.16 or higher if the "embed" package
32// is imported and there are one or more //go:embed comments in .go files.
33// The embedcfg file maps //go:embed patterns to actual file names.
34//
35// The embedcfg file will be created in workDir, and its name is returned.
36// The caller is responsible for deleting it. If no embedcfg file is needed,
37// "" is returned with no error.
38//
39// All source files listed in goSrcs with //go:embed comments must be in one
40// of the directories in embedRootDirs (not in a subdirectory). Embed patterns
41// are evaluated relative to the source directory. Embed sources (embedSrcs)
42// outside those directories are ignored, since they can't be matched by any
43// valid pattern.
44func buildEmbedcfgFile(goSrcs []fileInfo, embedSrcs, embedRootDirs []string, workDir string) (string, error) {
45	// Check whether this package uses embedding and whether the toolchain
46	// supports it (Go 1.16+). With Go 1.15 and lower, we'll try to compile
47	// without an embedcfg file, and the compiler will complain the "embed"
48	// package is missing.
49	var major, minor int
50	if n, err := fmt.Sscanf(runtime.Version(), "go%d.%d", &major, &minor); n != 2 || err != nil {
51		// Can't parse go version. Maybe it's a development version; fall through.
52	} else if major < 1 || (major == 1 && minor < 16) {
53		return "", nil
54	}
55	importEmbed := false
56	haveEmbed := false
57	for _, src := range goSrcs {
58		if len(src.embeds) > 0 {
59			haveEmbed = true
60			rootDir := findInRootDirs(src.filename, embedRootDirs)
61			if rootDir == "" || strings.Contains(src.filename[len(rootDir)+1:], string(filepath.Separator)) {
62				// Report an error if a source files appears in a subdirectory of
63				// another source directory. In this situation, the same file could be
64				// referenced with different paths.
65				return "", fmt.Errorf("%s: source files with //go:embed should be in same directory. Allowed directories are:\n\t%s",
66					src.filename,
67					strings.Join(embedRootDirs, "\n\t"))
68			}
69		}
70		for _, imp := range src.imports {
71			if imp.path == "embed" {
72				importEmbed = true
73			}
74		}
75	}
76	if !importEmbed || !haveEmbed {
77		return "", nil
78	}
79
80	// Build a tree of embeddable files. This includes paths listed with
81	// -embedsrc. If one of those paths is a directory, the tree includes
82	// its files and subdirectories. Paths in the tree are relative to the
83	// path in embedRootDirs that contains them.
84	root, err := buildEmbedTree(embedSrcs, embedRootDirs)
85	if err != nil {
86		return "", err
87	}
88
89	// Resolve patterns to sets of files.
90	var embedcfg struct {
91		Patterns map[string][]string
92		Files    map[string]string
93	}
94	embedcfg.Patterns = make(map[string][]string)
95	embedcfg.Files = make(map[string]string)
96	for _, src := range goSrcs {
97		for _, embed := range src.embeds {
98			matchedPaths, matchedFiles, err := resolveEmbed(embed, root)
99			if err != nil {
100				return "", err
101			}
102			embedcfg.Patterns[embed.pattern] = matchedPaths
103			for i, rel := range matchedPaths {
104				embedcfg.Files[rel] = matchedFiles[i]
105			}
106		}
107	}
108
109	// Write the configuration to a JSON file.
110	embedcfgData, err := json.MarshalIndent(&embedcfg, "", "\t")
111	if err != nil {
112		return "", err
113	}
114	embedcfgName := filepath.Join(workDir, "embedcfg")
115	if err := ioutil.WriteFile(embedcfgName, embedcfgData, 0o666); err != nil {
116		return "", err
117	}
118	return embedcfgName, nil
119}
120
// findInRootDirs looks for the first entry of rootDirs that contains the
// file path p, either as the file's own directory or as an ancestor of it.
// The match is made on whole path elements, so "a" is not considered a
// parent of "ab/x". It returns "" when no entry qualifies.
func findInRootDirs(p string, rootDirs []string) string {
	parent := filepath.Dir(p)
	for _, candidate := range rootDirs {
		if parent == candidate {
			return candidate
		}
		// An ancestor must be followed by a separator and at least one more
		// character of the directory path.
		if len(parent) <= len(candidate)+1 {
			continue
		}
		if !strings.HasPrefix(parent, candidate) {
			continue
		}
		if parent[len(candidate)] == filepath.Separator {
			return candidate
		}
	}
	return ""
}
133
// embedNode represents an embeddable file or directory in a tree.
// The tree is a logical view: it is assembled by buildEmbedTree from several
// root directories, so siblings need not live in the same directory on disk.
type embedNode struct {
	// name is the base name of the entry. The tree root uses "".
	name string
	// path is the file path on disk (root directory joined with the relative
	// path). It is left empty for the tree root and for directory nodes that
	// add creates only as intermediate parents.
	path string
	// children maps base names to child nodes. It is non-nil for a directory
	// and nil for a file; isDir relies on this.
	children map[string]*embedNode
	// childNames holds the keys of children in sorted order. It is filled in
	// by buildEmbedTree and determines the iteration order of walk.
	childNames []string
}
141
142// add inserts file nodes into the tree rooted at f for the slash-separated
143// path src, relative to the absolute file path rootDir. If src points to a
144// directory, add recursively inserts nodes for its contents. If a node already
145// exists (for example, if a source file and a generated file have the same
146// name), add leaves the existing node in place.
147func (n *embedNode) add(rootDir, src string) error {
148	// Create nodes for parents of src.
149	parent := n
150	parts := strings.Split(src, "/")
151	for _, p := range parts[:len(parts)-1] {
152		if parent.children[p] == nil {
153			parent.children[p] = &embedNode{
154				name:     p,
155				children: make(map[string]*embedNode),
156			}
157		}
158		parent = parent.children[p]
159	}
160
161	// Create a node for src. If src is a directory, recursively create nodes for
162	// its contents. Go embedding ignores symbolic links, but Bazel may use links
163	// for generated files and directories, so we follow them here.
164	var visit func(*embedNode, string, os.FileInfo) error
165	visit = func(parent *embedNode, path string, fi os.FileInfo) error {
166		base := filepath.Base(path)
167		if parent.children[base] == nil {
168			parent.children[base] = &embedNode{name: base, path: path}
169		}
170		if !fi.IsDir() {
171			return nil
172		}
173		node := parent.children[base]
174		node.children = make(map[string]*embedNode)
175		f, err := os.Open(path)
176		if err != nil {
177			return err
178		}
179		names, err := f.Readdirnames(0)
180		f.Close()
181		if err != nil {
182			return err
183		}
184		for _, name := range names {
185			cPath := filepath.Join(path, name)
186			cfi, err := os.Stat(cPath)
187			if err != nil {
188				return err
189			}
190			if err := visit(node, cPath, cfi); err != nil {
191				return err
192			}
193		}
194		return nil
195	}
196
197	path := filepath.Join(rootDir, src)
198	fi, err := os.Stat(path)
199	if err != nil {
200		return err
201	}
202	return visit(parent, path, fi)
203}
204
205func (n *embedNode) isDir() bool {
206	return n.children != nil
207}
208
209// get returns a tree node, given a slash-separated path relative to the
210// receiver. get returns nil if no node exists with that path.
211func (n *embedNode) get(path string) *embedNode {
212	if path == "." || path == "" {
213		return n
214	}
215	for _, part := range strings.Split(path, "/") {
216		n = n.children[part]
217		if n == nil {
218			return nil
219		}
220	}
221	return n
222}
223
// errSkip is a sentinel that a callback passed to embedNode.walk may return
// to keep walk from descending into the children of the current node. walk
// consumes it and does not report it to its caller.
var errSkip = errors.New("skip")
225
226// walk calls fn on each node in the tree rooted at n in depth-first pre-order.
227func (n *embedNode) walk(fn func(rel string, n *embedNode) error) error {
228	var visit func(string, *embedNode) error
229	visit = func(rel string, node *embedNode) error {
230		err := fn(rel, node)
231		if err == errSkip {
232			return nil
233		} else if err != nil {
234			return err
235		}
236		for _, name := range node.childNames {
237			if err := visit(path.Join(rel, name), node.children[name]); err != nil && err != errSkip {
238				return err
239			}
240		}
241		return nil
242	}
243	err := visit("", n)
244	if err == errSkip {
245		return nil
246	}
247	return err
248}
249
250// buildEmbedTree constructs a logical directory tree of embeddable files.
251// The tree may contain a mix of static and generated files from multiple
252// root directories. Directory artifacts are recursively expanded.
253func buildEmbedTree(embedSrcs, embedRootDirs []string) (root *embedNode, err error) {
254	defer func() {
255		if err != nil {
256			err = fmt.Errorf("building tree of embeddable files in directories %s: %v", strings.Join(embedRootDirs, string(filepath.ListSeparator)), err)
257		}
258	}()
259
260	// Add each path to the tree.
261	root = &embedNode{name: "", children: make(map[string]*embedNode)}
262	for _, src := range embedSrcs {
263		rootDir := findInRootDirs(src, embedRootDirs)
264		if rootDir == "" {
265			// Embedded path cannot be matched by any valid pattern. Ignore.
266			continue
267		}
268		rel := filepath.ToSlash(src[len(rootDir)+1:])
269		if err := root.add(rootDir, rel); err != nil {
270			return nil, err
271		}
272	}
273
274	// Sort children in each directory node.
275	var visit func(*embedNode)
276	visit = func(node *embedNode) {
277		node.childNames = make([]string, 0, len(node.children))
278		for name, child := range node.children {
279			node.childNames = append(node.childNames, name)
280			visit(child)
281		}
282		sort.Strings(node.childNames)
283	}
284	visit(root)
285
286	return root, nil
287}
288
289// resolveEmbed matches a //go:embed pattern in a source file to a set of
290// embeddable files in the given tree.
291func resolveEmbed(embed fileEmbed, root *embedNode) (matchedPaths, matchedFiles []string, err error) {
292	defer func() {
293		if err != nil {
294			err = fmt.Errorf("%v: could not embed %s: %v", embed.pos, embed.pattern, err)
295		}
296	}()
297
298	// Remove optional "all:" prefix from pattern and set matchAll flag if present.
299	// See https://pkg.go.dev/embed#hdr-Directives for details.
300	pattern := embed.pattern
301	var matchAll bool
302	if strings.HasPrefix(pattern, "all:") {
303		matchAll = true
304		pattern = pattern[4:]
305	}
306
307	// Check that the pattern has valid syntax.
308	if _, err := path.Match(pattern, ""); err != nil || !validEmbedPattern(pattern) {
309		return nil, nil, fmt.Errorf("invalid pattern syntax")
310	}
311
312	// Search for matching files.
313	err = root.walk(func(matchRel string, matchNode *embedNode) error {
314		if ok, _ := path.Match(pattern, matchRel); !ok {
315			// Non-matching file or directory.
316			return nil
317		}
318
319		// TODO: Should check that directories along path do not begin a new module
320		// (do not contain a go.mod).
321		// https://cs.opensource.google/go/go/+/master:src/cmd/go/internal/load/pkg.go;l=2158;drc=261fe25c83a94fc3defe064baed3944cd3d16959
322		for dir := matchRel; len(dir) > 1; dir = filepath.Dir(dir) {
323			if base := path.Base(matchRel); isBadEmbedName(base) {
324				what := "file"
325				if matchNode.isDir() {
326					what = "directory"
327				}
328				if dir == matchRel {
329					return fmt.Errorf("cannot embed %s %s: invalid name %s", what, matchRel, base)
330				} else {
331					return fmt.Errorf("cannot embed %s %s: in invalid directory %s", what, matchRel, base)
332				}
333			}
334		}
335
336		if !matchNode.isDir() {
337			// Matching file. Add to list.
338			matchedPaths = append(matchedPaths, matchRel)
339			matchedFiles = append(matchedFiles, matchNode.path)
340			return nil
341		}
342
343		// Matching directory. Recursively add all files in subdirectories.
344		// Don't add hidden files or directories (starting with "." or "_"),
345		// unless "all:" prefix was set.
346		// See golang/go#42328.
347		matchTreeErr := matchNode.walk(func(childRel string, childNode *embedNode) error {
348			// TODO: Should check that directories along path do not begin a new module
349			// https://cs.opensource.google/go/go/+/master:src/cmd/go/internal/load/pkg.go;l=2158;drc=261fe25c83a94fc3defe064baed3944cd3d16959
350			if childRel != "" {
351				base := path.Base(childRel)
352				if isBadEmbedName(base) || (!matchAll && (strings.HasPrefix(base, ".") || strings.HasPrefix(base, "_"))) {
353					if childNode.isDir() {
354						return errSkip
355					}
356					return nil
357				}
358			}
359			if !childNode.isDir() {
360				matchedPaths = append(matchedPaths, path.Join(matchRel, childRel))
361				matchedFiles = append(matchedFiles, childNode.path)
362			}
363			return nil
364		})
365		if matchTreeErr != nil {
366			return matchTreeErr
367		}
368		return errSkip
369	})
370	if err != nil && err != errSkip {
371		return nil, nil, err
372	}
373	if len(matchedPaths) == 0 {
374		return nil, nil, fmt.Errorf("no matching files found")
375	}
376	return matchedPaths, matchedFiles, nil
377}
378
379func validEmbedPattern(pattern string) bool {
380	return pattern != "." && fsValidPath(pattern)
381}
382
// fsValidPath reports whether name is a valid path in the sense of
// io/fs.ValidPath: an unrooted, slash-separated sequence of path elements
// such as "x/y/z". No element may be empty, "." or "..", with the single
// exception that the whole name may be "." to denote the root directory.
//
// Slashes are the separator on every system, including Windows; a name
// containing a backslash is rejected.
//
// The rules mirror io/fs.ValidPath as of Go 1.16beta1.
func fsValidPath(name string) bool {
	// The root directory is the only place where "." is allowed.
	if name == "." {
		return true
	}
	if strings.Contains(name, `\`) {
		return false
	}
	for _, elem := range strings.Split(name, "/") {
		switch elem {
		case "", ".", "..":
			return false
		}
	}
	return true
}
419
420// isBadEmbedName reports whether name is the base name of a file that
421// can't or won't be included in modules and therefore shouldn't be treated
422// as existing for embedding.
423//
424// TODO: This should use the equivalent of golang.org/x/mod/module.CheckFilePath instead of fsValidPath.
425// https://cs.opensource.google/go/go/+/master:src/cmd/go/internal/load/pkg.go;l=2200;drc=261fe25c83a94fc3defe064baed3944cd3d16959
426func isBadEmbedName(name string) bool {
427	if !fsValidPath(name) {
428		return true
429	}
430	switch name {
431	// Empty string should be impossible but make it bad.
432	case "":
433		return true
434	// Version control directories won't be present in module.
435	case ".bzr", ".hg", ".git", ".svn":
436		return true
437	}
438	return false
439}
440