1// Copyright 2022 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package pods
6
7import (
8	"cmp"
9	"fmt"
10	"internal/coverage"
11	"os"
12	"path/filepath"
13	"regexp"
14	"slices"
15	"strconv"
16	"strings"
17)
18
// Pod groups together the output files produced by runs of a single
// coverage-instrumented binary: exactly one meta-data file, plus zero
// or more counter data files that refer to that meta-data file.
//
// Pods exist to make it easier to process coverage output when
// several output directories are in play and those directories hold
// files derived from more than one instrumented executable. The
// files making up a single pod may be spread across multiple input
// directories.
//
// CounterDataFiles, Origins and ProcessIDs are parallel slices: entry
// i of each one describes the same counter data file.
type Pod struct {
	// MetaFile is the path of the pod's meta-data file.
	MetaFile string

	// CounterDataFiles holds the paths of the counter data files
	// that refer to MetaFile.
	CounterDataFiles []string

	// Origins holds, for each counter data file, the index of the
	// directory it came from within the slice of input dirs handed
	// to CollectPods.
	Origins []int

	// ProcessIDs holds, for each counter data file, the process ID
	// recorded in that file's name.
	ProcessIDs []int
}
38
39// CollectPods visits the files contained within the directories in
40// the list 'dirs', collects any coverage-related files, partitions
41// them into pods, and returns a list of the pods to the caller, along
42// with an error if something went wrong during directory/file
43// reading.
44//
45// CollectPods skips over any file that is not related to coverage
46// (e.g. avoids looking at things that are not meta-data files or
47// counter-data files). CollectPods also skips over 'orphaned' counter
48// data files (e.g. counter data files for which we can't find the
49// corresponding meta-data file). If "warn" is true, CollectPods will
50// issue warnings to stderr when it encounters non-fatal problems (for
51// orphans or a directory with no meta-data files).
52func CollectPods(dirs []string, warn bool) ([]Pod, error) {
53	files := []string{}
54	dirIndices := []int{}
55	for k, dir := range dirs {
56		dents, err := os.ReadDir(dir)
57		if err != nil {
58			return nil, err
59		}
60		for _, e := range dents {
61			if e.IsDir() {
62				continue
63			}
64			files = append(files, filepath.Join(dir, e.Name()))
65			dirIndices = append(dirIndices, k)
66		}
67	}
68	return collectPodsImpl(files, dirIndices, warn), nil
69}
70
71// CollectPodsFromFiles functions the same as "CollectPods" but
72// operates on an explicit list of files instead of a directory.
73func CollectPodsFromFiles(files []string, warn bool) []Pod {
74	return collectPodsImpl(files, nil, warn)
75}
76
// fileWithAnnotations describes one counter data file along with the
// extra information collected for it while building pods.
type fileWithAnnotations struct {
	// file is the path of the counter data file.
	file string

	// origin is the index of the directory the file was found in,
	// or -1 if no directory indices were supplied.
	origin int

	// pid is the process ID parsed from the file's name.
	pid int
}
82
83type protoPod struct {
84	mf       string
85	elements []fileWithAnnotations
86}
87
88// collectPodsImpl examines the specified list of files and picks out
89// subsets that correspond to coverage pods. The first stage in this
90// process is collecting a set { M1, M2, ... MN } where each M_k is a
91// distinct coverage meta-data file. We then create a single pod for
92// each meta-data file M_k, then find all of the counter data files
93// that refer to that meta-data file (recall that the counter data
94// file name incorporates the meta-data hash), and add the counter
95// data file to the appropriate pod.
96//
97// This process is complicated by the fact that we need to keep track
98// of directory indices for counter data files. Here is an example to
99// motivate:
100//
101//	directory 1:
102//
103// M1   covmeta.9bbf1777f47b3fcacb05c38b035512d6
104// C1   covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677673.1662138360208416486
105// C2   covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677637.1662138359974441782
106//
107//	directory 2:
108//
109// M2   covmeta.9bbf1777f47b3fcacb05c38b035512d6
110// C3   covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677445.1662138360208416480
111// C4   covcounters.9bbf1777f47b3fcacb05c38b035512d6.1677677.1662138359974441781
112// M3   covmeta.a723844208cea2ae80c63482c78b2245
113// C5   covcounters.a723844208cea2ae80c63482c78b2245.3677445.1662138360208416480
114// C6   covcounters.a723844208cea2ae80c63482c78b2245.1877677.1662138359974441781
115//
116// In these two directories we have three meta-data files, but only
117// two are distinct, meaning that we'll wind up with two pods. The
118// first pod (with meta-file M1) will have four counter data files
119// (C1, C2, C3, C4) and the second pod will have two counter data files
120// (C5, C6).
121func collectPodsImpl(files []string, dirIndices []int, warn bool) []Pod {
122	metaRE := regexp.MustCompile(fmt.Sprintf(`^%s\.(\S+)$`, coverage.MetaFilePref))
123	mm := make(map[string]protoPod)
124	for _, f := range files {
125		base := filepath.Base(f)
126		if m := metaRE.FindStringSubmatch(base); m != nil {
127			tag := m[1]
128			// We need to allow for the possibility of duplicate
129			// meta-data files. If we hit this case, use the
130			// first encountered as the canonical version.
131			if _, ok := mm[tag]; !ok {
132				mm[tag] = protoPod{mf: f}
133			}
134			// FIXME: should probably check file length and hash here for
135			// the duplicate.
136		}
137	}
138	counterRE := regexp.MustCompile(fmt.Sprintf(coverage.CounterFileRegexp, coverage.CounterFilePref))
139	for k, f := range files {
140		base := filepath.Base(f)
141		if m := counterRE.FindStringSubmatch(base); m != nil {
142			tag := m[1] // meta hash
143			pid, err := strconv.Atoi(m[2])
144			if err != nil {
145				continue
146			}
147			if v, ok := mm[tag]; ok {
148				idx := -1
149				if dirIndices != nil {
150					idx = dirIndices[k]
151				}
152				fo := fileWithAnnotations{file: f, origin: idx, pid: pid}
153				v.elements = append(v.elements, fo)
154				mm[tag] = v
155			} else {
156				if warn {
157					warning("skipping orphaned counter file: %s", f)
158				}
159			}
160		}
161	}
162	if len(mm) == 0 {
163		if warn {
164			warning("no coverage data files found")
165		}
166		return nil
167	}
168	pods := make([]Pod, 0, len(mm))
169	for _, p := range mm {
170		slices.SortFunc(p.elements, func(a, b fileWithAnnotations) int {
171			if r := cmp.Compare(a.origin, b.origin); r != 0 {
172				return r
173			}
174			return strings.Compare(a.file, b.file)
175		})
176		pod := Pod{
177			MetaFile:         p.mf,
178			CounterDataFiles: make([]string, 0, len(p.elements)),
179			Origins:          make([]int, 0, len(p.elements)),
180			ProcessIDs:       make([]int, 0, len(p.elements)),
181		}
182		for _, e := range p.elements {
183			pod.CounterDataFiles = append(pod.CounterDataFiles, e.file)
184			pod.Origins = append(pod.Origins, e.origin)
185			pod.ProcessIDs = append(pod.ProcessIDs, e.pid)
186		}
187		pods = append(pods, pod)
188	}
189	slices.SortFunc(pods, func(a, b Pod) int {
190		return strings.Compare(a.MetaFile, b.MetaFile)
191	})
192	return pods
193}
194
// warning prints a diagnostic to standard error. The message is
// produced by formatting 'a' according to the format string 's', is
// prefixed with "warning: ", and is terminated with a newline.
func warning(s string, a ...interface{}) {
	stderr := os.Stderr
	stderr.WriteString("warning: ")
	fmt.Fprintf(stderr, s, a...)
	stderr.WriteString("\n")
}
200