1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package buildid
6
7import (
8	"bytes"
9	"debug/elf"
10	"fmt"
11	"internal/xcoff"
12	"io"
13	"io/fs"
14	"os"
15	"strconv"
16	"strings"
17)
18
var (
	// errBuildIDMalformed is the underlying error used when a file is
	// recognized but its build ID data cannot be parsed. The readers in
	// this file wrap it in an *fs.PathError carrying the file name.
	errBuildIDMalformed = fmt.Errorf("malformed object file")

	// The values below are matched by ReadFile, in this order, against
	// the first four newline-terminated lines of a Go package archive.

	// bangArch must equal the first line exactly (the archive magic).
	bangArch = []byte("!<arch>")
	// pkgdef must prefix the second line (the first member's header).
	pkgdef   = []byte("__.PKGDEF")
	// goobject must prefix the third line (the Go object header).
	goobject = []byte("go object ")
	// buildid prefixes the fourth line; the quoted build ID follows it.
	buildid  = []byte("build id ")
)
27
28// ReadFile reads the build ID from an archive or executable file.
29func ReadFile(name string) (id string, err error) {
30	f, err := os.Open(name)
31	if err != nil {
32		return "", err
33	}
34	defer f.Close()
35
36	buf := make([]byte, 8)
37	if _, err := f.ReadAt(buf, 0); err != nil {
38		return "", err
39	}
40	if string(buf) != "!<arch>\n" {
41		if string(buf) == "<bigaf>\n" {
42			return readGccgoBigArchive(name, f)
43		}
44		return readBinary(name, f)
45	}
46
47	// Read just enough of the target to fetch the build ID.
48	// The archive is expected to look like:
49	//
50	//	!<arch>
51	//	__.PKGDEF       0           0     0     644     7955      `
52	//	go object darwin amd64 devel X:none
53	//	build id "b41e5c45250e25c9fd5e9f9a1de7857ea0d41224"
54	//
55	// The variable-sized strings are GOOS, GOARCH, and the experiment list (X:none).
56	// Reading the first 1024 bytes should be plenty.
57	data := make([]byte, 1024)
58	n, err := io.ReadFull(f, data)
59	if err != nil && n == 0 {
60		return "", err
61	}
62
63	tryGccgo := func() (string, error) {
64		return readGccgoArchive(name, f)
65	}
66
67	// Archive header.
68	for i := 0; ; i++ { // returns during i==3
69		j := bytes.IndexByte(data, '\n')
70		if j < 0 {
71			return tryGccgo()
72		}
73		line := data[:j]
74		data = data[j+1:]
75		switch i {
76		case 0:
77			if !bytes.Equal(line, bangArch) {
78				return tryGccgo()
79			}
80		case 1:
81			if !bytes.HasPrefix(line, pkgdef) {
82				return tryGccgo()
83			}
84		case 2:
85			if !bytes.HasPrefix(line, goobject) {
86				return tryGccgo()
87			}
88		case 3:
89			if !bytes.HasPrefix(line, buildid) {
90				// Found the object header, just doesn't have a build id line.
91				// Treat as successful, with empty build id.
92				return "", nil
93			}
94			id, err := strconv.Unquote(string(line[len(buildid):]))
95			if err != nil {
96				return tryGccgo()
97			}
98			return id, nil
99		}
100	}
101}
102
103// readGccgoArchive tries to parse the archive as a standard Unix
104// archive file, and fetch the build ID from the _buildid.o entry.
105// The _buildid.o entry is written by (*Builder).gccgoBuildIDELFFile
106// in cmd/go/internal/work/exec.go.
107func readGccgoArchive(name string, f *os.File) (string, error) {
108	bad := func() (string, error) {
109		return "", &fs.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed}
110	}
111
112	off := int64(8)
113	for {
114		if _, err := f.Seek(off, io.SeekStart); err != nil {
115			return "", err
116		}
117
118		// TODO(iant): Make a debug/ar package, and use it
119		// here and in cmd/link.
120		var hdr [60]byte
121		if _, err := io.ReadFull(f, hdr[:]); err != nil {
122			if err == io.EOF {
123				// No more entries, no build ID.
124				return "", nil
125			}
126			return "", err
127		}
128		off += 60
129
130		sizeStr := strings.TrimSpace(string(hdr[48:58]))
131		size, err := strconv.ParseInt(sizeStr, 0, 64)
132		if err != nil {
133			return bad()
134		}
135
136		name := strings.TrimSpace(string(hdr[:16]))
137		if name == "_buildid.o/" {
138			sr := io.NewSectionReader(f, off, size)
139			e, err := elf.NewFile(sr)
140			if err != nil {
141				return bad()
142			}
143			s := e.Section(".go.buildid")
144			if s == nil {
145				return bad()
146			}
147			data, err := s.Data()
148			if err != nil {
149				return bad()
150			}
151			return string(data), nil
152		}
153
154		off += size
155		if off&1 != 0 {
156			off++
157		}
158	}
159}
160
161// readGccgoBigArchive tries to parse the archive as an AIX big
162// archive file, and fetch the build ID from the _buildid.o entry.
163// The _buildid.o entry is written by (*Builder).gccgoBuildIDXCOFFFile
164// in cmd/go/internal/work/exec.go.
165func readGccgoBigArchive(name string, f *os.File) (string, error) {
166	bad := func() (string, error) {
167		return "", &fs.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed}
168	}
169
170	// Read fixed-length header.
171	if _, err := f.Seek(0, io.SeekStart); err != nil {
172		return "", err
173	}
174	var flhdr [128]byte
175	if _, err := io.ReadFull(f, flhdr[:]); err != nil {
176		return "", err
177	}
178	// Read first member offset.
179	offStr := strings.TrimSpace(string(flhdr[68:88]))
180	off, err := strconv.ParseInt(offStr, 10, 64)
181	if err != nil {
182		return bad()
183	}
184	for {
185		if off == 0 {
186			// No more entries, no build ID.
187			return "", nil
188		}
189		if _, err := f.Seek(off, io.SeekStart); err != nil {
190			return "", err
191		}
192		// Read member header.
193		var hdr [112]byte
194		if _, err := io.ReadFull(f, hdr[:]); err != nil {
195			return "", err
196		}
197		// Read member name length.
198		namLenStr := strings.TrimSpace(string(hdr[108:112]))
199		namLen, err := strconv.ParseInt(namLenStr, 10, 32)
200		if err != nil {
201			return bad()
202		}
203		if namLen == 10 {
204			var nam [10]byte
205			if _, err := io.ReadFull(f, nam[:]); err != nil {
206				return "", err
207			}
208			if string(nam[:]) == "_buildid.o" {
209				sizeStr := strings.TrimSpace(string(hdr[0:20]))
210				size, err := strconv.ParseInt(sizeStr, 10, 64)
211				if err != nil {
212					return bad()
213				}
214				off += int64(len(hdr)) + namLen + 2
215				if off&1 != 0 {
216					off++
217				}
218				sr := io.NewSectionReader(f, off, size)
219				x, err := xcoff.NewFile(sr)
220				if err != nil {
221					return bad()
222				}
223				data := x.CSect(".go.buildid")
224				if data == nil {
225					return bad()
226				}
227				return string(data), nil
228			}
229		}
230
231		// Read next member offset.
232		offStr = strings.TrimSpace(string(hdr[20:40]))
233		off, err = strconv.ParseInt(offStr, 10, 64)
234		if err != nil {
235			return bad()
236		}
237	}
238}
239
var (
	// goBuildPrefix and goBuildEnd bracket the raw build ID that readRaw
	// searches for. The prefix ends with the opening double quote and the
	// end marker begins with the closing one, so the bytes between them,
	// plus those two quotes, form a quoted string.
	goBuildPrefix = []byte("\xff Go build ID: \"")
	goBuildEnd    = []byte("\"\n \xff")

	// elfPrefix is the leading magic that makes readBinary treat the
	// file as ELF.
	elfPrefix = []byte("\x7fELF")

	// machoPrefixes lists the leading magics that make readBinary treat
	// the file as Mach-O: 0xfeedface and 0xfeedfacf, each in both byte
	// orders.
	machoPrefixes = [][]byte{
		{0xfe, 0xed, 0xfa, 0xce},
		{0xfe, 0xed, 0xfa, 0xcf},
		{0xce, 0xfa, 0xed, 0xfe},
		{0xcf, 0xfa, 0xed, 0xfe},
	}
)

// readSize is the number of bytes readBinary reads from the start of
// the file. It is a variable rather than a constant so that it can be
// overridden.
var readSize = 32 * 1024 // changed for testing
255
256// readBinary reads the build ID from a binary.
257//
258// ELF binaries store the build ID in a proper PT_NOTE section.
259//
260// Other binary formats are not so flexible. For those, the linker
261// stores the build ID as non-instruction bytes at the very beginning
262// of the text segment, which should appear near the beginning
263// of the file. This is clumsy but fairly portable. Custom locations
264// can be added for other binary types as needed, like we did for ELF.
265func readBinary(name string, f *os.File) (id string, err error) {
266	// Read the first 32 kB of the binary file.
267	// That should be enough to find the build ID.
268	// In ELF files, the build ID is in the leading headers,
269	// which are typically less than 4 kB, not to mention 32 kB.
270	// In Mach-O files, there's no limit, so we have to parse the file.
271	// On other systems, we're trying to read enough that
272	// we get the beginning of the text segment in the read.
273	// The offset where the text segment begins in a hello
274	// world compiled for each different object format today:
275	//
276	//	Plan 9: 0x20
277	//	Windows: 0x600
278	//
279	data := make([]byte, readSize)
280	_, err = io.ReadFull(f, data)
281	if err == io.ErrUnexpectedEOF {
282		err = nil
283	}
284	if err != nil {
285		return "", err
286	}
287
288	if bytes.HasPrefix(data, elfPrefix) {
289		return readELF(name, f, data)
290	}
291	for _, m := range machoPrefixes {
292		if bytes.HasPrefix(data, m) {
293			return readMacho(name, f, data)
294		}
295	}
296	return readRaw(name, data)
297}
298
299// readRaw finds the raw build ID stored in text segment data.
300func readRaw(name string, data []byte) (id string, err error) {
301	i := bytes.Index(data, goBuildPrefix)
302	if i < 0 {
303		// Missing. Treat as successful but build ID empty.
304		return "", nil
305	}
306
307	j := bytes.Index(data[i+len(goBuildPrefix):], goBuildEnd)
308	if j < 0 {
309		return "", &fs.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed}
310	}
311
312	quoted := data[i+len(goBuildPrefix)-1 : i+len(goBuildPrefix)+j+1]
313	id, err = strconv.Unquote(string(quoted))
314	if err != nil {
315		return "", &fs.PathError{Op: "parse", Path: name, Err: errBuildIDMalformed}
316	}
317	return id, nil
318}
319
// HashToString converts the hash h to a string to be recorded
// in package archives and binaries as part of the build ID.
//
// Only the first 120 bits of h are used: five 24-bit groups, each
// written as four characters of the URL-safe base64 alphabet, for a
// 20-byte result. The remaining bytes of h do not affect the output.
//
// Because the result is only used for detecting the need to rebuild
// installed files (not for lookups in the object file cache), 120 bits
// are sufficient to drive the probability of a false "do not need to
// rebuild" decision to effectively zero. Archives embed two such
// hashes and binaries four, so the short form is a significant savings
// when build IDs are displayed: 20*4+3 = 83 bytes for four hashes and
// their separators, against 64*4+3 = 259 bytes if each full hash were
// printed in hexadecimal.
func HashToString(h [32]byte) string {
	const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
	const groups = 5

	out := make([]byte, 0, groups*4)
	for src := h[:groups*3]; len(src) > 0; src = src[3:] {
		// Pack three bytes into 24 bits, most significant byte first.
		word := uint32(src[0])<<16 | uint32(src[1])<<8 | uint32(src[2])
		// Emit the four 6-bit digits, most significant first.
		for k := 0; k < 4; k++ {
			shift := uint(18 - 6*k)
			out = append(out, alphabet[(word>>shift)&0x3F])
		}
	}
	return string(out)
}
344