1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package buildinfo provides access to information embedded in a Go binary
6// about how it was built. This includes the Go toolchain version, and the
7// set of modules used (for binaries built in module mode).
8//
9// Build information is available for the currently running binary in
10// runtime/debug.ReadBuildInfo.
11package buildinfo
12
13import (
14	"bytes"
15	"debug/elf"
16	"debug/macho"
17	"debug/pe"
18	"debug/plan9obj"
19	"encoding/binary"
20	"errors"
21	"fmt"
22	"internal/saferio"
23	"internal/xcoff"
24	"io"
25	"io/fs"
26	"os"
27	"runtime/debug"
28	_ "unsafe" // for linkname
29)
30
// BuildInfo is a type alias for [debug.BuildInfo]. We cannot move the types
// here, since runtime/debug would need to import this package, which would
// make it a much larger dependency.
type BuildInfo = debug.BuildInfo
35
// errUnrecognizedFormat is returned when a given executable file doesn't
// appear to be in a known format, or it breaks the rules of that format,
// or when there are I/O errors reading the file.
//
// Some ReadData implementations in this file also return it when the
// requested address does not fall inside any segment or section.
var errUnrecognizedFormat = errors.New("unrecognized file format")
40
// errNotGoExe is returned when a given executable file is valid but does
// not contain Go build information: no candidate data segment exists,
// the build info header cannot be found or is malformed, or the recorded
// Go version string is empty.
//
// errNotGoExe should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - github.com/quay/claircore
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname errNotGoExe
var errNotGoExe = errors.New("not a Go executable")
54
// The build info blob left by the linker is identified by
// a 16-byte header, consisting of buildInfoMagic (14 bytes),
// the binary's pointer size (1 byte),
// and a flags byte (1 byte).
//
// readRawBuildInfo interprets the flags byte as follows: if the 2 bit is
// set, the version and module strings are stored inline, varint-prefixed,
// after the 32-byte header; otherwise any non-zero value means the binary
// is big endian.
var buildInfoMagic = []byte("\xff Go buildinf:")
60
61// ReadFile returns build information embedded in a Go binary
62// file at the given path. Most information is only available for binaries built
63// with module support.
64func ReadFile(name string) (info *BuildInfo, err error) {
65	defer func() {
66		if pathErr := (*fs.PathError)(nil); errors.As(err, &pathErr) {
67			err = fmt.Errorf("could not read Go build info: %w", err)
68		} else if err != nil {
69			err = fmt.Errorf("could not read Go build info from %s: %w", name, err)
70		}
71	}()
72
73	f, err := os.Open(name)
74	if err != nil {
75		return nil, err
76	}
77	defer f.Close()
78	return Read(f)
79}
80
81// Read returns build information embedded in a Go binary file
82// accessed through the given ReaderAt. Most information is only available for
83// binaries built with module support.
84func Read(r io.ReaderAt) (*BuildInfo, error) {
85	vers, mod, err := readRawBuildInfo(r)
86	if err != nil {
87		return nil, err
88	}
89	bi, err := debug.ParseBuildInfo(mod)
90	if err != nil {
91		return nil, err
92	}
93	bi.GoVersion = vers
94	return bi, nil
95}
96
// exe is the format-independent view of an executable that
// readRawBuildInfo and readString use to locate and read the build info.
// Each supported file format has its own implementation in this file.
type exe interface {
	// ReadData reads and returns up to size bytes starting at virtual address addr.
	// Implementations stop at the end of the segment or section containing
	// addr, so fewer than size bytes may be returned; callers check the length.
	ReadData(addr, size uint64) ([]byte, error)

	// DataStart returns the virtual address and size of the segment or section that
	// should contain build information. This is either a specially named section
	// or the first writable non-zero data segment.
	// Implementations return a size of 0 when no candidate is found.
	DataStart() (uint64, uint64)
}
106
107// readRawBuildInfo extracts the Go toolchain version and module information
108// strings from a Go binary. On success, vers should be non-empty. mod
109// is empty if the binary was not built with modules enabled.
110func readRawBuildInfo(r io.ReaderAt) (vers, mod string, err error) {
111	// Read the first bytes of the file to identify the format, then delegate to
112	// a format-specific function to load segment and section headers.
113	ident := make([]byte, 16)
114	if n, err := r.ReadAt(ident, 0); n < len(ident) || err != nil {
115		return "", "", errUnrecognizedFormat
116	}
117
118	var x exe
119	switch {
120	case bytes.HasPrefix(ident, []byte("\x7FELF")):
121		f, err := elf.NewFile(r)
122		if err != nil {
123			return "", "", errUnrecognizedFormat
124		}
125		x = &elfExe{f}
126	case bytes.HasPrefix(ident, []byte("MZ")):
127		f, err := pe.NewFile(r)
128		if err != nil {
129			return "", "", errUnrecognizedFormat
130		}
131		x = &peExe{f}
132	case bytes.HasPrefix(ident, []byte("\xFE\xED\xFA")) || bytes.HasPrefix(ident[1:], []byte("\xFA\xED\xFE")):
133		f, err := macho.NewFile(r)
134		if err != nil {
135			return "", "", errUnrecognizedFormat
136		}
137		x = &machoExe{f}
138	case bytes.HasPrefix(ident, []byte("\xCA\xFE\xBA\xBE")) || bytes.HasPrefix(ident, []byte("\xCA\xFE\xBA\xBF")):
139		f, err := macho.NewFatFile(r)
140		if err != nil || len(f.Arches) == 0 {
141			return "", "", errUnrecognizedFormat
142		}
143		x = &machoExe{f.Arches[0].File}
144	case bytes.HasPrefix(ident, []byte{0x01, 0xDF}) || bytes.HasPrefix(ident, []byte{0x01, 0xF7}):
145		f, err := xcoff.NewFile(r)
146		if err != nil {
147			return "", "", errUnrecognizedFormat
148		}
149		x = &xcoffExe{f}
150	case hasPlan9Magic(ident):
151		f, err := plan9obj.NewFile(r)
152		if err != nil {
153			return "", "", errUnrecognizedFormat
154		}
155		x = &plan9objExe{f}
156	default:
157		return "", "", errUnrecognizedFormat
158	}
159
160	// Read segment or section to find the build info blob.
161	// On some platforms, the blob will be in its own section, and DataStart
162	// returns the address of that section. On others, it's somewhere in the
163	// data segment; the linker puts it near the beginning.
164	// See cmd/link/internal/ld.Link.buildinfo.
165	dataAddr, dataSize := x.DataStart()
166	if dataSize == 0 {
167		return "", "", errNotGoExe
168	}
169	data, err := x.ReadData(dataAddr, dataSize)
170	if err != nil {
171		return "", "", err
172	}
173	const (
174		buildInfoAlign = 16
175		buildInfoSize  = 32
176	)
177	for {
178		i := bytes.Index(data, buildInfoMagic)
179		if i < 0 || len(data)-i < buildInfoSize {
180			return "", "", errNotGoExe
181		}
182		if i%buildInfoAlign == 0 && len(data)-i >= buildInfoSize {
183			data = data[i:]
184			break
185		}
186		data = data[(i+buildInfoAlign-1)&^(buildInfoAlign-1):]
187	}
188
189	// Decode the blob.
190	// The first 14 bytes are buildInfoMagic.
191	// The next two bytes indicate pointer size in bytes (4 or 8) and endianness
192	// (0 for little, 1 for big).
193	// Two virtual addresses to Go strings follow that: runtime.buildVersion,
194	// and runtime.modinfo.
195	// On 32-bit platforms, the last 8 bytes are unused.
196	// If the endianness has the 2 bit set, then the pointers are zero
197	// and the 32-byte header is followed by varint-prefixed string data
198	// for the two string values we care about.
199	ptrSize := int(data[14])
200	if data[15]&2 != 0 {
201		vers, data = decodeString(data[32:])
202		mod, data = decodeString(data)
203	} else {
204		bigEndian := data[15] != 0
205		var bo binary.ByteOrder
206		if bigEndian {
207			bo = binary.BigEndian
208		} else {
209			bo = binary.LittleEndian
210		}
211		var readPtr func([]byte) uint64
212		if ptrSize == 4 {
213			readPtr = func(b []byte) uint64 { return uint64(bo.Uint32(b)) }
214		} else if ptrSize == 8 {
215			readPtr = bo.Uint64
216		} else {
217			return "", "", errNotGoExe
218		}
219		vers = readString(x, ptrSize, readPtr, readPtr(data[16:]))
220		mod = readString(x, ptrSize, readPtr, readPtr(data[16+ptrSize:]))
221	}
222	if vers == "" {
223		return "", "", errNotGoExe
224	}
225	if len(mod) >= 33 && mod[len(mod)-17] == '\n' {
226		// Strip module framing: sentinel strings delimiting the module info.
227		// These are cmd/go/internal/modload.infoStart and infoEnd.
228		mod = mod[16 : len(mod)-16]
229	} else {
230		mod = ""
231	}
232
233	return vers, mod, nil
234}
235
// hasPlan9Magic reports whether magic begins with the big-endian 32-bit
// magic number of a Plan 9 a.out executable for 386, amd64 or arm.
// Inputs shorter than four bytes never match.
func hasPlan9Magic(magic []byte) bool {
	if len(magic) < 4 {
		return false
	}
	switch binary.BigEndian.Uint32(magic) {
	case plan9obj.Magic386, plan9obj.MagicAMD64, plan9obj.MagicARM:
		return true
	default:
		return false
	}
}
246
// decodeString reads one uvarint-length-prefixed string from the front of
// data. It returns the string and the bytes that follow it. If the length
// prefix is missing or malformed, or announces more bytes than remain,
// it returns "" and a nil rest.
func decodeString(data []byte) (s string, rest []byte) {
	length, width := binary.Uvarint(data)
	if width <= 0 {
		return "", nil
	}
	payload := data[width:]
	if length > uint64(len(payload)) {
		return "", nil
	}
	return string(payload[:length]), payload[length:]
}
254
255// readString returns the string at address addr in the executable x.
256func readString(x exe, ptrSize int, readPtr func([]byte) uint64, addr uint64) string {
257	hdr, err := x.ReadData(addr, uint64(2*ptrSize))
258	if err != nil || len(hdr) < 2*ptrSize {
259		return ""
260	}
261	dataAddr := readPtr(hdr)
262	dataLen := readPtr(hdr[ptrSize:])
263	data, err := x.ReadData(dataAddr, dataLen)
264	if err != nil || uint64(len(data)) < dataLen {
265		return ""
266	}
267	return string(data)
268}
269
270// elfExe is the ELF implementation of the exe interface.
271type elfExe struct {
272	f *elf.File
273}
274
275func (x *elfExe) ReadData(addr, size uint64) ([]byte, error) {
276	for _, prog := range x.f.Progs {
277		if prog.Vaddr <= addr && addr <= prog.Vaddr+prog.Filesz-1 {
278			n := prog.Vaddr + prog.Filesz - addr
279			if n > size {
280				n = size
281			}
282			return saferio.ReadDataAt(prog, n, int64(addr-prog.Vaddr))
283		}
284	}
285	return nil, errUnrecognizedFormat
286}
287
288func (x *elfExe) DataStart() (uint64, uint64) {
289	for _, s := range x.f.Sections {
290		if s.Name == ".go.buildinfo" {
291			return s.Addr, s.Size
292		}
293	}
294	for _, p := range x.f.Progs {
295		if p.Type == elf.PT_LOAD && p.Flags&(elf.PF_X|elf.PF_W) == elf.PF_W {
296			return p.Vaddr, p.Memsz
297		}
298	}
299	return 0, 0
300}
301
// peExe is the PE (Windows Portable Executable) implementation of the exe interface.
type peExe struct {
	f *pe.File
}

// imageBase returns the ImageBase field of the file's optional header,
// widened to 64 bits for a 32-bit header. It returns 0 if the optional
// header is absent or of another type.
func (x *peExe) imageBase() uint64 {
	if oh, ok := x.f.OptionalHeader.(*pe.OptionalHeader32); ok {
		return uint64(oh.ImageBase)
	}
	if oh, ok := x.f.OptionalHeader.(*pe.OptionalHeader64); ok {
		return oh.ImageBase
	}
	return 0
}
316
317func (x *peExe) ReadData(addr, size uint64) ([]byte, error) {
318	addr -= x.imageBase()
319	for _, sect := range x.f.Sections {
320		if uint64(sect.VirtualAddress) <= addr && addr <= uint64(sect.VirtualAddress+sect.Size-1) {
321			n := uint64(sect.VirtualAddress+sect.Size) - addr
322			if n > size {
323				n = size
324			}
325			return saferio.ReadDataAt(sect, n, int64(addr-uint64(sect.VirtualAddress)))
326		}
327	}
328	return nil, errUnrecognizedFormat
329}
330
331func (x *peExe) DataStart() (uint64, uint64) {
332	// Assume data is first writable section.
333	const (
334		IMAGE_SCN_CNT_CODE               = 0x00000020
335		IMAGE_SCN_CNT_INITIALIZED_DATA   = 0x00000040
336		IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080
337		IMAGE_SCN_MEM_EXECUTE            = 0x20000000
338		IMAGE_SCN_MEM_READ               = 0x40000000
339		IMAGE_SCN_MEM_WRITE              = 0x80000000
340		IMAGE_SCN_MEM_DISCARDABLE        = 0x2000000
341		IMAGE_SCN_LNK_NRELOC_OVFL        = 0x1000000
342		IMAGE_SCN_ALIGN_32BYTES          = 0x600000
343	)
344	for _, sect := range x.f.Sections {
345		if sect.VirtualAddress != 0 && sect.Size != 0 &&
346			sect.Characteristics&^IMAGE_SCN_ALIGN_32BYTES == IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_MEM_READ|IMAGE_SCN_MEM_WRITE {
347			return uint64(sect.VirtualAddress) + x.imageBase(), uint64(sect.VirtualSize)
348		}
349	}
350	return 0, 0
351}
352
353// machoExe is the Mach-O (Apple macOS/iOS) implementation of the exe interface.
354type machoExe struct {
355	f *macho.File
356}
357
358func (x *machoExe) ReadData(addr, size uint64) ([]byte, error) {
359	for _, load := range x.f.Loads {
360		seg, ok := load.(*macho.Segment)
361		if !ok {
362			continue
363		}
364		if seg.Addr <= addr && addr <= seg.Addr+seg.Filesz-1 {
365			if seg.Name == "__PAGEZERO" {
366				continue
367			}
368			n := seg.Addr + seg.Filesz - addr
369			if n > size {
370				n = size
371			}
372			return saferio.ReadDataAt(seg, n, int64(addr-seg.Addr))
373		}
374	}
375	return nil, errUnrecognizedFormat
376}
377
378func (x *machoExe) DataStart() (uint64, uint64) {
379	// Look for section named "__go_buildinfo".
380	for _, sec := range x.f.Sections {
381		if sec.Name == "__go_buildinfo" {
382			return sec.Addr, sec.Size
383		}
384	}
385	// Try the first non-empty writable segment.
386	const RW = 3
387	for _, load := range x.f.Loads {
388		seg, ok := load.(*macho.Segment)
389		if ok && seg.Addr != 0 && seg.Filesz != 0 && seg.Prot == RW && seg.Maxprot == RW {
390			return seg.Addr, seg.Memsz
391		}
392	}
393	return 0, 0
394}
395
396// xcoffExe is the XCOFF (AIX eXtended COFF) implementation of the exe interface.
397type xcoffExe struct {
398	f *xcoff.File
399}
400
401func (x *xcoffExe) ReadData(addr, size uint64) ([]byte, error) {
402	for _, sect := range x.f.Sections {
403		if sect.VirtualAddress <= addr && addr <= sect.VirtualAddress+sect.Size-1 {
404			n := sect.VirtualAddress + sect.Size - addr
405			if n > size {
406				n = size
407			}
408			return saferio.ReadDataAt(sect, n, int64(addr-sect.VirtualAddress))
409		}
410	}
411	return nil, errors.New("address not mapped")
412}
413
414func (x *xcoffExe) DataStart() (uint64, uint64) {
415	if s := x.f.SectionByType(xcoff.STYP_DATA); s != nil {
416		return s.VirtualAddress, s.Size
417	}
418	return 0, 0
419}
420
// plan9objExe is the Plan 9 a.out implementation of the exe interface.
type plan9objExe struct {
	f *plan9obj.File
}

// DataStart returns the offset and size of the section named "data",
// or 0, 0 if the file has no such section.
func (x *plan9objExe) DataStart() (uint64, uint64) {
	data := x.f.Section("data")
	if data == nil {
		return 0, 0
	}
	return uint64(data.Offset), uint64(data.Size)
}
432
433func (x *plan9objExe) ReadData(addr, size uint64) ([]byte, error) {
434	for _, sect := range x.f.Sections {
435		if uint64(sect.Offset) <= addr && addr <= uint64(sect.Offset+sect.Size-1) {
436			n := uint64(sect.Offset+sect.Size) - addr
437			if n > size {
438				n = size
439			}
440			return saferio.ReadDataAt(sect, n, int64(addr-uint64(sect.Offset)))
441		}
442	}
443	return nil, errors.New("address not mapped")
444}
445