1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package elfexec provides utility routines to examine ELF binaries.
16package elfexec
17
18import (
19	"bufio"
20	"debug/elf"
21	"encoding/binary"
22	"fmt"
23	"io"
24)
25
// Limits and identifiers used while decoding ELF notes.
const (
	// maxNoteSize is the largest size, in bytes, accepted for the name or the
	// desc field of a single note.
	maxNoteSize = 1024 * 1024

	// noteTypeGNUBuildID is the note type that, combined with the name "GNU",
	// identifies the note carrying the GNU build ID.
	noteTypeGNUBuildID = 3
)
30
// elfNote holds one decoded entry of an ELF note section or note segment.
type elfNote struct {
	// Name is the note's "name" field without its terminating zero byte.
	Name string
	// Desc is the raw content of the note's "desc" field.
	Desc []byte
	// Type is the value of the note's "type" field.
	Type uint32
}
37
38// parseNotes returns the notes from a SHT_NOTE section or PT_NOTE segment.
39func parseNotes(reader io.Reader, alignment int, order binary.ByteOrder) ([]elfNote, error) {
40	r := bufio.NewReader(reader)
41
42	// padding returns the number of bytes required to pad the given size to an
43	// alignment boundary.
44	padding := func(size int) int {
45		return ((size + (alignment - 1)) &^ (alignment - 1)) - size
46	}
47
48	var notes []elfNote
49	for {
50		noteHeader := make([]byte, 12) // 3 4-byte words
51		if _, err := io.ReadFull(r, noteHeader); err == io.EOF {
52			break
53		} else if err != nil {
54			return nil, err
55		}
56		namesz := order.Uint32(noteHeader[0:4])
57		descsz := order.Uint32(noteHeader[4:8])
58		typ := order.Uint32(noteHeader[8:12])
59
60		if uint64(namesz) > uint64(maxNoteSize) {
61			return nil, fmt.Errorf("note name too long (%d bytes)", namesz)
62		}
63		var name string
64		if namesz > 0 {
65			// Documentation differs as to whether namesz is meant to include the
66			// trailing zero, but everyone agrees that name is null-terminated.
67			// So we'll just determine the actual length after the fact.
68			var err error
69			name, err = r.ReadString('\x00')
70			if err == io.EOF {
71				return nil, fmt.Errorf("missing note name (want %d bytes)", namesz)
72			} else if err != nil {
73				return nil, err
74			}
75			namesz = uint32(len(name))
76			name = name[:len(name)-1]
77		}
78
79		// Drop padding bytes until the desc field.
80		for n := padding(len(noteHeader) + int(namesz)); n > 0; n-- {
81			if _, err := r.ReadByte(); err == io.EOF {
82				return nil, fmt.Errorf(
83					"missing %d bytes of padding after note name", n)
84			} else if err != nil {
85				return nil, err
86			}
87		}
88
89		if uint64(descsz) > uint64(maxNoteSize) {
90			return nil, fmt.Errorf("note desc too long (%d bytes)", descsz)
91		}
92		desc := make([]byte, int(descsz))
93		if _, err := io.ReadFull(r, desc); err == io.EOF {
94			return nil, fmt.Errorf("missing desc (want %d bytes)", len(desc))
95		} else if err != nil {
96			return nil, err
97		}
98
99		notes = append(notes, elfNote{Name: name, Desc: desc, Type: typ})
100
101		// Drop padding bytes until the next note or the end of the section,
102		// whichever comes first.
103		for n := padding(len(desc)); n > 0; n-- {
104			if _, err := r.ReadByte(); err == io.EOF {
105				// We hit the end of the section before an alignment boundary.
106				// This can happen if this section is at the end of the file or the next
107				// section has a smaller alignment requirement.
108				break
109			} else if err != nil {
110				return nil, err
111			}
112		}
113	}
114	return notes, nil
115}
116
117// GetBuildID returns the GNU build-ID for an ELF binary.
118//
119// If no build-ID was found but the binary was read without error, it returns
120// (nil, nil).
121func GetBuildID(f *elf.File) ([]byte, error) {
122	findBuildID := func(notes []elfNote) ([]byte, error) {
123		var buildID []byte
124		for _, note := range notes {
125			if note.Name == "GNU" && note.Type == noteTypeGNUBuildID {
126				if buildID == nil {
127					buildID = note.Desc
128				} else {
129					return nil, fmt.Errorf("multiple build ids found, don't know which to use")
130				}
131			}
132		}
133		return buildID, nil
134	}
135
136	for _, p := range f.Progs {
137		if p.Type != elf.PT_NOTE {
138			continue
139		}
140		notes, err := parseNotes(p.Open(), int(p.Align), f.ByteOrder)
141		if err != nil {
142			return nil, err
143		}
144		if b, err := findBuildID(notes); b != nil || err != nil {
145			return b, err
146		}
147	}
148	for _, s := range f.Sections {
149		if s.Type != elf.SHT_NOTE {
150			continue
151		}
152		notes, err := parseNotes(s.Open(), int(s.Addralign), f.ByteOrder)
153		if err != nil {
154			return nil, err
155		}
156		if b, err := findBuildID(notes); b != nil || err != nil {
157			return b, err
158		}
159	}
160	return nil, nil
161}
162
// kernelBase computes the base address for mappings that look like a kernel
// image, which usually need special treatment. Tools such as perf report the
// address of the kernel relocation symbol (_text or _stext) as the start of
// the kernel mapping, and ChromeOS profiles additionally have the kernel
// image remapped to the 0-th page for obfuscation.
//
// The second result reports whether any of the kernel heuristics matched; when
// it is false the returned base is 0 and carries no meaning.
func kernelBase(loadSegment *elf.ProgHeader, stextOffset *uint64, start, limit, offset uint64) (uint64, bool) {
	const (
		pageSize = 1 << 12
		// PAGE_OFFSET for PowerPC64, see arch/powerpc/Kconfig in the kernel sources.
		ppc64PageOffset = 0xc000000000000000
	)

	vaddr := loadSegment.Vaddr
	haveStext := stextOffset != nil
	// stextAligned: _stext is known and sits at the same offset within its
	// page as the mapping start does.
	stextAligned := haveStext && *stextOffset%pageSize == start%pageSize
	// plausibleOffset: 0, the PowerPC64 PAGE_OFFSET, or the mapping start
	// itself (as seen with ASLR kernels).
	plausibleOffset := offset == 0 || offset == ppc64PageOffset || offset == start

	switch {
	case vaddr == start-offset:
		return offset, true

	case start == 0 && limit != 0 && haveStext:
		// ChromeOS remaps its kernel to 0. Nothing else should come
		// down this path. Empirical values:
		//       VADDR=0xffffffff80200000
		// stextOffset=0xffffffff80200198
		return start - *stextOffset, true

	case start >= vaddr && limit > start && plausibleOffset:
		// Some kernels look like:
		//       VADDR=0xffffffff80200000
		// stextOffset=0xffffffff80200198
		//       Start=0xffffffff83200000
		//       Limit=0xffffffff84200000
		//      Offset=0 (0xc000000000000000 for PowerPC64) (== Start for ASLR kernel)
		if stextAligned {
			// perf uses the address of _stext as start. Some tools may
			// adjust for this before calling GetBase, in which case the
			// page alignment differs from that of stextOffset and the
			// segment address is used instead.
			return start - *stextOffset, true
		}
		return start - vaddr, true

	case stextAligned && start%pageSize != 0:
		// ChromeOS remaps its kernel to 0 + start%pageSize. Nothing
		// else should come down this path. Empirical values:
		//       start=0x198 limit=0x2f9fffff offset=0
		//       VADDR=0xffffffff81000000
		// stextOffset=0xffffffff81000198
		return start - *stextOffset, true
	}
	return 0, false
}
211
212// GetBase determines the base address to subtract from virtual
213// address to get symbol table address. For an executable, the base
214// is 0. Otherwise, it's a shared library, and the base is the
215// address where the mapping starts. The kernel needs special handling.
216func GetBase(fh *elf.FileHeader, loadSegment *elf.ProgHeader, stextOffset *uint64, start, limit, offset uint64) (uint64, error) {
217
218	if start == 0 && offset == 0 && (limit == ^uint64(0) || limit == 0) {
219		// Some tools may introduce a fake mapping that spans the entire
220		// address space. Assume that the address has already been
221		// adjusted, so no additional base adjustment is necessary.
222		return 0, nil
223	}
224
225	switch fh.Type {
226	case elf.ET_EXEC:
227		if loadSegment == nil {
228			// Assume fixed-address executable and so no adjustment.
229			return 0, nil
230		}
231		if stextOffset == nil && start > 0 && start < 0x8000000000000000 {
232			// A regular user-mode executable. Compute the base offset using same
233			// arithmetics as in ET_DYN case below, see the explanation there.
234			// Ideally, the condition would just be "stextOffset == nil" as that
235			// represents the address of _stext symbol in the vmlinux image. Alas,
236			// the caller may skip reading it from the binary (it's expensive to scan
237			// all the symbols) and so it may be nil even for the kernel executable.
238			// So additionally check that the start is within the user-mode half of
239			// the 64-bit address space.
240			return start - offset + loadSegment.Off - loadSegment.Vaddr, nil
241		}
242		// Various kernel heuristics and cases are handled separately.
243		if base, match := kernelBase(loadSegment, stextOffset, start, limit, offset); match {
244			return base, nil
245		}
246		// ChromeOS can remap its kernel to 0, and the caller might have not found
247		// the _stext symbol. Split this case from kernelBase() above, since we don't
248		// want to apply it to an ET_DYN user-mode executable.
249		if start == 0 && limit != 0 && stextOffset == nil {
250			return start - loadSegment.Vaddr, nil
251		}
252
253		return 0, fmt.Errorf("don't know how to handle EXEC segment: %v start=0x%x limit=0x%x offset=0x%x", *loadSegment, start, limit, offset)
254	case elf.ET_REL:
255		if offset != 0 {
256			return 0, fmt.Errorf("don't know how to handle mapping.Offset")
257		}
258		return start, nil
259	case elf.ET_DYN:
260		// The process mapping information, start = start of virtual address range,
261		// and offset = offset in the executable file of the start address, tells us
262		// that a runtime virtual address x maps to a file offset
263		// fx = x - start + offset.
264		if loadSegment == nil {
265			return start - offset, nil
266		}
267		// Kernels compiled as PIE can be ET_DYN as well. Use heuristic, similar to
268		// the ET_EXEC case above.
269		if base, match := kernelBase(loadSegment, stextOffset, start, limit, offset); match {
270			return base, nil
271		}
272		// The program header, if not nil, indicates the offset in the file where
273		// the executable segment is located (loadSegment.Off), and the base virtual
274		// address where the first byte of the segment is loaded
275		// (loadSegment.Vaddr). A file offset fx maps to a virtual (symbol) address
276		// sx = fx - loadSegment.Off + loadSegment.Vaddr.
277		//
278		// Thus, a runtime virtual address x maps to a symbol address
279		// sx = x - start + offset - loadSegment.Off + loadSegment.Vaddr.
280		return start - offset + loadSegment.Off - loadSegment.Vaddr, nil
281	}
282	return 0, fmt.Errorf("don't know how to handle FileHeader.Type %v", fh.Type)
283}
284
// FindTextProgHeader returns the header of the executable PT_LOAD segment
// whose address range contains the start of a section named ".text". It
// returns nil when the file has no such section or no such segment.
func FindTextProgHeader(f *elf.File) *elf.ProgHeader {
	for _, sec := range f.Sections {
		if sec.Name != ".text" {
			continue
		}
		addr := sec.Addr
		// Look for the executable LOAD segment covering the section address.
		for _, seg := range f.Progs {
			if seg.Type != elf.PT_LOAD || seg.Flags&elf.PF_X == 0 {
				continue
			}
			if addr < seg.Vaddr || addr >= seg.Vaddr+seg.Memsz {
				continue
			}
			return &seg.ProgHeader
		}
	}
	return nil
}
300
// ProgramHeadersForMapping selects the PT_LOAD program headers that overlap
// the runtime mapping with file offset mapOff and memory size mapSz.
//
// Segments with zero file size are skipped because their file offset values
// are unreliable. An overlapping segment is still rejected when its
// page-aligned file offset is greater than the mapping file offset, or when
// the mapping starts inside the segment, covers less than one page of it, and
// continues for at least one more page past the segment end.
//
// The returned pointers refer to elements of phdrs, so they are valid only
// while phdrs is neither modified nor discarded.
func ProgramHeadersForMapping(phdrs []elf.ProgHeader, mapOff, mapSz uint64) []*elf.ProgHeader {
	const (
		// pageSize is the virtual memory page size assumed for the loader. The
		// real value depends on the memory management unit of the CPU, but it
		// is 4KB on virtually all the architectures that we care about, so it
		// is a constant here. Should an architecture with a different page
		// size show up, the page size of the system where the profile was
		// collected must be guessed, possibly from the architecture specified
		// in the ELF file header.
		pageSize       = 4096
		pageOffsetMask = pageSize - 1
	)

	var (
		selected []*elf.ProgHeader
		mapEnd   = mapOff + mapSz
	)
	for idx := range phdrs {
		seg := &phdrs[idx]
		// Segments with zero file size can have arbitrary file offsets, see
		// b/195427553. Only loadable segments are of interest.
		if seg.Filesz == 0 || seg.Type != elf.PT_LOAD {
			continue
		}
		// The segment must overlap the mapping.
		segEnd := seg.Off + seg.Memsz
		if mapOff >= segEnd || seg.Off >= mapEnd {
			continue
		}
		// A mapping offset strictly below the page-aligned segment offset
		// means the mapping comes from a different segment, fixes
		// b/179920361.
		var alignedSegOff uint64
		if inPage := seg.Vaddr & pageOffsetMask; seg.Off > inPage {
			alignedSegOff = seg.Off - inPage
		}
		if mapOff < alignedSegOff {
			continue
		}
		// A mapping that starts in the middle of the segment, covers less
		// than one page of it, and extends at least one page past it also
		// comes from a different segment.
		startsInside := mapOff > seg.Off
		coversUnderOnePage := segEnd < mapOff+pageSize
		runsPageBeyond := mapEnd >= segEnd+pageSize
		if startsInside && coversUnderOnePage && runsPageBeyond {
			continue
		}
		selected = append(selected, seg)
	}
	return selected
}
355
// HeaderForFileOffset picks the one program header among headers whose file
// range [Off, Off+Memsz) contains fileOffset. It fails with an error when no
// header contains the offset or when more than one does.
func HeaderForFileOffset(headers []*elf.ProgHeader, fileOffset uint64) (*elf.ProgHeader, error) {
	var match *elf.ProgHeader
	for i := 0; i < len(headers); i++ {
		cand := headers[i]
		if fileOffset < cand.Off || fileOffset >= cand.Off+cand.Memsz {
			continue
		}
		if match != nil {
			// Assuming no other bugs, a second hit is only possible when two
			// or more small program segments fit on the same page and a
			// segment other than the last one includes uninitialized data, or
			// when the debug binary used for symbolization is stripped of
			// some sections, so segment file sizes are smaller than memory
			// sizes.
			return nil, fmt.Errorf("found second program header (%#v) that matches file offset %x, first program header is %#v. Is this a stripped binary, or does the first program segment contain uninitialized data?", *cand, fileOffset, *match)
		}
		match = cand
	}
	if match != nil {
		return match, nil
	}
	return nil, fmt.Errorf("no program header matches file offset %x", fileOffset)
}
379