1// Copyright 2014 Google Inc. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package elfexec provides utility routines to examine ELF binaries. 16package elfexec 17 18import ( 19 "bufio" 20 "debug/elf" 21 "encoding/binary" 22 "fmt" 23 "io" 24) 25 26const ( 27 maxNoteSize = 1 << 20 // in bytes 28 noteTypeGNUBuildID = 3 29) 30 31// elfNote is the payload of a Note Section in an ELF file. 32type elfNote struct { 33 Name string // Contents of the "name" field, omitting the trailing zero byte. 34 Desc []byte // Contents of the "desc" field. 35 Type uint32 // Contents of the "type" field. 36} 37 38// parseNotes returns the notes from a SHT_NOTE section or PT_NOTE segment. 39func parseNotes(reader io.Reader, alignment int, order binary.ByteOrder) ([]elfNote, error) { 40 r := bufio.NewReader(reader) 41 42 // padding returns the number of bytes required to pad the given size to an 43 // alignment boundary. 
44 padding := func(size int) int { 45 return ((size + (alignment - 1)) &^ (alignment - 1)) - size 46 } 47 48 var notes []elfNote 49 for { 50 noteHeader := make([]byte, 12) // 3 4-byte words 51 if _, err := io.ReadFull(r, noteHeader); err == io.EOF { 52 break 53 } else if err != nil { 54 return nil, err 55 } 56 namesz := order.Uint32(noteHeader[0:4]) 57 descsz := order.Uint32(noteHeader[4:8]) 58 typ := order.Uint32(noteHeader[8:12]) 59 60 if uint64(namesz) > uint64(maxNoteSize) { 61 return nil, fmt.Errorf("note name too long (%d bytes)", namesz) 62 } 63 var name string 64 if namesz > 0 { 65 // Documentation differs as to whether namesz is meant to include the 66 // trailing zero, but everyone agrees that name is null-terminated. 67 // So we'll just determine the actual length after the fact. 68 var err error 69 name, err = r.ReadString('\x00') 70 if err == io.EOF { 71 return nil, fmt.Errorf("missing note name (want %d bytes)", namesz) 72 } else if err != nil { 73 return nil, err 74 } 75 namesz = uint32(len(name)) 76 name = name[:len(name)-1] 77 } 78 79 // Drop padding bytes until the desc field. 80 for n := padding(len(noteHeader) + int(namesz)); n > 0; n-- { 81 if _, err := r.ReadByte(); err == io.EOF { 82 return nil, fmt.Errorf( 83 "missing %d bytes of padding after note name", n) 84 } else if err != nil { 85 return nil, err 86 } 87 } 88 89 if uint64(descsz) > uint64(maxNoteSize) { 90 return nil, fmt.Errorf("note desc too long (%d bytes)", descsz) 91 } 92 desc := make([]byte, int(descsz)) 93 if _, err := io.ReadFull(r, desc); err == io.EOF { 94 return nil, fmt.Errorf("missing desc (want %d bytes)", len(desc)) 95 } else if err != nil { 96 return nil, err 97 } 98 99 notes = append(notes, elfNote{Name: name, Desc: desc, Type: typ}) 100 101 // Drop padding bytes until the next note or the end of the section, 102 // whichever comes first. 
103 for n := padding(len(desc)); n > 0; n-- { 104 if _, err := r.ReadByte(); err == io.EOF { 105 // We hit the end of the section before an alignment boundary. 106 // This can happen if this section is at the end of the file or the next 107 // section has a smaller alignment requirement. 108 break 109 } else if err != nil { 110 return nil, err 111 } 112 } 113 } 114 return notes, nil 115} 116 117// GetBuildID returns the GNU build-ID for an ELF binary. 118// 119// If no build-ID was found but the binary was read without error, it returns 120// (nil, nil). 121func GetBuildID(f *elf.File) ([]byte, error) { 122 findBuildID := func(notes []elfNote) ([]byte, error) { 123 var buildID []byte 124 for _, note := range notes { 125 if note.Name == "GNU" && note.Type == noteTypeGNUBuildID { 126 if buildID == nil { 127 buildID = note.Desc 128 } else { 129 return nil, fmt.Errorf("multiple build ids found, don't know which to use") 130 } 131 } 132 } 133 return buildID, nil 134 } 135 136 for _, p := range f.Progs { 137 if p.Type != elf.PT_NOTE { 138 continue 139 } 140 notes, err := parseNotes(p.Open(), int(p.Align), f.ByteOrder) 141 if err != nil { 142 return nil, err 143 } 144 if b, err := findBuildID(notes); b != nil || err != nil { 145 return b, err 146 } 147 } 148 for _, s := range f.Sections { 149 if s.Type != elf.SHT_NOTE { 150 continue 151 } 152 notes, err := parseNotes(s.Open(), int(s.Addralign), f.ByteOrder) 153 if err != nil { 154 return nil, err 155 } 156 if b, err := findBuildID(notes); b != nil || err != nil { 157 return b, err 158 } 159 } 160 return nil, nil 161} 162 163// kernelBase calculates the base for kernel mappings, which usually require 164// special handling. For kernel mappings, tools (like perf) use the address of 165// the kernel relocation symbol (_text or _stext) as the mmap start. Additionally, 166// for obfuscation, ChromeOS profiles have the kernel image remapped to the 0-th page. 
func kernelBase(loadSegment *elf.ProgHeader, stextOffset *uint64, start, limit, offset uint64) (uint64, bool) {
	// The second result reports whether any of the kernel heuristics below
	// matched; when it is false the returned base is 0 and must be ignored.
	// loadSegment must not be nil; stextOffset may be nil when the address of
	// the _stext symbol is not known. All subtractions are unsigned and wrap.
	const (
		// PAGE_OFFSET for PowerPC64, see arch/powerpc/Kconfig in the kernel sources.
		pageOffsetPpc64 = 0xc000000000000000
		pageSize        = 4096
	)

	if loadSegment.Vaddr == start-offset {
		return offset, true
	}
	if start == 0 && limit != 0 && stextOffset != nil {
		// ChromeOS remaps its kernel to 0. Nothing else should come
		// down this path. Empirical values:
		//       VADDR=0xffffffff80200000
		// stextOffset=0xffffffff80200198
		return start - *stextOffset, true
	}
	if start >= loadSegment.Vaddr && limit > start && (offset == 0 || offset == pageOffsetPpc64 || offset == start) {
		// Some kernels look like:
		//       VADDR=0xffffffff80200000
		// stextOffset=0xffffffff80200198
		//       Start=0xffffffff83200000
		//       Limit=0xffffffff84200000
		//      Offset=0 (0xc000000000000000 for PowerPC64) (== Start for ASLR kernel)
		// So the base should be:
		if stextOffset != nil && (start%pageSize) == (*stextOffset%pageSize) {
			// perf uses the address of _stext as start. Some tools may
			// adjust for this before calling GetBase, in which case the page
			// alignment should be different from that of stextOffset.
			return start - *stextOffset, true
		}

		return start - loadSegment.Vaddr, true
	}
	if start%pageSize != 0 && stextOffset != nil && *stextOffset%pageSize == start%pageSize {
		// ChromeOS remaps its kernel to 0 + start%pageSize. Nothing
		// else should come down this path. Empirical values:
		//       start=0x198 limit=0x2f9fffff offset=0
		//       VADDR=0xffffffff81000000
		// stextOffset=0xffffffff81000198
		return start - *stextOffset, true
	}
	return 0, false
}

// GetBase determines the base address to subtract from virtual
// address to get symbol table address. For an executable, the base
// is 0. Otherwise, it's a shared library, and the base is the
// address where the mapping starts. The kernel needs special handling.
//
// fh is the ELF file header, loadSegment is the program header of the
// segment backing the mapping (may be nil), stextOffset is the address of the
// _stext symbol (may be nil if unknown), and start, limit and offset describe
// the runtime mapping. An error is returned for ELF types other than ET_EXEC,
// ET_REL and ET_DYN, for ET_REL mappings with a non-zero offset, and for
// ET_EXEC mappings that match none of the known layouts.
func GetBase(fh *elf.FileHeader, loadSegment *elf.ProgHeader, stextOffset *uint64, start, limit, offset uint64) (uint64, error) {
	if start == 0 && offset == 0 && (limit == ^uint64(0) || limit == 0) {
		// Some tools may introduce a fake mapping that spans the entire
		// address space. Assume that the address has already been
		// adjusted, so no additional base adjustment is necessary.
		return 0, nil
	}

	switch fh.Type {
	case elf.ET_EXEC:
		if loadSegment == nil {
			// Assume fixed-address executable and so no adjustment.
			return 0, nil
		}
		if stextOffset == nil && start > 0 && start < 0x8000000000000000 {
			// A regular user-mode executable. Compute the base offset using same
			// arithmetics as in ET_DYN case below, see the explanation there.
			// Ideally, the condition would just be "stextOffset == nil" as that
			// represents the address of _stext symbol in the vmlinux image. Alas,
			// the caller may skip reading it from the binary (it's expensive to scan
			// all the symbols) and so it may be nil even for the kernel executable.
			// So additionally check that the start is within the user-mode half of
			// the 64-bit address space.
			return start - offset + loadSegment.Off - loadSegment.Vaddr, nil
		}
		// Various kernel heuristics and cases are handled separately.
		if base, match := kernelBase(loadSegment, stextOffset, start, limit, offset); match {
			return base, nil
		}
		// ChromeOS can remap its kernel to 0, and the caller might have not found
		// the _stext symbol. Split this case from kernelBase() above, since we don't
		// want to apply it to an ET_DYN user-mode executable.
		if start == 0 && limit != 0 && stextOffset == nil {
			return start - loadSegment.Vaddr, nil
		}

		return 0, fmt.Errorf("don't know how to handle EXEC segment: %v start=0x%x limit=0x%x offset=0x%x", *loadSegment, start, limit, offset)
	case elf.ET_REL:
		if offset != 0 {
			return 0, fmt.Errorf("don't know how to handle mapping.Offset")
		}
		return start, nil
	case elf.ET_DYN:
		// The process mapping information, start = start of virtual address range,
		// and offset = offset in the executable file of the start address, tells us
		// that a runtime virtual address x maps to a file offset
		// fx = x - start + offset.
		if loadSegment == nil {
			return start - offset, nil
		}
		// Kernels compiled as PIE can be ET_DYN as well. Use heuristic, similar to
		// the ET_EXEC case above.
		if base, match := kernelBase(loadSegment, stextOffset, start, limit, offset); match {
			return base, nil
		}
		// The program header, if not nil, indicates the offset in the file where
		// the executable segment is located (loadSegment.Off), and the base virtual
		// address where the first byte of the segment is loaded
		// (loadSegment.Vaddr). A file offset fx maps to a virtual (symbol) address
		// sx = fx - loadSegment.Off + loadSegment.Vaddr.
		//
		// Thus, a runtime virtual address x maps to a symbol address
		// sx = x - start + offset - loadSegment.Off + loadSegment.Vaddr.
		return start - offset + loadSegment.Off - loadSegment.Vaddr, nil
	}
	return 0, fmt.Errorf("don't know how to handle FileHeader.Type %v", fh.Type)
}

// FindTextProgHeader finds the program segment header containing the .text
// section or nil if the segment cannot be found.
//
// Only executable PT_LOAD segments whose virtual address range
// [Vaddr, Vaddr+Memsz) contains the section address are considered. The
// returned pointer refers to the header stored inside f.
func FindTextProgHeader(f *elf.File) *elf.ProgHeader {
	for _, s := range f.Sections {
		if s.Name == ".text" {
			// Find the LOAD segment containing the .text section.
			for _, p := range f.Progs {
				if p.Type == elf.PT_LOAD && p.Flags&elf.PF_X != 0 && s.Addr >= p.Vaddr && s.Addr < p.Vaddr+p.Memsz {
					return &p.ProgHeader
				}
			}
		}
	}
	return nil
}

// ProgramHeadersForMapping returns the program segment headers that overlap
// the runtime mapping with file offset mapOff and memory size mapSz. We skip
// over segments with zero file size because their file offset values are
// unreliable. Even if overlapping, a segment is not selected if its aligned
// file offset is greater than the mapping file offset, or if the mapping
// includes the last page of the segment, but not the full segment and the
// mapping includes additional pages after the segment end.
// The function returns a slice of pointers to the headers in the input
// slice, which are valid only while phdrs is not modified or discarded.
func ProgramHeadersForMapping(phdrs []elf.ProgHeader, mapOff, mapSz uint64) []*elf.ProgHeader {
	const (
		// pageSize defines the virtual memory page size used by the loader. This
		// value is dependent on the memory management unit of the CPU. The page
		// size is 4KB on virtually all the architectures that we care about, so we
		// define this metric as a constant. If we encounter architectures where
		// page size is not 4KB, we must try to guess the page size on the system
		// where the profile was collected, possibly using the architecture
		// specified in the ELF file header.
		pageSize       = 4096
		pageOffsetMask = pageSize - 1
	)
	mapLimit := mapOff + mapSz
	var headers []*elf.ProgHeader
	for i := range phdrs {
		// Take the address of the slice element, not of a loop copy, so that
		// the result points into phdrs as documented.
		p := &phdrs[i]
		// Skip over segments with zero file size. Their file offsets can have
		// arbitrary values, see b/195427553.
		if p.Filesz == 0 {
			continue
		}
		segLimit := p.Off + p.Memsz
		// The segment must overlap the mapping.
		if p.Type == elf.PT_LOAD && mapOff < segLimit && p.Off < mapLimit {
			// If the mapping offset is strictly less than the page aligned segment
			// offset, then this mapping comes from a different segment, fixes
			// b/179920361.
			alignedSegOffset := uint64(0)
			if p.Off > (p.Vaddr & pageOffsetMask) {
				alignedSegOffset = p.Off - (p.Vaddr & pageOffsetMask)
			}
			if mapOff < alignedSegOffset {
				continue
			}
			// If the mapping starts in the middle of the segment, it covers less than
			// one page of the segment, and it extends at least one page past the
			// segment, then this mapping comes from a different segment.
			if mapOff > p.Off && (segLimit < mapOff+pageSize) && (mapLimit >= segLimit+pageSize) {
				continue
			}
			headers = append(headers, p)
		}
	}
	return headers
}

// HeaderForFileOffset attempts to identify a unique program header that
// includes the given file offset. It returns an error if it cannot identify a
// unique header.
//
// A header includes fileOffset when the offset lies in [Off, Off+Memsz).
func HeaderForFileOffset(headers []*elf.ProgHeader, fileOffset uint64) (*elf.ProgHeader, error) {
	var ph *elf.ProgHeader
	for _, h := range headers {
		if fileOffset >= h.Off && fileOffset < h.Off+h.Memsz {
			if ph != nil {
				// Assuming no other bugs, this can only happen if we have two or
				// more small program segments that fit on the same page, and a
				// segment other than the last one includes uninitialized data, or
				// if the debug binary used for symbolization is stripped of some
				// sections, so segment file sizes are smaller than memory sizes.
				return nil, fmt.Errorf("found second program header (%#v) that matches file offset %x, first program header is %#v. Is this a stripped binary, or does the first program segment contain uninitialized data?", *h, fileOffset, *ph)
			}
			ph = h
		}
	}
	if ph == nil {
		return nil, fmt.Errorf("no program header matches file offset %x", fileOffset)
	}
	return ph, nil
}