1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package macho implements access to Mach-O object files.
7
8# Security
9
10This package is not designed to be hardened against adversarial inputs, and is
11outside the scope of https://go.dev/security/policy. In particular, only basic
12validation is done when parsing object files. As such, care should be taken when
13parsing untrusted inputs, as parsing malformed files may consume significant
14resources, or cause panics.
15*/
16package macho
17
18// High level access to low level data structures.
19
20import (
21	"bytes"
22	"compress/zlib"
23	"debug/dwarf"
24	"encoding/binary"
25	"fmt"
26	"internal/saferio"
27	"io"
28	"os"
29	"strings"
30)
31
// A File represents an open Mach-O file.
//
// NewFile fills in the embedded FileHeader and ByteOrder from the start of
// the input, appends one entry to Loads for each load command in file order,
// and appends every section of each segment command to Sections.
// Symtab and Dysymtab point at the parsed symbol table and dynamic symbol
// table commands; each is nil if no such command was seen.
// The unexported closer is set only by Open and is what Close closes.
type File struct {
	FileHeader
	ByteOrder binary.ByteOrder
	Loads     []Load
	Sections  []*Section

	Symtab   *Symtab
	Dysymtab *Dysymtab

	closer io.Closer
}
44
// A Load represents any Mach-O load command.
// The command types declared in this file (Segment, Dylib, Symtab, Dysymtab,
// Rpath) satisfy it by embedding LoadBytes; commands that are not decoded
// are stored as plain LoadBytes.
type Load interface {
	// Raw returns the bytes of the load command.
	Raw() []byte
}
49
// LoadBytes holds the raw, undecoded bytes of a single Mach-O load command.
type LoadBytes []byte

// Raw returns the command bytes as a plain byte slice.
// The result shares its storage with b; no copy is made.
func (b LoadBytes) Raw() []byte {
	return []byte(b)
}
54
// A SegmentHeader is the header for a Mach-O 32-bit or 64-bit load segment command.
//
// NewFile fills it from either on-disk layout: Cmd is the load command type,
// Len is the command's size in bytes, Name is the NUL-terminated segment
// name, and the remaining fields are copied from the on-disk segment header.
// For 32-bit segments Addr, Memsz, Offset and Filesz are widened to uint64.
type SegmentHeader struct {
	Cmd     LoadCmd
	Len     uint32
	Name    string
	Addr    uint64
	Memsz   uint64
	Offset  uint64
	Filesz  uint64
	Maxprot uint32
	Prot    uint32
	Nsect   uint32
	Flag    uint32
}
69
// A Segment represents a Mach-O 32-bit or 64-bit load segment command.
// The embedded LoadBytes holds the raw command, and the embedded ReaderAt
// reads the segment's file contents: Filesz bytes starting at file offset
// Offset.
type Segment struct {
	LoadBytes
	SegmentHeader

	// Embed ReaderAt for the ReadAt method only.
	// SectionReader is deliberately not embedded, so that Segment
	// does not expose Read and Seek. A client that wants Read and
	// Seek must call Open, which returns a reader with its own
	// seek offset, so clients do not fight over a shared one.
	io.ReaderAt
	sr *io.SectionReader
}
84
85// Data reads and returns the contents of the segment.
86func (s *Segment) Data() ([]byte, error) {
87	return saferio.ReadDataAt(s.sr, s.Filesz, 0)
88}
89
90// Open returns a new ReadSeeker reading the segment.
91func (s *Segment) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) }
92
// A SectionHeader holds the header fields of a Mach-O section in a form
// shared by 32-bit and 64-bit files.
//
// NewFile fills it from the on-disk section header: Name and Seg are the
// NUL-terminated section and segment names, Addr and Size are widened to
// uint64 for 32-bit files, Offset is the file offset of the section's
// contents, and Reloff and Nreloc give the file offset and count of its
// relocation entries.
type SectionHeader struct {
	Name   string
	Seg    string
	Addr   uint64
	Size   uint64
	Offset uint32
	Align  uint32
	Reloff uint32
	Nreloc uint32
	Flags  uint32
}
104
// A Reloc represents a Mach-O relocation.
// It is the decoded form of one 8-byte relocation entry; the meaning of
// Value depends on Scattered and Extern, as described below.
type Reloc struct {
	Addr  uint32
	Value uint32
	// When Scattered == false && Extern == true, Value is the symbol number.
	// When Scattered == false && Extern == false, Value is the section number.
	// When Scattered == true, Value is the value that this reloc refers to.
	Type      uint8
	Len       uint8 // 0=byte, 1=word, 2=long, 3=quad
	Pcrel     bool
	Extern    bool // valid if Scattered == false
	Scattered bool
}
118
// A Section represents a single section of a Mach-O segment.
// Relocs holds the section's decoded relocation entries and is nil when the
// header's Nreloc is zero. The embedded ReaderAt reads the section's
// contents: Size bytes starting at file offset Offset.
type Section struct {
	SectionHeader
	Relocs []Reloc

	// Embed ReaderAt for the ReadAt method only.
	// SectionReader is deliberately not embedded, so that Section
	// does not expose Read and Seek. A client that wants Read and
	// Seek must call Open, which returns a reader with its own
	// seek offset, so clients do not fight over a shared one.
	io.ReaderAt
	sr *io.SectionReader
}
132
133// Data reads and returns the contents of the Mach-O section.
134func (s *Section) Data() ([]byte, error) {
135	return saferio.ReadDataAt(s.sr, s.Size, 0)
136}
137
138// Open returns a new ReadSeeker reading the Mach-O section.
139func (s *Section) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) }
140
// A Dylib represents a Mach-O load dynamic library command.
// Name is the NUL-terminated string found at the command's name offset;
// Time, CurrentVersion and CompatVersion are copied from the command header.
// The embedded LoadBytes holds the raw command.
type Dylib struct {
	LoadBytes
	Name           string
	Time           uint32
	CurrentVersion uint32
	CompatVersion  uint32
}
149
// A Symtab represents a Mach-O symbol table command.
// Syms holds the decoded symbol table entries in file order, and the
// embedded LoadBytes holds the raw command.
type Symtab struct {
	LoadBytes
	SymtabCmd
	Syms []Symbol
}
156
// A Dysymtab represents a Mach-O dynamic symbol table command.
// The embedded DysymtabCmd is the decoded command header and the embedded
// LoadBytes holds the raw command. IndirectSyms holds the Nindirectsyms
// 32-bit entries read from file offset Indirectsymoff.
type Dysymtab struct {
	LoadBytes
	DysymtabCmd
	IndirectSyms []uint32 // indices into Symtab.Syms
}
163
// A Rpath represents a Mach-O rpath command.
// Path is the NUL-terminated string found at the command's path offset,
// and the embedded LoadBytes holds the raw command.
type Rpath struct {
	LoadBytes
	Path string
}
169
// A Symbol is a Mach-O 32-bit or 64-bit symbol table entry.
// Name is looked up in the string table; when it contains a '.' and starts
// with '_', the leading underscore is removed. Value is widened to uint64
// for 32-bit files. Type, Sect and Desc are copied from the on-disk entry.
type Symbol struct {
	Name  string
	Type  uint8
	Sect  uint8
	Desc  uint16
	Value uint64
}
178
179/*
180 * Mach-O reader
181 */
182
// FormatError is returned by some operations if the data does
// not have the correct format for an object file.
//
// off is the byte offset reported in the message, msg describes the
// problem, and val, when non-nil, is the offending value.
type FormatError struct {
	off int64
	msg string
	val any
}

// Error formats the error as the message, then the offending value in
// single quotes when one was recorded, then the byte offset in hexadecimal.
func (e *FormatError) Error() string {
	if e.val == nil {
		return fmt.Sprintf("%s in record at byte %#x", e.msg, e.off)
	}
	return fmt.Sprintf("%s '%v' in record at byte %#x", e.msg, e.val, e.off)
}
199
200// Open opens the named file using [os.Open] and prepares it for use as a Mach-O binary.
201func Open(name string) (*File, error) {
202	f, err := os.Open(name)
203	if err != nil {
204		return nil, err
205	}
206	ff, err := NewFile(f)
207	if err != nil {
208		f.Close()
209		return nil, err
210	}
211	ff.closer = f
212	return ff, nil
213}
214
215// Close closes the [File].
216// If the [File] was created using [NewFile] directly instead of [Open],
217// Close has no effect.
218func (f *File) Close() error {
219	var err error
220	if f.closer != nil {
221		err = f.closer.Close()
222		f.closer = nil
223	}
224	return err
225}
226
227// NewFile creates a new [File] for accessing a Mach-O binary in an underlying reader.
228// The Mach-O binary is expected to start at position 0 in the ReaderAt.
229func NewFile(r io.ReaderAt) (*File, error) {
230	f := new(File)
231	sr := io.NewSectionReader(r, 0, 1<<63-1)
232
233	// Read and decode Mach magic to determine byte order, size.
234	// Magic32 and Magic64 differ only in the bottom bit.
235	var ident [4]byte
236	if _, err := r.ReadAt(ident[0:], 0); err != nil {
237		return nil, err
238	}
239	be := binary.BigEndian.Uint32(ident[0:])
240	le := binary.LittleEndian.Uint32(ident[0:])
241	switch Magic32 &^ 1 {
242	case be &^ 1:
243		f.ByteOrder = binary.BigEndian
244		f.Magic = be
245	case le &^ 1:
246		f.ByteOrder = binary.LittleEndian
247		f.Magic = le
248	default:
249		return nil, &FormatError{0, "invalid magic number", nil}
250	}
251
252	// Read entire file header.
253	if err := binary.Read(sr, f.ByteOrder, &f.FileHeader); err != nil {
254		return nil, err
255	}
256
257	// Then load commands.
258	offset := int64(fileHeaderSize32)
259	if f.Magic == Magic64 {
260		offset = fileHeaderSize64
261	}
262	dat, err := saferio.ReadDataAt(r, uint64(f.Cmdsz), offset)
263	if err != nil {
264		return nil, err
265	}
266	c := saferio.SliceCap[Load](uint64(f.Ncmd))
267	if c < 0 {
268		return nil, &FormatError{offset, "too many load commands", nil}
269	}
270	f.Loads = make([]Load, 0, c)
271	bo := f.ByteOrder
272	for i := uint32(0); i < f.Ncmd; i++ {
273		// Each load command begins with uint32 command and length.
274		if len(dat) < 8 {
275			return nil, &FormatError{offset, "command block too small", nil}
276		}
277		cmd, siz := LoadCmd(bo.Uint32(dat[0:4])), bo.Uint32(dat[4:8])
278		if siz < 8 || siz > uint32(len(dat)) {
279			return nil, &FormatError{offset, "invalid command block size", nil}
280		}
281		var cmddat []byte
282		cmddat, dat = dat[0:siz], dat[siz:]
283		offset += int64(siz)
284		var s *Segment
285		switch cmd {
286		default:
287			f.Loads = append(f.Loads, LoadBytes(cmddat))
288
289		case LoadCmdRpath:
290			var hdr RpathCmd
291			b := bytes.NewReader(cmddat)
292			if err := binary.Read(b, bo, &hdr); err != nil {
293				return nil, err
294			}
295			l := new(Rpath)
296			if hdr.Path >= uint32(len(cmddat)) {
297				return nil, &FormatError{offset, "invalid path in rpath command", hdr.Path}
298			}
299			l.Path = cstring(cmddat[hdr.Path:])
300			l.LoadBytes = LoadBytes(cmddat)
301			f.Loads = append(f.Loads, l)
302
303		case LoadCmdDylib:
304			var hdr DylibCmd
305			b := bytes.NewReader(cmddat)
306			if err := binary.Read(b, bo, &hdr); err != nil {
307				return nil, err
308			}
309			l := new(Dylib)
310			if hdr.Name >= uint32(len(cmddat)) {
311				return nil, &FormatError{offset, "invalid name in dynamic library command", hdr.Name}
312			}
313			l.Name = cstring(cmddat[hdr.Name:])
314			l.Time = hdr.Time
315			l.CurrentVersion = hdr.CurrentVersion
316			l.CompatVersion = hdr.CompatVersion
317			l.LoadBytes = LoadBytes(cmddat)
318			f.Loads = append(f.Loads, l)
319
320		case LoadCmdSymtab:
321			var hdr SymtabCmd
322			b := bytes.NewReader(cmddat)
323			if err := binary.Read(b, bo, &hdr); err != nil {
324				return nil, err
325			}
326			strtab, err := saferio.ReadDataAt(r, uint64(hdr.Strsize), int64(hdr.Stroff))
327			if err != nil {
328				return nil, err
329			}
330			var symsz int
331			if f.Magic == Magic64 {
332				symsz = 16
333			} else {
334				symsz = 12
335			}
336			symdat, err := saferio.ReadDataAt(r, uint64(hdr.Nsyms)*uint64(symsz), int64(hdr.Symoff))
337			if err != nil {
338				return nil, err
339			}
340			st, err := f.parseSymtab(symdat, strtab, cmddat, &hdr, offset)
341			if err != nil {
342				return nil, err
343			}
344			f.Loads = append(f.Loads, st)
345			f.Symtab = st
346
347		case LoadCmdDysymtab:
348			var hdr DysymtabCmd
349			b := bytes.NewReader(cmddat)
350			if err := binary.Read(b, bo, &hdr); err != nil {
351				return nil, err
352			}
353			if f.Symtab == nil {
354				return nil, &FormatError{offset, "dynamic symbol table seen before any ordinary symbol table", nil}
355			} else if hdr.Iundefsym > uint32(len(f.Symtab.Syms)) {
356				return nil, &FormatError{offset, fmt.Sprintf(
357					"undefined symbols index in dynamic symbol table command is greater than symbol table length (%d > %d)",
358					hdr.Iundefsym, len(f.Symtab.Syms)), nil}
359			} else if hdr.Iundefsym+hdr.Nundefsym > uint32(len(f.Symtab.Syms)) {
360				return nil, &FormatError{offset, fmt.Sprintf(
361					"number of undefined symbols after index in dynamic symbol table command is greater than symbol table length (%d > %d)",
362					hdr.Iundefsym+hdr.Nundefsym, len(f.Symtab.Syms)), nil}
363			}
364			dat, err := saferio.ReadDataAt(r, uint64(hdr.Nindirectsyms)*4, int64(hdr.Indirectsymoff))
365			if err != nil {
366				return nil, err
367			}
368			x := make([]uint32, hdr.Nindirectsyms)
369			if err := binary.Read(bytes.NewReader(dat), bo, x); err != nil {
370				return nil, err
371			}
372			st := new(Dysymtab)
373			st.LoadBytes = LoadBytes(cmddat)
374			st.DysymtabCmd = hdr
375			st.IndirectSyms = x
376			f.Loads = append(f.Loads, st)
377			f.Dysymtab = st
378
379		case LoadCmdSegment:
380			var seg32 Segment32
381			b := bytes.NewReader(cmddat)
382			if err := binary.Read(b, bo, &seg32); err != nil {
383				return nil, err
384			}
385			s = new(Segment)
386			s.LoadBytes = cmddat
387			s.Cmd = cmd
388			s.Len = siz
389			s.Name = cstring(seg32.Name[0:])
390			s.Addr = uint64(seg32.Addr)
391			s.Memsz = uint64(seg32.Memsz)
392			s.Offset = uint64(seg32.Offset)
393			s.Filesz = uint64(seg32.Filesz)
394			s.Maxprot = seg32.Maxprot
395			s.Prot = seg32.Prot
396			s.Nsect = seg32.Nsect
397			s.Flag = seg32.Flag
398			f.Loads = append(f.Loads, s)
399			for i := 0; i < int(s.Nsect); i++ {
400				var sh32 Section32
401				if err := binary.Read(b, bo, &sh32); err != nil {
402					return nil, err
403				}
404				sh := new(Section)
405				sh.Name = cstring(sh32.Name[0:])
406				sh.Seg = cstring(sh32.Seg[0:])
407				sh.Addr = uint64(sh32.Addr)
408				sh.Size = uint64(sh32.Size)
409				sh.Offset = sh32.Offset
410				sh.Align = sh32.Align
411				sh.Reloff = sh32.Reloff
412				sh.Nreloc = sh32.Nreloc
413				sh.Flags = sh32.Flags
414				if err := f.pushSection(sh, r); err != nil {
415					return nil, err
416				}
417			}
418
419		case LoadCmdSegment64:
420			var seg64 Segment64
421			b := bytes.NewReader(cmddat)
422			if err := binary.Read(b, bo, &seg64); err != nil {
423				return nil, err
424			}
425			s = new(Segment)
426			s.LoadBytes = cmddat
427			s.Cmd = cmd
428			s.Len = siz
429			s.Name = cstring(seg64.Name[0:])
430			s.Addr = seg64.Addr
431			s.Memsz = seg64.Memsz
432			s.Offset = seg64.Offset
433			s.Filesz = seg64.Filesz
434			s.Maxprot = seg64.Maxprot
435			s.Prot = seg64.Prot
436			s.Nsect = seg64.Nsect
437			s.Flag = seg64.Flag
438			f.Loads = append(f.Loads, s)
439			for i := 0; i < int(s.Nsect); i++ {
440				var sh64 Section64
441				if err := binary.Read(b, bo, &sh64); err != nil {
442					return nil, err
443				}
444				sh := new(Section)
445				sh.Name = cstring(sh64.Name[0:])
446				sh.Seg = cstring(sh64.Seg[0:])
447				sh.Addr = sh64.Addr
448				sh.Size = sh64.Size
449				sh.Offset = sh64.Offset
450				sh.Align = sh64.Align
451				sh.Reloff = sh64.Reloff
452				sh.Nreloc = sh64.Nreloc
453				sh.Flags = sh64.Flags
454				if err := f.pushSection(sh, r); err != nil {
455					return nil, err
456				}
457			}
458		}
459		if s != nil {
460			if int64(s.Offset) < 0 {
461				return nil, &FormatError{offset, "invalid section offset", s.Offset}
462			}
463			if int64(s.Filesz) < 0 {
464				return nil, &FormatError{offset, "invalid section file size", s.Filesz}
465			}
466			s.sr = io.NewSectionReader(r, int64(s.Offset), int64(s.Filesz))
467			s.ReaderAt = s.sr
468		}
469	}
470	return f, nil
471}
472
473func (f *File) parseSymtab(symdat, strtab, cmddat []byte, hdr *SymtabCmd, offset int64) (*Symtab, error) {
474	bo := f.ByteOrder
475	c := saferio.SliceCap[Symbol](uint64(hdr.Nsyms))
476	if c < 0 {
477		return nil, &FormatError{offset, "too many symbols", nil}
478	}
479	symtab := make([]Symbol, 0, c)
480	b := bytes.NewReader(symdat)
481	for i := 0; i < int(hdr.Nsyms); i++ {
482		var n Nlist64
483		if f.Magic == Magic64 {
484			if err := binary.Read(b, bo, &n); err != nil {
485				return nil, err
486			}
487		} else {
488			var n32 Nlist32
489			if err := binary.Read(b, bo, &n32); err != nil {
490				return nil, err
491			}
492			n.Name = n32.Name
493			n.Type = n32.Type
494			n.Sect = n32.Sect
495			n.Desc = n32.Desc
496			n.Value = uint64(n32.Value)
497		}
498		if n.Name >= uint32(len(strtab)) {
499			return nil, &FormatError{offset, "invalid name in symbol table", n.Name}
500		}
501		// We add "_" to Go symbols. Strip it here. See issue 33808.
502		name := cstring(strtab[n.Name:])
503		if strings.Contains(name, ".") && name[0] == '_' {
504			name = name[1:]
505		}
506		symtab = append(symtab, Symbol{
507			Name:  name,
508			Type:  n.Type,
509			Sect:  n.Sect,
510			Desc:  n.Desc,
511			Value: n.Value,
512		})
513	}
514	st := new(Symtab)
515	st.LoadBytes = LoadBytes(cmddat)
516	st.Syms = symtab
517	return st, nil
518}
519
// relocInfo is the raw form of one 8-byte relocation entry: two 32-bit
// words that pushSection decodes into a Reloc. How the words are split into
// fields depends on the scattered bit of Addr and on the file's byte order.
type relocInfo struct {
	Addr   uint32
	Symnum uint32
}
524
525func (f *File) pushSection(sh *Section, r io.ReaderAt) error {
526	f.Sections = append(f.Sections, sh)
527	sh.sr = io.NewSectionReader(r, int64(sh.Offset), int64(sh.Size))
528	sh.ReaderAt = sh.sr
529
530	if sh.Nreloc > 0 {
531		reldat, err := saferio.ReadDataAt(r, uint64(sh.Nreloc)*8, int64(sh.Reloff))
532		if err != nil {
533			return err
534		}
535		b := bytes.NewReader(reldat)
536
537		bo := f.ByteOrder
538
539		sh.Relocs = make([]Reloc, sh.Nreloc)
540		for i := range sh.Relocs {
541			rel := &sh.Relocs[i]
542
543			var ri relocInfo
544			if err := binary.Read(b, bo, &ri); err != nil {
545				return err
546			}
547
548			if ri.Addr&(1<<31) != 0 { // scattered
549				rel.Addr = ri.Addr & (1<<24 - 1)
550				rel.Type = uint8((ri.Addr >> 24) & (1<<4 - 1))
551				rel.Len = uint8((ri.Addr >> 28) & (1<<2 - 1))
552				rel.Pcrel = ri.Addr&(1<<30) != 0
553				rel.Value = ri.Symnum
554				rel.Scattered = true
555			} else {
556				switch bo {
557				case binary.LittleEndian:
558					rel.Addr = ri.Addr
559					rel.Value = ri.Symnum & (1<<24 - 1)
560					rel.Pcrel = ri.Symnum&(1<<24) != 0
561					rel.Len = uint8((ri.Symnum >> 25) & (1<<2 - 1))
562					rel.Extern = ri.Symnum&(1<<27) != 0
563					rel.Type = uint8((ri.Symnum >> 28) & (1<<4 - 1))
564				case binary.BigEndian:
565					rel.Addr = ri.Addr
566					rel.Value = ri.Symnum >> 8
567					rel.Pcrel = ri.Symnum&(1<<7) != 0
568					rel.Len = uint8((ri.Symnum >> 5) & (1<<2 - 1))
569					rel.Extern = ri.Symnum&(1<<4) != 0
570					rel.Type = uint8(ri.Symnum & (1<<4 - 1))
571				default:
572					panic("unreachable")
573				}
574			}
575		}
576	}
577
578	return nil
579}
580
// cstring returns the bytes of b up to, but not including, the first NUL
// byte as a string. If b contains no NUL byte, all of b is returned.
func cstring(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		return string(b[:end])
	}
	return string(b)
}
588
589// Segment returns the first Segment with the given name, or nil if no such segment exists.
590func (f *File) Segment(name string) *Segment {
591	for _, l := range f.Loads {
592		if s, ok := l.(*Segment); ok && s.Name == name {
593			return s
594		}
595	}
596	return nil
597}
598
599// Section returns the first section with the given name, or nil if no such
600// section exists.
601func (f *File) Section(name string) *Section {
602	for _, s := range f.Sections {
603		if s.Name == name {
604			return s
605		}
606	}
607	return nil
608}
609
610// DWARF returns the DWARF debug information for the Mach-O file.
611func (f *File) DWARF() (*dwarf.Data, error) {
612	dwarfSuffix := func(s *Section) string {
613		switch {
614		case strings.HasPrefix(s.Name, "__debug_"):
615			return s.Name[8:]
616		case strings.HasPrefix(s.Name, "__zdebug_"):
617			return s.Name[9:]
618		default:
619			return ""
620		}
621
622	}
623	sectionData := func(s *Section) ([]byte, error) {
624		b, err := s.Data()
625		if err != nil && uint64(len(b)) < s.Size {
626			return nil, err
627		}
628
629		if len(b) >= 12 && string(b[:4]) == "ZLIB" {
630			dlen := binary.BigEndian.Uint64(b[4:12])
631			dbuf := make([]byte, dlen)
632			r, err := zlib.NewReader(bytes.NewBuffer(b[12:]))
633			if err != nil {
634				return nil, err
635			}
636			if _, err := io.ReadFull(r, dbuf); err != nil {
637				return nil, err
638			}
639			if err := r.Close(); err != nil {
640				return nil, err
641			}
642			b = dbuf
643		}
644		return b, nil
645	}
646
647	// There are many other DWARF sections, but these
648	// are the ones the debug/dwarf package uses.
649	// Don't bother loading others.
650	var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil}
651	for _, s := range f.Sections {
652		suffix := dwarfSuffix(s)
653		if suffix == "" {
654			continue
655		}
656		if _, ok := dat[suffix]; !ok {
657			continue
658		}
659		b, err := sectionData(s)
660		if err != nil {
661			return nil, err
662		}
663		dat[suffix] = b
664	}
665
666	d, err := dwarf.New(dat["abbrev"], nil, nil, dat["info"], dat["line"], nil, dat["ranges"], dat["str"])
667	if err != nil {
668		return nil, err
669	}
670
671	// Look for DWARF4 .debug_types sections and DWARF5 sections.
672	for i, s := range f.Sections {
673		suffix := dwarfSuffix(s)
674		if suffix == "" {
675			continue
676		}
677		if _, ok := dat[suffix]; ok {
678			// Already handled.
679			continue
680		}
681
682		b, err := sectionData(s)
683		if err != nil {
684			return nil, err
685		}
686
687		if suffix == "types" {
688			err = d.AddTypes(fmt.Sprintf("types-%d", i), b)
689		} else {
690			err = d.AddSection(".debug_"+suffix, b)
691		}
692		if err != nil {
693			return nil, err
694		}
695	}
696
697	return d, nil
698}
699
700// ImportedSymbols returns the names of all symbols
701// referred to by the binary f that are expected to be
702// satisfied by other libraries at dynamic load time.
703func (f *File) ImportedSymbols() ([]string, error) {
704	if f.Dysymtab == nil || f.Symtab == nil {
705		return nil, &FormatError{0, "missing symbol table", nil}
706	}
707
708	st := f.Symtab
709	dt := f.Dysymtab
710	var all []string
711	for _, s := range st.Syms[dt.Iundefsym : dt.Iundefsym+dt.Nundefsym] {
712		all = append(all, s.Name)
713	}
714	return all, nil
715}
716
717// ImportedLibraries returns the paths of all libraries
718// referred to by the binary f that are expected to be
719// linked with the binary at dynamic link time.
720func (f *File) ImportedLibraries() ([]string, error) {
721	var all []string
722	for _, l := range f.Loads {
723		if lib, ok := l.(*Dylib); ok {
724			all = append(all, lib.Name)
725		}
726	}
727	return all, nil
728}
729