1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ld
6
7import (
8	"bytes"
9	"compress/zlib"
10	"debug/macho"
11	"encoding/binary"
12	"fmt"
13	"io"
14	"os"
15	"reflect"
16	"unsafe"
17)
18
// loadCmd is the generic header decoded at the start of every load command.
// loadCmdReader.Next reads it and uses Len to find where the following
// command begins.
type loadCmd struct {
	Cmd macho.LoadCmd // load command type
	Len uint32        // distance from this command's start to the next command
}
23
// dyldInfoCmd is the layout decoded for LC_DYLD_INFO and LC_DYLD_INFO_ONLY
// load commands. The *Off fields are the ones machoCombineDwarf adjusts when
// the __LINKEDIT segment moves; field order matters because the struct is
// read and written with encoding/binary.
type dyldInfoCmd struct {
	Cmd         macho.LoadCmd
	Len         uint32
	RebaseOff   uint32
	RebaseLen   uint32
	BindOff     uint32
	BindLen     uint32
	WeakBindOff uint32
	WeakBindLen uint32
	LazyBindOff uint32
	LazyBindLen uint32
	ExportOff   uint32
	ExportLen   uint32
}
33
// linkEditDataCmd is the layout decoded for load commands that carry a single
// offset/length pair (machoCombineDwarf uses it for LC_CODE_SIGNATURE,
// LC_FUNCTION_STARTS and the other commands handled in the same case).
// DataOff is adjusted when the __LINKEDIT segment moves.
type linkEditDataCmd struct {
	Cmd     macho.LoadCmd
	Len     uint32
	DataOff uint32
	DataLen uint32
}
39
// encryptionInfoCmd is the layout decoded for LC_ENCRYPTION_INFO and
// LC_ENCRYPTION_INFO_64 load commands. CryptOff is adjusted when the
// __LINKEDIT segment moves.
type encryptionInfoCmd struct {
	Cmd      macho.LoadCmd
	Len      uint32
	CryptOff uint32
	CryptLen uint32
	CryptId  uint32
}
46
// uuidCmd is the layout read and rewritten for an LC_UUID load command.
// machoCombineDwarf overwrites Uuid with the bytes returned by
// uuidFromGoBuildId(*flagBuildid).
type uuidCmd struct {
	Cmd  macho.LoadCmd
	Len  uint32
	Uuid [16]byte
}
52
// loadCmdReader steps through the load commands stored in a file and lets
// callers read or patch the command it is currently positioned on.
type loadCmdReader struct {
	offset int64            // file offset of the current load command
	next   int64            // file offset where the following load command starts
	f      *os.File         // file that is read from and written to
	order  binary.ByteOrder // byte order used to decode and encode commands
}
58
59func (r *loadCmdReader) Next() (loadCmd, error) {
60	var cmd loadCmd
61
62	r.offset = r.next
63	if _, err := r.f.Seek(r.offset, 0); err != nil {
64		return cmd, err
65	}
66	if err := binary.Read(r.f, r.order, &cmd); err != nil {
67		return cmd, err
68	}
69	r.next = r.offset + int64(cmd.Len)
70	return cmd, nil
71}
72
73func (r loadCmdReader) ReadAt(offset int64, data interface{}) error {
74	if _, err := r.f.Seek(r.offset+offset, 0); err != nil {
75		return err
76	}
77	return binary.Read(r.f, r.order, data)
78}
79
80func (r loadCmdReader) WriteAt(offset int64, data interface{}) error {
81	if _, err := r.f.Seek(r.offset+offset, 0); err != nil {
82		return err
83	}
84	return binary.Write(r.f, r.order, data)
85}
86
87// machoCombineDwarf merges dwarf info generated by dsymutil into a macho executable.
88//
89// With internal linking, DWARF is embedded into the executable, this lets us do the
90// same for external linking.
91// exef is the file of the executable with no DWARF. It must have enough room in the macho
92// header to add the DWARF sections. (Use ld's -headerpad option)
93// exem is the macho representation of exef.
94// dsym is the path to the macho file containing DWARF from dsymutil.
95// outexe is the path where the combined executable should be saved.
96func machoCombineDwarf(ctxt *Link, exef *os.File, exem *macho.File, dsym, outexe string) error {
97	dwarff, err := os.Open(dsym)
98	if err != nil {
99		return err
100	}
101	defer dwarff.Close()
102	outf, err := os.OpenFile(outexe, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0755)
103	if err != nil {
104		return err
105	}
106	defer outf.Close()
107	dwarfm, err := macho.NewFile(dwarff)
108	if err != nil {
109		return err
110	}
111	defer dwarfm.Close()
112
113	// The string table needs to be the last thing in the file
114	// for code signing to work. So we'll need to move the
115	// linkedit section, but all the others can be copied directly.
116	linkseg := exem.Segment("__LINKEDIT")
117	if linkseg == nil {
118		return fmt.Errorf("missing __LINKEDIT segment")
119	}
120
121	if _, err := exef.Seek(0, 0); err != nil {
122		return err
123	}
124	if _, err := io.CopyN(outf, exef, int64(linkseg.Offset)); err != nil {
125		return err
126	}
127
128	realdwarf := dwarfm.Segment("__DWARF")
129	if realdwarf == nil {
130		return fmt.Errorf("missing __DWARF segment")
131	}
132
133	// Try to compress the DWARF sections. This includes some Apple
134	// proprietary sections like __apple_types.
135	compressedSects, compressedBytes, err := machoCompressSections(ctxt, dwarfm)
136	if err != nil {
137		return err
138	}
139
140	// Now copy the dwarf data into the output.
141	// Kernel requires all loaded segments to be page-aligned in the file,
142	// even though we mark this one as being 0 bytes of virtual address space.
143	dwarfstart := Rnd(int64(linkseg.Offset), *FlagRound)
144	if _, err := outf.Seek(dwarfstart, 0); err != nil {
145		return err
146	}
147
148	if _, err := dwarff.Seek(int64(realdwarf.Offset), 0); err != nil {
149		return err
150	}
151
152	// Write out the compressed sections, or the originals if we gave up
153	// on compressing them.
154	var dwarfsize uint64
155	if compressedBytes != nil {
156		dwarfsize = uint64(len(compressedBytes))
157		if _, err := outf.Write(compressedBytes); err != nil {
158			return err
159		}
160	} else {
161		if _, err := io.CopyN(outf, dwarff, int64(realdwarf.Filesz)); err != nil {
162			return err
163		}
164		dwarfsize = realdwarf.Filesz
165	}
166
167	// And finally the linkedit section.
168	if _, err := exef.Seek(int64(linkseg.Offset), 0); err != nil {
169		return err
170	}
171	linkstart := Rnd(dwarfstart+int64(dwarfsize), *FlagRound)
172	if _, err := outf.Seek(linkstart, 0); err != nil {
173		return err
174	}
175	if _, err := io.Copy(outf, exef); err != nil {
176		return err
177	}
178
179	// Now we need to update the headers.
180	textsect := exem.Section("__text")
181	if textsect == nil {
182		return fmt.Errorf("missing __text section")
183	}
184
185	cmdOffset := unsafe.Sizeof(exem.FileHeader)
186	if is64bit := exem.Magic == macho.Magic64; is64bit {
187		// mach_header_64 has one extra uint32.
188		cmdOffset += unsafe.Sizeof(exem.Magic)
189	}
190	dwarfCmdOffset := uint32(cmdOffset) + exem.FileHeader.Cmdsz
191	availablePadding := textsect.Offset - dwarfCmdOffset
192	if availablePadding < realdwarf.Len {
193		return fmt.Errorf("no room to add dwarf info. Need at least %d padding bytes, found %d", realdwarf.Len, availablePadding)
194	}
195	// First, copy the dwarf load command into the header. It will be
196	// updated later with new offsets and lengths as necessary.
197	if _, err := outf.Seek(int64(dwarfCmdOffset), 0); err != nil {
198		return err
199	}
200	if _, err := io.CopyN(outf, bytes.NewReader(realdwarf.Raw()), int64(realdwarf.Len)); err != nil {
201		return err
202	}
203	if _, err := outf.Seek(int64(unsafe.Offsetof(exem.FileHeader.Ncmd)), 0); err != nil {
204		return err
205	}
206	if err := binary.Write(outf, exem.ByteOrder, exem.Ncmd+1); err != nil {
207		return err
208	}
209	if err := binary.Write(outf, exem.ByteOrder, exem.Cmdsz+realdwarf.Len); err != nil {
210		return err
211	}
212
213	reader := loadCmdReader{next: int64(cmdOffset), f: outf, order: exem.ByteOrder}
214	for i := uint32(0); i < exem.Ncmd; i++ {
215		cmd, err := reader.Next()
216		if err != nil {
217			return err
218		}
219		linkoffset := uint64(linkstart) - linkseg.Offset
220		switch cmd.Cmd {
221		case macho.LoadCmdSegment64:
222			err = machoUpdateSegment(reader, linkseg, linkoffset)
223		case macho.LoadCmdSegment:
224			panic("unexpected 32-bit segment")
225		case LC_DYLD_INFO, LC_DYLD_INFO_ONLY:
226			err = machoUpdateLoadCommand(reader, linkseg, linkoffset, &dyldInfoCmd{}, "RebaseOff", "BindOff", "WeakBindOff", "LazyBindOff", "ExportOff")
227		case macho.LoadCmdSymtab:
228			err = machoUpdateLoadCommand(reader, linkseg, linkoffset, &macho.SymtabCmd{}, "Symoff", "Stroff")
229		case macho.LoadCmdDysymtab:
230			err = machoUpdateLoadCommand(reader, linkseg, linkoffset, &macho.DysymtabCmd{}, "Tocoffset", "Modtaboff", "Extrefsymoff", "Indirectsymoff", "Extreloff", "Locreloff")
231		case LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS,
232			LC_DYLD_EXPORTS_TRIE, LC_DYLD_CHAINED_FIXUPS:
233			err = machoUpdateLoadCommand(reader, linkseg, linkoffset, &linkEditDataCmd{}, "DataOff")
234		case LC_ENCRYPTION_INFO, LC_ENCRYPTION_INFO_64:
235			err = machoUpdateLoadCommand(reader, linkseg, linkoffset, &encryptionInfoCmd{}, "CryptOff")
236		case LC_UUID:
237			var u uuidCmd
238			err = reader.ReadAt(0, &u)
239			if err == nil {
240				copy(u.Uuid[:], uuidFromGoBuildId(*flagBuildid))
241				err = reader.WriteAt(0, &u)
242			}
243		case macho.LoadCmdDylib, macho.LoadCmdThread, macho.LoadCmdUnixThread,
244			LC_PREBOUND_DYLIB, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_SOURCE_VERSION,
245			LC_MAIN, LC_LOAD_DYLINKER, LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB, LC_RPATH, LC_ID_DYLIB,
246			LC_SYMSEG, LC_LOADFVMLIB, LC_IDFVMLIB, LC_IDENT, LC_FVMFILE, LC_PREPAGE, LC_ID_DYLINKER,
247			LC_ROUTINES, LC_SUB_FRAMEWORK, LC_SUB_UMBRELLA, LC_SUB_CLIENT, LC_SUB_LIBRARY, LC_TWOLEVEL_HINTS,
248			LC_PREBIND_CKSUM, LC_ROUTINES_64, LC_LAZY_LOAD_DYLIB, LC_LOAD_UPWARD_DYLIB, LC_DYLD_ENVIRONMENT,
249			LC_LINKER_OPTION, LC_LINKER_OPTIMIZATION_HINT, LC_VERSION_MIN_TVOS, LC_VERSION_MIN_WATCHOS,
250			LC_VERSION_NOTE, LC_BUILD_VERSION:
251			// Nothing to update
252		default:
253			err = fmt.Errorf("unknown load command 0x%x (%s)", int(cmd.Cmd), cmd.Cmd)
254		}
255		if err != nil {
256			return err
257		}
258	}
259	// Do the final update of the DWARF segment's load command.
260	return machoUpdateDwarfHeader(&reader, compressedSects, dwarfsize, dwarfstart, realdwarf)
261}
262
263// machoCompressSections tries to compress the DWARF segments in dwarfm,
264// returning the updated sections and segment contents, nils if the sections
265// weren't compressed, or an error if there was a problem reading dwarfm.
266func machoCompressSections(ctxt *Link, dwarfm *macho.File) ([]*macho.Section, []byte, error) {
267	if !ctxt.compressDWARF {
268		return nil, nil, nil
269	}
270
271	dwarfseg := dwarfm.Segment("__DWARF")
272	var sects []*macho.Section
273	var buf bytes.Buffer
274
275	for _, sect := range dwarfm.Sections {
276		if sect.Seg != "__DWARF" {
277			continue
278		}
279
280		// As of writing, there are no relocations in dsymutil's output
281		// so there's no point in worrying about them. Bail out if that
282		// changes.
283		if sect.Nreloc != 0 {
284			return nil, nil, nil
285		}
286
287		data, err := sect.Data()
288		if err != nil {
289			return nil, nil, err
290		}
291
292		compressed, contents, err := machoCompressSection(data)
293		if err != nil {
294			return nil, nil, err
295		}
296
297		newSec := *sect
298		newSec.Offset = uint32(dwarfseg.Offset) + uint32(buf.Len())
299		newSec.Addr = dwarfseg.Addr + uint64(buf.Len())
300		if compressed {
301			newSec.Name = "__z" + sect.Name[2:]
302			newSec.Size = uint64(len(contents))
303		}
304		sects = append(sects, &newSec)
305		buf.Write(contents)
306	}
307	return sects, buf.Bytes(), nil
308}
309
// machoCompressSection compresses sectBytes if doing so results in less data.
//
// The compressed form is the 4-byte tag "ZLIB", the uncompressed length as a
// big-endian uint64, and then the zlib stream. If that form is not strictly
// smaller than the input, compressed is false and contents is sectBytes
// itself. err is non-nil only if the zlib writer fails.
func machoCompressSection(sectBytes []byte) (compressed bool, contents []byte, err error) {
	var hdr [12]byte
	copy(hdr[:4], "ZLIB")
	binary.BigEndian.PutUint64(hdr[4:], uint64(len(sectBytes)))

	var out bytes.Buffer
	out.Write(hdr[:])

	zw := zlib.NewWriter(&out)
	if _, werr := zw.Write(sectBytes); werr != nil {
		return false, nil, werr
	}
	if cerr := zw.Close(); cerr != nil {
		return false, nil, cerr
	}

	if out.Len() < len(sectBytes) {
		return true, out.Bytes(), nil
	}
	return false, sectBytes, nil
}
330
331// machoUpdateSegment updates the load command for a moved segment.
332// Only the linkedit segment should move, and it should have 0 sections.
333func machoUpdateSegment(r loadCmdReader, linkseg *macho.Segment, linkoffset uint64) error {
334	var seg macho.Segment64
335	if err := r.ReadAt(0, &seg); err != nil {
336		return err
337	}
338
339	// Only the linkedit segment moved, anything before that is fine.
340	if seg.Offset < linkseg.Offset {
341		return nil
342	}
343	seg.Offset += linkoffset
344	if err := r.WriteAt(0, &seg); err != nil {
345		return err
346	}
347	// There shouldn't be any sections, but just to make sure...
348	return machoUpdateSections(r, &seg, linkoffset, nil)
349}
350
351func machoUpdateSections(r loadCmdReader, seg *macho.Segment64, deltaOffset uint64, compressedSects []*macho.Section) error {
352	nsect := seg.Nsect
353	if nsect == 0 {
354		return nil
355	}
356	sectOffset := int64(unsafe.Sizeof(*seg))
357
358	var sect macho.Section64
359	sectSize := int64(unsafe.Sizeof(sect))
360	for i := uint32(0); i < nsect; i++ {
361		if err := r.ReadAt(sectOffset, &sect); err != nil {
362			return err
363		}
364		if compressedSects != nil {
365			cSect := compressedSects[i]
366			copy(sect.Name[:], cSect.Name)
367			sect.Size = cSect.Size
368			if cSect.Offset != 0 {
369				sect.Offset = cSect.Offset + uint32(deltaOffset)
370			}
371			if cSect.Addr != 0 {
372				sect.Addr = cSect.Addr
373			}
374		} else {
375			if sect.Offset != 0 {
376				sect.Offset += uint32(deltaOffset)
377			}
378			if sect.Reloff != 0 {
379				sect.Reloff += uint32(deltaOffset)
380			}
381		}
382		if err := r.WriteAt(sectOffset, &sect); err != nil {
383			return err
384		}
385		sectOffset += sectSize
386	}
387	return nil
388}
389
390// machoUpdateDwarfHeader updates the DWARF segment load command.
391func machoUpdateDwarfHeader(r *loadCmdReader, compressedSects []*macho.Section, dwarfsize uint64, dwarfstart int64, realdwarf *macho.Segment) error {
392	cmd, err := r.Next()
393	if err != nil {
394		return err
395	}
396	if cmd.Cmd != macho.LoadCmdSegment64 {
397		panic("not a Segment64")
398	}
399	var seg macho.Segment64
400	if err := r.ReadAt(0, &seg); err != nil {
401		return err
402	}
403	seg.Offset = uint64(dwarfstart)
404
405	if compressedSects != nil {
406		var segSize uint64
407		for _, newSect := range compressedSects {
408			segSize += newSect.Size
409		}
410		seg.Filesz = segSize
411	} else {
412		seg.Filesz = dwarfsize
413	}
414
415	// We want the DWARF segment to be considered non-loadable, so
416	// force vmaddr and vmsize to zero. In addition, set the initial
417	// protection to zero so as to make the dynamic loader happy,
418	// since otherwise it may complain that the vm size and file
419	// size don't match for the segment. See issues 21647 and 32673
420	// for more context. Also useful to refer to the Apple dynamic
421	// loader source, specifically ImageLoaderMachO::sniffLoadCommands
422	// in ImageLoaderMachO.cpp (various versions can be found online, see
423	// https://opensource.apple.com/source/dyld/dyld-519.2.2/src/ImageLoaderMachO.cpp.auto.html
424	// as one example).
425	seg.Addr = 0
426	seg.Memsz = 0
427	seg.Prot = 0
428
429	if err := r.WriteAt(0, &seg); err != nil {
430		return err
431	}
432	return machoUpdateSections(*r, &seg, uint64(dwarfstart)-realdwarf.Offset, compressedSects)
433}
434
435func machoUpdateLoadCommand(r loadCmdReader, linkseg *macho.Segment, linkoffset uint64, cmd interface{}, fields ...string) error {
436	if err := r.ReadAt(0, cmd); err != nil {
437		return err
438	}
439	value := reflect.Indirect(reflect.ValueOf(cmd))
440
441	for _, name := range fields {
442		field := value.FieldByName(name)
443		if fieldval := field.Uint(); fieldval >= linkseg.Offset {
444			field.SetUint(fieldval + linkoffset)
445		}
446	}
447	if err := r.WriteAt(0, cmd); err != nil {
448		return err
449	}
450	return nil
451}
452