1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ld
6
7import (
8	"cmd/internal/goobj"
9	"cmd/internal/objabi"
10	"cmd/internal/sys"
11	"cmd/link/internal/loader"
12	"cmd/link/internal/sym"
13	"fmt"
14	"internal/abi"
15	"internal/buildcfg"
16	"strings"
17	"unicode"
18)
19
// Blank reference to fmt.Print; presumably kept so the fmt import stays
// in use while ad-hoc debug prints are added or removed — confirm before
// deleting.
var _ = fmt.Print
21
// deadcodePass holds the state of a single run of the linker's dead code
// elimination pass (see deadcode).
type deadcodePass struct {
	ctxt *Link          // link context the pass operates on
	ldr  *loader.Loader // symbol loader; holds the Reachable/UsedInIface attributes
	wq   heap           // work queue, using min-heap for better locality

	ifaceMethod        map[methodsig]bool // methods called from reached interface call sites
	genericIfaceMethod map[string]bool    // names of methods called from reached generic interface call sites
	markableMethods    []methodref        // methods of reached types
	reflectSeen        bool               // whether we have seen a reflect method call
	dynlink            bool               // cached result of ctxt.DynlinkingGo(), set in init

	methodsigstmp []methodsig  // scratch buffer for decoding method signatures
	pkginits      []loader.Sym // reached package init functions, collected by flood for mapinitcleanup
	mapinitnoop   loader.Sym   // runtime.mapinitnoop; replacement target used by mapinitcleanup
}
37
38func (d *deadcodePass) init() {
39	d.ldr.InitReachable()
40	d.ifaceMethod = make(map[methodsig]bool)
41	d.genericIfaceMethod = make(map[string]bool)
42	if buildcfg.Experiment.FieldTrack {
43		d.ldr.Reachparent = make([]loader.Sym, d.ldr.NSym())
44	}
45	d.dynlink = d.ctxt.DynlinkingGo()
46
47	if d.ctxt.BuildMode == BuildModeShared {
48		// Mark all symbols defined in this library as reachable when
49		// building a shared library.
50		n := d.ldr.NDef()
51		for i := 1; i < n; i++ {
52			s := loader.Sym(i)
53			if d.ldr.SymType(s) == sym.STEXT && d.ldr.SymSize(s) == 0 {
54				// Zero-sized text symbol is a function deadcoded by the
55				// compiler. It doesn't really get compiled, and its
56				// metadata may be missing.
57				continue
58			}
59			d.mark(s, 0)
60		}
61		d.mark(d.ctxt.mainInittasks, 0)
62		return
63	}
64
65	var names []string
66
67	// In a normal binary, start at main.main and the init
68	// functions and mark what is reachable from there.
69	if d.ctxt.linkShared && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
70		names = append(names, "main.main", "main..inittask")
71	} else {
72		// The external linker refers main symbol directly.
73		if d.ctxt.LinkMode == LinkExternal && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
74			if d.ctxt.HeadType == objabi.Hwindows && d.ctxt.Arch.Family == sys.I386 {
75				*flagEntrySymbol = "_main"
76			} else {
77				*flagEntrySymbol = "main"
78			}
79		}
80		names = append(names, *flagEntrySymbol)
81	}
82	// runtime.unreachableMethod is a function that will throw if called.
83	// We redirect unreachable methods to it.
84	names = append(names, "runtime.unreachableMethod")
85	if d.ctxt.BuildMode == BuildModePlugin {
86		names = append(names, objabi.PathToPrefix(*flagPluginPath)+"..inittask", objabi.PathToPrefix(*flagPluginPath)+".main", "go:plugin.tabs")
87
88		// We don't keep the go.plugin.exports symbol,
89		// but we do keep the symbols it refers to.
90		exportsIdx := d.ldr.Lookup("go:plugin.exports", 0)
91		if exportsIdx != 0 {
92			relocs := d.ldr.Relocs(exportsIdx)
93			for i := 0; i < relocs.Count(); i++ {
94				d.mark(relocs.At(i).Sym(), 0)
95			}
96		}
97	}
98
99	if d.ctxt.Debugvlog > 1 {
100		d.ctxt.Logf("deadcode start names: %v\n", names)
101	}
102
103	for _, name := range names {
104		// Mark symbol as a data/ABI0 symbol.
105		d.mark(d.ldr.Lookup(name, 0), 0)
106		if abiInternalVer != 0 {
107			// Also mark any Go functions (internal ABI).
108			d.mark(d.ldr.Lookup(name, abiInternalVer), 0)
109		}
110	}
111
112	// All dynamic exports are roots.
113	for _, s := range d.ctxt.dynexp {
114		if d.ctxt.Debugvlog > 1 {
115			d.ctxt.Logf("deadcode start dynexp: %s<%d>\n", d.ldr.SymName(s), d.ldr.SymVersion(s))
116		}
117		d.mark(s, 0)
118	}
119
120	d.mapinitnoop = d.ldr.Lookup("runtime.mapinitnoop", abiInternalVer)
121	if d.mapinitnoop == 0 {
122		panic("could not look up runtime.mapinitnoop")
123	}
124	if d.ctxt.mainInittasks != 0 {
125		d.mark(d.ctxt.mainInittasks, 0)
126	}
127}
128
// flood drains the work queue. For each queued symbol it marks the
// symbols referenced through its relocations, its aux symbols and (for
// external symbols) its outer/sub symbols. mark pushes newly reached
// symbols onto the queue, so the loop runs until no work is left.
//
// While visiting symbols it also records:
//   - d.reflectSeen, if any visited symbol is flagged as a reflect method;
//   - d.ifaceMethod and d.genericIfaceMethod, from R_USEIFACEMETHOD and
//     R_USENAMEDMETHOD marker relocations;
//   - d.markableMethods, the methods of visited types that are used in an
//     interface (these are collected here but not marked);
//   - d.pkginits, the visited package init functions.
func (d *deadcodePass) flood() {
	// Scratch slice holding the method relocs of the symbol being visited;
	// reused across iterations.
	var methods []methodref
	for !d.wq.empty() {
		symIdx := d.wq.pop()

		// Methods may be called via reflection. Give up on static analysis,
		// and mark all exported methods of all reachable types as reachable.
		d.reflectSeen = d.reflectSeen || d.ldr.IsReflectMethod(symIdx)

		isgotype := d.ldr.IsGoType(symIdx)
		relocs := d.ldr.Relocs(symIdx)
		var usedInIface bool

		if isgotype {
			if d.dynlink {
				// When dynamic linking, a type may be passed across DSO
				// boundary and get converted to interface at the other side.
				d.ldr.SetAttrUsedInIface(symIdx, true)
			}
			usedInIface = d.ldr.AttrUsedInIface(symIdx)
		}

		methods = methods[:0]
		for i := 0; i < relocs.Count(); i++ {
			r := relocs.At(i)
			if r.Weak() {
				// Weak relocs are skipped unless one of the conditions
				// below forces them to be treated as strong.
				convertWeakToStrong := false
				// When building with "-linkshared", we can't tell if the
				// interface method in itab will be used or not.
				// Ignore the weak attribute.
				if d.ctxt.linkShared && d.ldr.IsItab(symIdx) {
					convertWeakToStrong = true
				}
				// If the program uses plugins, we can no longer treat
				// relocs from pkg init functions to outlined map init
				// fragments as weak, since doing so can cause package
				// init clashes between the main program and the
				// plugin. See #62430 for more details.
				if d.ctxt.canUsePlugins && r.Type().IsDirectCall() {
					convertWeakToStrong = true
				}
				if !convertWeakToStrong {
					// skip this reloc
					continue
				}
			}
			t := r.Type()
			switch t {
			case objabi.R_METHODOFF:
				// Method relocs come in groups of three; i indexes the
				// first of the group.
				if i+2 >= relocs.Count() {
					panic("expect three consecutive R_METHODOFF relocs")
				}
				if usedInIface {
					// Remember the method; whether it gets marked is
					// decided later (see deadcode).
					methods = append(methods, methodref{src: symIdx, r: i})
					// The method descriptor is itself a type descriptor, and
					// it can be used to reach other types, e.g. by using
					// reflect.Type.Method(i).Type.In(j). We need to traverse
					// its child types with UsedInIface set. (See also the
					// comment below.)
					rs := r.Sym()
					if !d.ldr.AttrUsedInIface(rs) {
						d.ldr.SetAttrUsedInIface(rs, true)
						if d.ldr.AttrReachable(rs) {
							d.ldr.SetAttrReachable(rs, false)
							d.mark(rs, symIdx)
						}
					}
				}
				// Skip the remaining two relocs of this method's group.
				i += 2
				continue
			case objabi.R_USETYPE:
				// type symbol used for DWARF. we need to load the symbol but it may not
				// be otherwise reachable in the program.
				// do nothing for now as we still load all type symbols.
				continue
			case objabi.R_USEIFACE:
				// R_USEIFACE is a marker relocation that tells the linker the type is
				// converted to an interface, i.e. should have UsedInIface set. See the
				// comment below for why we need to unset the Reachable bit and re-mark it.
				rs := r.Sym()
				if d.ldr.IsItab(rs) {
					// This relocation can also point at an itab, in which case it
					// means "the Type field of that itab".
					rs = decodeItabType(d.ldr, d.ctxt.Arch, rs)
				}
				if !d.ldr.IsGoType(rs) && !d.ctxt.linkShared {
					panic(fmt.Sprintf("R_USEIFACE in %s references %s which is not a type or itab", d.ldr.SymName(symIdx), d.ldr.SymName(rs)))
				}
				if !d.ldr.AttrUsedInIface(rs) {
					d.ldr.SetAttrUsedInIface(rs, true)
					if d.ldr.AttrReachable(rs) {
						d.ldr.SetAttrReachable(rs, false)
						d.mark(rs, symIdx)
					}
				}
				continue
			case objabi.R_USEIFACEMETHOD:
				// R_USEIFACEMETHOD is a marker relocation that marks an interface
				// method as used.
				rs := r.Sym()
				if d.ctxt.linkShared && (d.ldr.SymType(rs) == sym.SDYNIMPORT || d.ldr.SymType(rs) == sym.Sxxx) {
					// Don't decode symbol from shared library (we'll mark all exported methods anyway).
					// We check for both SDYNIMPORT and Sxxx because name-mangled symbols haven't
					// been resolved at this point.
					continue
				}
				m := d.decodeIfaceMethod(d.ldr, d.ctxt.Arch, rs, r.Add())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached iface method: %v\n", m)
				}
				d.ifaceMethod[m] = true
				continue
			case objabi.R_USENAMEDMETHOD:
				// The referenced symbol's data is the method name itself.
				name := d.decodeGenericIfaceMethod(d.ldr, r.Sym())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached generic iface method: %s\n", name)
				}
				d.genericIfaceMethod[name] = true
				continue // don't mark referenced symbol - it is not needed in the final binary.
			case objabi.R_INITORDER:
				// inittasks has already run, so any R_INITORDER links are now
				// superfluous - the only live inittask records are those which are
				// in a scheduled list somewhere (e.g. runtime.moduledata.inittasks).
				continue
			}
			// Ordinary relocation: the target is reachable.
			rs := r.Sym()
			if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) {
				// If a type is converted to an interface, it is possible to obtain an
				// interface with a "child" type of it using reflection (e.g. obtain an
				// interface of T from []chan T). We need to traverse its "child" types
				// with UsedInIface attribute set.
				// When visiting the child type (chan T in the example above), it will
				// have UsedInIface set, so it in turn will mark and (re)visit its children
				// (e.g. T above).
				// We unset the reachable bit here, so if the child type is already visited,
				// it will be visited again.
				// Note that a type symbol can be visited at most twice, one without
				// UsedInIface and one with. So termination is still guaranteed.
				d.ldr.SetAttrUsedInIface(rs, true)
				d.ldr.SetAttrReachable(rs, false)
			}
			d.mark(rs, symIdx)
		}
		naux := d.ldr.NAux(symIdx)
		for i := 0; i < naux; i++ {
			a := d.ldr.Aux(symIdx, i)
			if a.Type() == goobj.AuxGotype {
				// A symbol being reachable doesn't imply we need its
				// type descriptor. Don't mark it.
				continue
			}
			d.mark(a.Sym(), symIdx)
		}
		// Record sym if package init func (here naux != 0 is a cheap way
		// to check first if it is a function symbol).
		if naux != 0 && d.ldr.IsPkgInit(symIdx) {

			d.pkginits = append(d.pkginits, symIdx)
		}
		// Some host object symbols have an outer object, which acts like a
		// "carrier" symbol, or it holds all the symbols for a particular
		// section. We need to mark all "referenced" symbols from that carrier,
		// so we make sure we're pulling in all outer symbols, and their sub
		// symbols. This is not ideal, and these carrier/section symbols could
		// be removed.
		if d.ldr.IsExternal(symIdx) {
			d.mark(d.ldr.OuterSym(symIdx), symIdx)
			d.mark(d.ldr.SubSym(symIdx), symIdx)
		}

		if len(methods) != 0 {
			if !isgotype {
				panic("method found on non-type symbol")
			}
			// Decode runtime type information for type methods
			// to help work out which methods can be called
			// dynamically via interfaces.
			methodsigs := d.decodetypeMethods(d.ldr, d.ctxt.Arch, symIdx, &relocs)
			if len(methods) != len(methodsigs) {
				panic(fmt.Sprintf("%q has %d method relocations for %d methods", d.ldr.SymName(symIdx), len(methods), len(methodsigs)))
			}
			// Pair each collected reloc group with its decoded signature,
			// by position.
			for i, m := range methodsigs {
				methods[i].m = m
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("markable method: %v of sym %v %s\n", m, symIdx, d.ldr.SymName(symIdx))
				}
			}
			d.markableMethods = append(d.markableMethods, methods...)
		}
	}
}
320
321// mapinitcleanup walks all pkg init functions and looks for weak relocations
322// to mapinit symbols that are no longer reachable. It rewrites
323// the relocs to target a new no-op routine in the runtime.
324func (d *deadcodePass) mapinitcleanup() {
325	for _, idx := range d.pkginits {
326		relocs := d.ldr.Relocs(idx)
327		var su *loader.SymbolBuilder
328		for i := 0; i < relocs.Count(); i++ {
329			r := relocs.At(i)
330			rs := r.Sym()
331			if r.Weak() && r.Type().IsDirectCall() && !d.ldr.AttrReachable(rs) {
332				// double check to make sure target is indeed map.init
333				rsn := d.ldr.SymName(rs)
334				if !strings.Contains(rsn, "map.init") {
335					panic(fmt.Sprintf("internal error: expected map.init sym for weak call reloc, got %s -> %s", d.ldr.SymName(idx), rsn))
336				}
337				d.ldr.SetAttrReachable(d.mapinitnoop, true)
338				if d.ctxt.Debugvlog > 1 {
339					d.ctxt.Logf("deadcode: %s rewrite %s ref to %s\n",
340						d.ldr.SymName(idx), rsn,
341						d.ldr.SymName(d.mapinitnoop))
342				}
343				if su == nil {
344					su = d.ldr.MakeSymbolUpdater(idx)
345				}
346				su.SetRelocSym(i, d.mapinitnoop)
347			}
348		}
349	}
350}
351
352func (d *deadcodePass) mark(symIdx, parent loader.Sym) {
353	if symIdx != 0 && !d.ldr.AttrReachable(symIdx) {
354		d.wq.push(symIdx)
355		d.ldr.SetAttrReachable(symIdx, true)
356		if buildcfg.Experiment.FieldTrack && d.ldr.Reachparent[symIdx] == 0 {
357			d.ldr.Reachparent[symIdx] = parent
358		}
359		if *flagDumpDep {
360			to := d.ldr.SymName(symIdx)
361			if to != "" {
362				to = d.dumpDepAddFlags(to, symIdx)
363				from := "_"
364				if parent != 0 {
365					from = d.ldr.SymName(parent)
366					from = d.dumpDepAddFlags(from, parent)
367				}
368				fmt.Printf("%s -> %s\n", from, to)
369			}
370		}
371	}
372}
373
374func (d *deadcodePass) dumpDepAddFlags(name string, symIdx loader.Sym) string {
375	var flags strings.Builder
376	if d.ldr.AttrUsedInIface(symIdx) {
377		flags.WriteString("<UsedInIface>")
378	}
379	if d.ldr.IsReflectMethod(symIdx) {
380		flags.WriteString("<ReflectMethod>")
381	}
382	if flags.Len() > 0 {
383		return name + " " + flags.String()
384	}
385	return name
386}
387
388func (d *deadcodePass) markMethod(m methodref) {
389	relocs := d.ldr.Relocs(m.src)
390	d.mark(relocs.At(m.r).Sym(), m.src)
391	d.mark(relocs.At(m.r+1).Sym(), m.src)
392	d.mark(relocs.At(m.r+2).Sym(), m.src)
393}
394
395// deadcode marks all reachable symbols.
396//
397// The basis of the dead code elimination is a flood fill of symbols,
398// following their relocations, beginning at *flagEntrySymbol.
399//
400// This flood fill is wrapped in logic for pruning unused methods.
401// All methods are mentioned by relocations on their receiver's *rtype.
402// These relocations are specially defined as R_METHODOFF by the compiler
// so we can detect and manipulate them here.
404//
405// There are three ways a method of a reachable type can be invoked:
406//
407//  1. direct call
408//  2. through a reachable interface type
409//  3. reflect.Value.Method (or MethodByName), or reflect.Type.Method
410//     (or MethodByName)
411//
412// The first case is handled by the flood fill, a directly called method
413// is marked as reachable.
414//
415// The second case is handled by decomposing all reachable interface
416// types into method signatures. Each encountered method is compared
417// against the interface method signatures, if it matches it is marked
418// as reachable. This is extremely conservative, but easy and correct.
419//
420// The third case is handled by looking for functions that compiler flagged
421// as REFLECTMETHOD. REFLECTMETHOD on a function F means that F does a method
422// lookup with reflection, but the compiler was not able to statically determine
423// the method name.
424//
425// All functions that call reflect.Value.Method or reflect.Type.Method are REFLECTMETHODs.
426// Functions that call reflect.Value.MethodByName or reflect.Type.MethodByName with
427// a non-constant argument are REFLECTMETHODs, too. If we find a REFLECTMETHOD,
428// we give up on static analysis, and mark all exported methods of all reachable
429// types as reachable.
430//
431// If the argument to MethodByName is a compile-time constant, the compiler
432// emits a relocation with the method name. Matching methods are kept in all
433// reachable types.
434//
435// Any unreached text symbols are removed from ctxt.Textp.
436func deadcode(ctxt *Link) {
437	ldr := ctxt.loader
438	d := deadcodePass{ctxt: ctxt, ldr: ldr}
439	d.init()
440	d.flood()
441
442	if ctxt.DynlinkingGo() {
443		// Exported methods may satisfy interfaces we don't know
444		// about yet when dynamically linking.
445		d.reflectSeen = true
446	}
447
448	for {
449		// Mark all methods that could satisfy a discovered
450		// interface as reachable. We recheck old marked interfaces
451		// as new types (with new methods) may have been discovered
452		// in the last pass.
453		rem := d.markableMethods[:0]
454		for _, m := range d.markableMethods {
455			if (d.reflectSeen && (m.isExported() || d.dynlink)) || d.ifaceMethod[m.m] || d.genericIfaceMethod[m.m.name] {
456				d.markMethod(m)
457			} else {
458				rem = append(rem, m)
459			}
460		}
461		d.markableMethods = rem
462
463		if d.wq.empty() {
464			// No new work was discovered. Done.
465			break
466		}
467		d.flood()
468	}
469	if *flagPruneWeakMap {
470		d.mapinitcleanup()
471	}
472}
473
// methodsig is a typed method signature (name + type).
// It is comparable and is used as the key of deadcodePass.ifaceMethod.
type methodsig struct {
	name string     // method name
	typ  loader.Sym // type descriptor symbol of the function
}
479
// methodref holds the relocations from a receiver type symbol to its
// method. There are three relocations, one for each of the fields in
// the reflect.method struct: mtyp, ifn, and tfn.
type methodref struct {
	m   methodsig  // signature of the method; filled in by flood once decoded
	src loader.Sym // receiver type symbol
	r   int        // index in src's relocs of the first of the three R_METHODOFF relocations
}
488
489func (m methodref) isExported() bool {
490	for _, r := range m.m.name {
491		return unicode.IsUpper(r)
492	}
493	panic("methodref has no signature")
494}
495
496// decodeMethodSig decodes an array of method signature information.
497// Each element of the array is size bytes. The first 4 bytes is a
498// nameOff for the method name, and the next 4 bytes is a typeOff for
499// the function type.
500//
501// Conveniently this is the layout of both runtime.method and runtime.imethod.
502func (d *deadcodePass) decodeMethodSig(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs, off, size, count int) []methodsig {
503	if cap(d.methodsigstmp) < count {
504		d.methodsigstmp = append(d.methodsigstmp[:0], make([]methodsig, count)...)
505	}
506	var methods = d.methodsigstmp[:count]
507	for i := 0; i < count; i++ {
508		methods[i].name = decodetypeName(ldr, symIdx, relocs, off)
509		methods[i].typ = decodeRelocSym(ldr, symIdx, relocs, int32(off+4))
510		off += size
511	}
512	return methods
513}
514
515// Decode the method of interface type symbol symIdx at offset off.
516func (d *deadcodePass) decodeIfaceMethod(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, off int64) methodsig {
517	p := ldr.Data(symIdx)
518	if p == nil {
519		panic(fmt.Sprintf("missing symbol %q", ldr.SymName(symIdx)))
520	}
521	if decodetypeKind(arch, p) != abi.Interface {
522		panic(fmt.Sprintf("symbol %q is not an interface", ldr.SymName(symIdx)))
523	}
524	relocs := ldr.Relocs(symIdx)
525	var m methodsig
526	m.name = decodetypeName(ldr, symIdx, &relocs, int(off))
527	m.typ = decodeRelocSym(ldr, symIdx, &relocs, int32(off+4))
528	return m
529}
530
531// Decode the method name stored in symbol symIdx. The symbol should contain just the bytes of a method name.
532func (d *deadcodePass) decodeGenericIfaceMethod(ldr *loader.Loader, symIdx loader.Sym) string {
533	return ldr.DataString(symIdx)
534}
535
536func (d *deadcodePass) decodetypeMethods(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs) []methodsig {
537	p := ldr.Data(symIdx)
538	if !decodetypeHasUncommon(arch, p) {
539		panic(fmt.Sprintf("no methods on %q", ldr.SymName(symIdx)))
540	}
541	off := commonsize(arch) // reflect.rtype
542	switch decodetypeKind(arch, p) {
543	case abi.Struct: // reflect.structType
544		off += 4 * arch.PtrSize
545	case abi.Pointer: // reflect.ptrType
546		off += arch.PtrSize
547	case abi.Func: // reflect.funcType
548		off += arch.PtrSize // 4 bytes, pointer aligned
549	case abi.Slice: // reflect.sliceType
550		off += arch.PtrSize
551	case abi.Array: // reflect.arrayType
552		off += 3 * arch.PtrSize
553	case abi.Chan: // reflect.chanType
554		off += 2 * arch.PtrSize
555	case abi.Map: // reflect.mapType
556		off += 4*arch.PtrSize + 8
557	case abi.Interface: // reflect.interfaceType
558		off += 3 * arch.PtrSize
559	default:
560		// just Sizeof(rtype)
561	}
562
563	mcount := int(decodeInuxi(arch, p[off+4:], 2))
564	moff := int(decodeInuxi(arch, p[off+4+2+2:], 4))
565	off += moff                // offset to array of reflect.method values
566	const sizeofMethod = 4 * 4 // sizeof reflect.method in program
567	return d.decodeMethodSig(ldr, arch, symIdx, relocs, off, sizeofMethod, mcount)
568}
569