// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ld

import (
	"cmd/internal/goobj"
	"cmd/internal/objabi"
	"cmd/internal/sys"
	"cmd/link/internal/loader"
	"cmd/link/internal/sym"
	"fmt"
	"internal/abi"
	"internal/buildcfg"
	"strings"
	"unicode"
)

// Keep the fmt import referenced even if no other use remains in this file.
var _ = fmt.Print

// deadcodePass holds the state of one reachability analysis over the
// loader's symbols. It is created by deadcode, seeded by init, and
// driven by repeated calls to flood.
type deadcodePass struct {
	ctxt *Link
	ldr  *loader.Loader
	wq   heap // work queue, using min-heap for better locality

	ifaceMethod        map[methodsig]bool // methods called from reached interface call sites
	genericIfaceMethod map[string]bool    // names of methods called from reached generic interface call sites
	markableMethods    []methodref        // methods of reached types
	reflectSeen        bool               // whether we have seen a reflect method call
	dynlink            bool               // cached result of ctxt.DynlinkingGo(), set in init

	methodsigstmp []methodsig  // scratch buffer for decoding method signatures
	pkginits      []loader.Sym // package init functions reached by flood; scanned by mapinitcleanup
	mapinitnoop   loader.Sym   // runtime.mapinitnoop, looked up in init; target for rewritten map.init calls
}

// init prepares the pass and seeds the work queue with the root symbols.
//
// When building a shared library, every symbol defined in the library
// (except zero-sized text symbols) is a root and init returns early.
// Otherwise the roots are the entry symbol (or main.main and its inittask
// when linking against shared libraries), runtime.unreachableMethod, the
// plugin symbols in plugin mode, all dynamic exports, and the main
// inittasks symbol.
//
// As a side effect, init overwrites *flagEntrySymbol when linking an
// executable or PIE externally. It panics if runtime.mapinitnoop cannot
// be found.
func (d *deadcodePass) init() {
	d.ldr.InitReachable()
	d.ifaceMethod = make(map[methodsig]bool)
	d.genericIfaceMethod = make(map[string]bool)
	if buildcfg.Experiment.FieldTrack {
		// mark records the first parent of each symbol in this table.
		d.ldr.Reachparent = make([]loader.Sym, d.ldr.NSym())
	}
	d.dynlink = d.ctxt.DynlinkingGo()

	if d.ctxt.BuildMode == BuildModeShared {
		// Mark all symbols defined in this library as reachable when
		// building a shared library.
		n := d.ldr.NDef()
		// Start at 1: mark treats symbol index 0 as "no symbol".
		for i := 1; i < n; i++ {
			s := loader.Sym(i)
			if d.ldr.SymType(s) == sym.STEXT && d.ldr.SymSize(s) == 0 {
				// Zero-sized text symbol is a function deadcoded by the
				// compiler. It doesn't really get compiled, and its
				// metadata may be missing.
				continue
			}
			d.mark(s, 0)
		}
		// No nonzero check needed here: mark ignores symbol 0.
		d.mark(d.ctxt.mainInittasks, 0)
		return
	}

	var names []string

	// In a normal binary, start at main.main and the init
	// functions and mark what is reachable from there.
	if d.ctxt.linkShared && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
		names = append(names, "main.main", "main..inittask")
	} else {
		// The external linker refers to the main symbol directly.
		if d.ctxt.LinkMode == LinkExternal && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
			if d.ctxt.HeadType == objabi.Hwindows && d.ctxt.Arch.Family == sys.I386 {
				*flagEntrySymbol = "_main"
			} else {
				*flagEntrySymbol = "main"
			}
		}
		names = append(names, *flagEntrySymbol)
	}
	// runtime.unreachableMethod is a function that will throw if called.
	// We redirect unreachable methods to it.
	names = append(names, "runtime.unreachableMethod")
	if d.ctxt.BuildMode == BuildModePlugin {
		names = append(names, objabi.PathToPrefix(*flagPluginPath)+"..inittask", objabi.PathToPrefix(*flagPluginPath)+".main", "go:plugin.tabs")

		// We don't keep the go.plugin.exports symbol,
		// but we do keep the symbols it refers to.
		exportsIdx := d.ldr.Lookup("go:plugin.exports", 0)
		if exportsIdx != 0 {
			relocs := d.ldr.Relocs(exportsIdx)
			for i := 0; i < relocs.Count(); i++ {
				d.mark(relocs.At(i).Sym(), 0)
			}
		}
	}

	if d.ctxt.Debugvlog > 1 {
		d.ctxt.Logf("deadcode start names: %v\n", names)
	}

	for _, name := range names {
		// Mark symbol as a data/ABI0 symbol.
		// A failed lookup yields 0, which mark ignores.
		d.mark(d.ldr.Lookup(name, 0), 0)
		if abiInternalVer != 0 {
			// Also mark any Go functions (internal ABI).
			d.mark(d.ldr.Lookup(name, abiInternalVer), 0)
		}
	}

	// All dynamic exports are roots.
	for _, s := range d.ctxt.dynexp {
		if d.ctxt.Debugvlog > 1 {
			d.ctxt.Logf("deadcode start dynexp: %s<%d>\n", d.ldr.SymName(s), d.ldr.SymVersion(s))
		}
		d.mark(s, 0)
	}

	// mapinitcleanup redirects dead map.init calls to this symbol, so it
	// must exist.
	d.mapinitnoop = d.ldr.Lookup("runtime.mapinitnoop", abiInternalVer)
	if d.mapinitnoop == 0 {
		panic("could not look up runtime.mapinitnoop")
	}
	if d.ctxt.mainInittasks != 0 {
		d.mark(d.ctxt.mainInittasks, 0)
	}
}

// flood drains the work queue. For each popped symbol it follows the
// symbol's relocations and aux symbols, marking their targets reachable
// (which pushes them on the queue in turn), and records side information
// used by deadcode:
//
//   - reflectSeen is set once any visited symbol is a reflect-method function;
//   - R_USEIFACEMETHOD and R_USENAMEDMETHOD relocations populate ifaceMethod
//     and genericIfaceMethod;
//   - methods of types that are used in interfaces are not marked directly
//     but appended to markableMethods, to be marked later by deadcode;
//   - package init functions are collected in pkginits.
//
// flood returns when the work queue is empty.
func (d *deadcodePass) flood() {
	// Scratch list of the current symbol's method relocations; reused
	// across iterations to avoid reallocating.
	var methods []methodref
	for !d.wq.empty() {
		symIdx := d.wq.pop()

		// Methods may be called via reflection. Give up on static analysis,
		// and mark all exported methods of all reachable types as reachable.
		d.reflectSeen = d.reflectSeen || d.ldr.IsReflectMethod(symIdx)

		isgotype := d.ldr.IsGoType(symIdx)
		relocs := d.ldr.Relocs(symIdx)
		var usedInIface bool

		if isgotype {
			if d.dynlink {
				// When dynamic linking, a type may be passed across DSO
				// boundary and get converted to interface at the other side.
				d.ldr.SetAttrUsedInIface(symIdx, true)
			}
			usedInIface = d.ldr.AttrUsedInIface(symIdx)
		}

		methods = methods[:0]
		for i := 0; i < relocs.Count(); i++ {
			r := relocs.At(i)
			if r.Weak() {
				convertWeakToStrong := false
				// When building with "-linkshared", we can't tell if the
				// interface method in itab will be used or not.
				// Ignore the weak attribute.
				if d.ctxt.linkShared && d.ldr.IsItab(symIdx) {
					convertWeakToStrong = true
				}
				// If the program uses plugins, we can no longer treat
				// relocs from pkg init functions to outlined map init
				// fragments as weak, since doing so can cause package
				// init clashes between the main program and the
				// plugin. See #62430 for more details.
				if d.ctxt.canUsePlugins && r.Type().IsDirectCall() {
					convertWeakToStrong = true
				}
				if !convertWeakToStrong {
					// skip this reloc
					continue
				}
			}
			t := r.Type()
			switch t {
			case objabi.R_METHODOFF:
				// Method relocations come in groups of three (see markMethod);
				// i indexes the first of the group.
				if i+2 >= relocs.Count() {
					panic("expect three consecutive R_METHODOFF relocs")
				}
				if usedInIface {
					// Defer the decision: the method is only marked later if
					// deadcode finds it may be called dynamically.
					methods = append(methods, methodref{src: symIdx, r: i})
					// The method descriptor is itself a type descriptor, and
					// it can be used to reach other types, e.g. by using
					// reflect.Type.Method(i).Type.In(j). We need to traverse
					// its child types with UsedInIface set. (See also the
					// comment below.)
					rs := r.Sym()
					if !d.ldr.AttrUsedInIface(rs) {
						d.ldr.SetAttrUsedInIface(rs, true)
						if d.ldr.AttrReachable(rs) {
							// Already visited without UsedInIface: clear the
							// bit so mark queues it for a second visit.
							d.ldr.SetAttrReachable(rs, false)
							d.mark(rs, symIdx)
						}
					}
				}
				// Skip the other two relocs of this group; the loop's i++
				// then moves past the third.
				i += 2
				continue
			case objabi.R_USETYPE:
				// type symbol used for DWARF. we need to load the symbol but it may not
				// be otherwise reachable in the program.
				// do nothing for now as we still load all type symbols.
				continue
			case objabi.R_USEIFACE:
				// R_USEIFACE is a marker relocation that tells the linker the type is
				// converted to an interface, i.e. should have UsedInIface set. See the
				// comment below for why we need to unset the Reachable bit and re-mark it.
				rs := r.Sym()
				if d.ldr.IsItab(rs) {
					// This relocation can also point at an itab, in which case it
					// means "the Type field of that itab".
					rs = decodeItabType(d.ldr, d.ctxt.Arch, rs)
				}
				if !d.ldr.IsGoType(rs) && !d.ctxt.linkShared {
					panic(fmt.Sprintf("R_USEIFACE in %s references %s which is not a type or itab", d.ldr.SymName(symIdx), d.ldr.SymName(rs)))
				}
				if !d.ldr.AttrUsedInIface(rs) {
					d.ldr.SetAttrUsedInIface(rs, true)
					if d.ldr.AttrReachable(rs) {
						d.ldr.SetAttrReachable(rs, false)
						d.mark(rs, symIdx)
					}
				}
				continue
			case objabi.R_USEIFACEMETHOD:
				// R_USEIFACEMETHOD is a marker relocation that marks an interface
				// method as used.
				rs := r.Sym()
				if d.ctxt.linkShared && (d.ldr.SymType(rs) == sym.SDYNIMPORT || d.ldr.SymType(rs) == sym.Sxxx) {
					// Don't decode symbol from shared library (we'll mark all exported methods anyway).
					// We check for both SDYNIMPORT and Sxxx because name-mangled symbols haven't
					// been resolved at this point.
					continue
				}
				// The reloc's addend is passed as the offset of the method
				// within the interface type symbol.
				m := d.decodeIfaceMethod(d.ldr, d.ctxt.Arch, rs, r.Add())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached iface method: %v\n", m)
				}
				d.ifaceMethod[m] = true
				continue
			case objabi.R_USENAMEDMETHOD:
				// The target symbol's data is the method name itself.
				name := d.decodeGenericIfaceMethod(d.ldr, r.Sym())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached generic iface method: %s\n", name)
				}
				d.genericIfaceMethod[name] = true
				continue // don't mark referenced symbol - it is not needed in the final binary.
			case objabi.R_INITORDER:
				// inittasks has already run, so any R_INITORDER links are now
				// superfluous - the only live inittask records are those which are
				// in a scheduled list somewhere (e.g. runtime.moduledata.inittasks).
				continue
			}
			// Ordinary relocation: its target is reachable.
			rs := r.Sym()
			if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) {
				// If a type is converted to an interface, it is possible to obtain an
				// interface with a "child" type of it using reflection (e.g. obtain an
				// interface of T from []chan T). We need to traverse its "child" types
				// with UsedInIface attribute set.
				// When visiting the child type (chan T in the example above), it will
				// have UsedInIface set, so it in turn will mark and (re)visit its children
				// (e.g. T above).
				// We unset the reachable bit here, so if the child type is already visited,
				// it will be visited again.
				// Note that a type symbol can be visited at most twice, one without
				// UsedInIface and one with. So termination is still guaranteed.
				d.ldr.SetAttrUsedInIface(rs, true)
				d.ldr.SetAttrReachable(rs, false)
			}
			d.mark(rs, symIdx)
		}
		naux := d.ldr.NAux(symIdx)
		for i := 0; i < naux; i++ {
			a := d.ldr.Aux(symIdx, i)
			if a.Type() == goobj.AuxGotype {
				// A symbol being reachable doesn't imply we need its
				// type descriptor. Don't mark it.
				continue
			}
			d.mark(a.Sym(), symIdx)
		}
		// Record sym if package init func (here naux != 0 is a cheap way
		// to check first if it is a function symbol).
		if naux != 0 && d.ldr.IsPkgInit(symIdx) {

			d.pkginits = append(d.pkginits, symIdx)
		}
		// Some host object symbols have an outer object, which acts like a
		// "carrier" symbol, or it holds all the symbols for a particular
		// section. We need to mark all "referenced" symbols from that carrier,
		// so we make sure we're pulling in all outer symbols, and their sub
		// symbols. This is not ideal, and these carrier/section symbols could
		// be removed.
		if d.ldr.IsExternal(symIdx) {
			d.mark(d.ldr.OuterSym(symIdx), symIdx)
			d.mark(d.ldr.SubSym(symIdx), symIdx)
		}

		if len(methods) != 0 {
			if !isgotype {
				panic("method found on non-type symbol")
			}
			// Decode runtime type information for type methods
			// to help work out which methods can be called
			// dynamically via interfaces.
			methodsigs := d.decodetypeMethods(d.ldr, d.ctxt.Arch, symIdx, &relocs)
			if len(methods) != len(methodsigs) {
				panic(fmt.Sprintf("%q has %d method relocations for %d methods", d.ldr.SymName(symIdx), len(methods), len(methodsigs)))
			}
			// Pair each recorded relocation group with its decoded
			// signature, by position.
			for i, m := range methodsigs {
				methods[i].m = m
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("markable method: %v of sym %v %s\n", m, symIdx, d.ldr.SymName(symIdx))
				}
			}
			// append copies the entries, so reusing the methods scratch
			// slice on the next iteration is safe.
			d.markableMethods = append(d.markableMethods, methods...)
		}
	}
}

// mapinitcleanup walks all pkg init functions and looks for weak relocations
// to mapinit symbols that are no longer reachable. It rewrites
// the relocs to target a new no-op routine in the runtime.
//
// It panics if an unreachable weak direct-call target does not have
// "map.init" in its name.
func (d *deadcodePass) mapinitcleanup() {
	for _, idx := range d.pkginits {
		relocs := d.ldr.Relocs(idx)
		// Symbol updater for idx, created lazily on the first rewrite.
		var su *loader.SymbolBuilder
		for i := 0; i < relocs.Count(); i++ {
			r := relocs.At(i)
			rs := r.Sym()
			if r.Weak() && r.Type().IsDirectCall() && !d.ldr.AttrReachable(rs) {
				// double check to make sure target is indeed map.init
				rsn := d.ldr.SymName(rs)
				if !strings.Contains(rsn, "map.init") {
					panic(fmt.Sprintf("internal error: expected map.init sym for weak call reloc, got %s -> %s", d.ldr.SymName(idx), rsn))
				}
				// The replacement target must itself be kept in the output.
				d.ldr.SetAttrReachable(d.mapinitnoop, true)
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("deadcode: %s rewrite %s ref to %s\n",
						d.ldr.SymName(idx), rsn,
						d.ldr.SymName(d.mapinitnoop))
				}
				if su == nil {
					su = d.ldr.MakeSymbolUpdater(idx)
				}
				su.SetRelocSym(i, d.mapinitnoop)
			}
		}
	}
}

// mark marks symIdx as reachable and pushes it on the work queue, unless
// symIdx is 0 or is already marked reachable. parent is the symbol whose
// reference led here, or 0 for a root.
//
// With the FieldTrack experiment enabled, the first parent seen for each
// symbol is recorded in ldr.Reachparent. When *flagDumpDep is set, a
// "from -> to" line is printed to standard output for every newly marked
// symbol that has a name.
func (d *deadcodePass) mark(symIdx, parent loader.Sym) {
	if symIdx != 0 && !d.ldr.AttrReachable(symIdx) {
		d.wq.push(symIdx)
		d.ldr.SetAttrReachable(symIdx, true)
		if buildcfg.Experiment.FieldTrack && d.ldr.Reachparent[symIdx] == 0 {
			d.ldr.Reachparent[symIdx] = parent
		}
		if *flagDumpDep {
			to := d.ldr.SymName(symIdx)
			if to != "" {
				to = d.dumpDepAddFlags(to, symIdx)
				// "_" stands for "no parent", i.e. a root.
				from := "_"
				if parent != 0 {
					from = d.ldr.SymName(parent)
					from = d.dumpDepAddFlags(from, parent)
				}
				fmt.Printf("%s -> %s\n", from, to)
			}
		}
	}
}

// dumpDepAddFlags returns name annotated for dependency dump output:
// "<UsedInIface>" and/or "<ReflectMethod>" are appended after a single
// space when the corresponding property holds for symIdx. If neither
// holds, name is returned unchanged.
func (d *deadcodePass) dumpDepAddFlags(name string, symIdx loader.Sym) string {
	var flags strings.Builder
	if d.ldr.AttrUsedInIface(symIdx) {
		flags.WriteString("<UsedInIface>")
	}
	if d.ldr.IsReflectMethod(symIdx) {
		flags.WriteString("<ReflectMethod>")
	}
	if flags.Len() > 0 {
		return name + " " + flags.String()
	}
	return name
}

// markMethod marks the targets of the three consecutive relocations
// starting at index m.r on the receiver type symbol m.src, with m.src
// as their parent.
func (d *deadcodePass) markMethod(m methodref) {
	relocs := d.ldr.Relocs(m.src)
	d.mark(relocs.At(m.r).Sym(), m.src)
	d.mark(relocs.At(m.r+1).Sym(), m.src)
	d.mark(relocs.At(m.r+2).Sym(), m.src)
}

// deadcode marks all reachable symbols.
//
// The basis of the dead code elimination is a flood fill of symbols,
// following their relocations, beginning at *flagEntrySymbol.
//
// This flood fill is wrapped in logic for pruning unused methods.
// All methods are mentioned by relocations on their receiver's *rtype.
// These relocations are specially defined as R_METHODOFF by the compiler
// so we can detect and manipulate them here.
//
// There are three ways a method of a reachable type can be invoked:
//
//  1. direct call
//  2. through a reachable interface type
//  3. reflect.Value.Method (or MethodByName), or reflect.Type.Method
//     (or MethodByName)
//
// The first case is handled by the flood fill, a directly called method
// is marked as reachable.
//
// The second case is handled by decomposing all reachable interface
// types into method signatures. Each encountered method is compared
// against the interface method signatures, if it matches it is marked
// as reachable. This is extremely conservative, but easy and correct.
419// 420// The third case is handled by looking for functions that compiler flagged 421// as REFLECTMETHOD. REFLECTMETHOD on a function F means that F does a method 422// lookup with reflection, but the compiler was not able to statically determine 423// the method name. 424// 425// All functions that call reflect.Value.Method or reflect.Type.Method are REFLECTMETHODs. 426// Functions that call reflect.Value.MethodByName or reflect.Type.MethodByName with 427// a non-constant argument are REFLECTMETHODs, too. If we find a REFLECTMETHOD, 428// we give up on static analysis, and mark all exported methods of all reachable 429// types as reachable. 430// 431// If the argument to MethodByName is a compile-time constant, the compiler 432// emits a relocation with the method name. Matching methods are kept in all 433// reachable types. 434// 435// Any unreached text symbols are removed from ctxt.Textp. 436func deadcode(ctxt *Link) { 437 ldr := ctxt.loader 438 d := deadcodePass{ctxt: ctxt, ldr: ldr} 439 d.init() 440 d.flood() 441 442 if ctxt.DynlinkingGo() { 443 // Exported methods may satisfy interfaces we don't know 444 // about yet when dynamically linking. 445 d.reflectSeen = true 446 } 447 448 for { 449 // Mark all methods that could satisfy a discovered 450 // interface as reachable. We recheck old marked interfaces 451 // as new types (with new methods) may have been discovered 452 // in the last pass. 453 rem := d.markableMethods[:0] 454 for _, m := range d.markableMethods { 455 if (d.reflectSeen && (m.isExported() || d.dynlink)) || d.ifaceMethod[m.m] || d.genericIfaceMethod[m.m.name] { 456 d.markMethod(m) 457 } else { 458 rem = append(rem, m) 459 } 460 } 461 d.markableMethods = rem 462 463 if d.wq.empty() { 464 // No new work was discovered. Done. 465 break 466 } 467 d.flood() 468 } 469 if *flagPruneWeakMap { 470 d.mapinitcleanup() 471 } 472} 473 474// methodsig is a typed method signature (name + type). 
475type methodsig struct { 476 name string 477 typ loader.Sym // type descriptor symbol of the function 478} 479 480// methodref holds the relocations from a receiver type symbol to its 481// method. There are three relocations, one for each of the fields in 482// the reflect.method struct: mtyp, ifn, and tfn. 483type methodref struct { 484 m methodsig 485 src loader.Sym // receiver type symbol 486 r int // the index of R_METHODOFF relocations 487} 488 489func (m methodref) isExported() bool { 490 for _, r := range m.m.name { 491 return unicode.IsUpper(r) 492 } 493 panic("methodref has no signature") 494} 495 496// decodeMethodSig decodes an array of method signature information. 497// Each element of the array is size bytes. The first 4 bytes is a 498// nameOff for the method name, and the next 4 bytes is a typeOff for 499// the function type. 500// 501// Conveniently this is the layout of both runtime.method and runtime.imethod. 502func (d *deadcodePass) decodeMethodSig(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs, off, size, count int) []methodsig { 503 if cap(d.methodsigstmp) < count { 504 d.methodsigstmp = append(d.methodsigstmp[:0], make([]methodsig, count)...) 505 } 506 var methods = d.methodsigstmp[:count] 507 for i := 0; i < count; i++ { 508 methods[i].name = decodetypeName(ldr, symIdx, relocs, off) 509 methods[i].typ = decodeRelocSym(ldr, symIdx, relocs, int32(off+4)) 510 off += size 511 } 512 return methods 513} 514 515// Decode the method of interface type symbol symIdx at offset off. 
516func (d *deadcodePass) decodeIfaceMethod(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, off int64) methodsig { 517 p := ldr.Data(symIdx) 518 if p == nil { 519 panic(fmt.Sprintf("missing symbol %q", ldr.SymName(symIdx))) 520 } 521 if decodetypeKind(arch, p) != abi.Interface { 522 panic(fmt.Sprintf("symbol %q is not an interface", ldr.SymName(symIdx))) 523 } 524 relocs := ldr.Relocs(symIdx) 525 var m methodsig 526 m.name = decodetypeName(ldr, symIdx, &relocs, int(off)) 527 m.typ = decodeRelocSym(ldr, symIdx, &relocs, int32(off+4)) 528 return m 529} 530 531// Decode the method name stored in symbol symIdx. The symbol should contain just the bytes of a method name. 532func (d *deadcodePass) decodeGenericIfaceMethod(ldr *loader.Loader, symIdx loader.Sym) string { 533 return ldr.DataString(symIdx) 534} 535 536func (d *deadcodePass) decodetypeMethods(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs) []methodsig { 537 p := ldr.Data(symIdx) 538 if !decodetypeHasUncommon(arch, p) { 539 panic(fmt.Sprintf("no methods on %q", ldr.SymName(symIdx))) 540 } 541 off := commonsize(arch) // reflect.rtype 542 switch decodetypeKind(arch, p) { 543 case abi.Struct: // reflect.structType 544 off += 4 * arch.PtrSize 545 case abi.Pointer: // reflect.ptrType 546 off += arch.PtrSize 547 case abi.Func: // reflect.funcType 548 off += arch.PtrSize // 4 bytes, pointer aligned 549 case abi.Slice: // reflect.sliceType 550 off += arch.PtrSize 551 case abi.Array: // reflect.arrayType 552 off += 3 * arch.PtrSize 553 case abi.Chan: // reflect.chanType 554 off += 2 * arch.PtrSize 555 case abi.Map: // reflect.mapType 556 off += 4*arch.PtrSize + 8 557 case abi.Interface: // reflect.interfaceType 558 off += 3 * arch.PtrSize 559 default: 560 // just Sizeof(rtype) 561 } 562 563 mcount := int(decodeInuxi(arch, p[off+4:], 2)) 564 moff := int(decodeInuxi(arch, p[off+4+2+2:], 4)) 565 off += moff // offset to array of reflect.method values 566 const sizeofMethod = 4 * 4 // 
sizeof reflect.method in program 567 return d.decodeMethodSig(ldr, arch, symIdx, relocs, off, sizeofMethod, mcount) 568} 569