// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssagen

import (
	"fmt"
	"internal/buildcfg"
	"os"
	"sort"
	"sync"

	"cmd/compile/internal/base"
	"cmd/compile/internal/inline"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/liveness"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/pgoir"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
)

// cmpstackvarlt reports whether the stack variable a sorts before b.
func cmpstackvarlt(a, b *ir.Name, mls *liveness.MergeLocalsState) bool {
	// Sort non-autos before autos.
	if needAlloc(a) != needAlloc(b) {
		return needAlloc(b)
	}

	// If both are non-auto (e.g., parameters, results), then sort by
	// frame offset (defined by ABI).
	if !needAlloc(a) {
		return a.FrameOffset() < b.FrameOffset()
	}

	// From here on, a and b are both autos (i.e., local variables).

	// Sort followers after leaders, if mls != nil.
	if mls != nil {
		aFollow := mls.Subsumed(a)
		bFollow := mls.Subsumed(b)
		if aFollow != bFollow {
			return bFollow
		}
	}

	// Sort used before unused (so AllocFrame can truncate unused
	// variables).
	if a.Used() != b.Used() {
		return a.Used()
	}

	// Sort pointer-typed variables before non-pointer-typed variables.
	// Keeps the stack's GC bitmap compact.
	ap := a.Type().HasPointers()
	bp := b.Type().HasPointers()
	if ap != bp {
		return ap
	}

	// Group variables that need zeroing, so we can efficiently zero
	// them altogether.
	ap = a.Needzero()
	bp = b.Needzero()
	if ap != bp {
		return ap
	}

	// Sort variables in descending alignment order, so we can optimally
	// pack variables into the frame.
	if a.Type().Alignment() != b.Type().Alignment() {
		return a.Type().Alignment() > b.Type().Alignment()
	}

	// Sort normal variables before open-coded-defer slots, so that the
	// latter are grouped together and near the top of the frame (to
	// minimize varint encoding of their varp offset).
	if a.OpenDeferSlot() != b.OpenDeferSlot() {
		return a.OpenDeferSlot()
	}

	// If a and b are both open-coded defer slots, then order them by
	// index in descending order, so they'll be laid out in the frame in
	// ascending order.
	//
	// Their index was saved in FrameOffset in state.openDeferSave.
	if a.OpenDeferSlot() {
		return a.FrameOffset() > b.FrameOffset()
	}

	// Tie breaker for stable results.
	return a.Sym().Name < b.Sym().Name
}

// needAlloc reports whether n is within the current frame, for which we need to
// allocate space. In particular, it excludes arguments and results, which are in
// the caller's frame.
func needAlloc(n *ir.Name) bool {
	if n.Op() != ir.ONAME {
		base.FatalfAt(n.Pos(), "%v has unexpected Op %v", n, n.Op())
	}

	switch n.Class {
	case ir.PAUTO:
		return true
	case ir.PPARAM:
		return false
	case ir.PPARAMOUT:
		return n.IsOutputParamInRegisters()

	default:
		base.FatalfAt(n.Pos(), "%v has unexpected Class %v", n, n.Class)
		return false
	}
}
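
// As an illustrative (hypothetical) example of the ordering that
// cmpstackvarlt imposes: for a function whose autos are an unused temp
// u, a used *int p, and a used [16]byte buf, the sort yields p, buf, u.
// Used variables come before unused ones (so AllocFrame can truncate u
// away), and pointer-typed variables come before pointerless ones, which
// keeps the pointer-containing part of the frame a compact prefix for
// the GC bitmap.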

func (s *ssafn) AllocFrame(f *ssa.Func) {
	s.stksize = 0
	s.stkptrsize = 0
	s.stkalign = int64(types.RegSize)
	fn := s.curfn

	// Mark the PAUTOs unused.
	for _, ln := range fn.Dcl {
		if ln.OpenDeferSlot() {
			// Open-coded defer slots have indices that were assigned
			// upfront during SSA construction, but the defer statement can
			// later get removed during deadcode elimination (#61895). To
			// keep their relative offsets correct, treat them all as used.
			continue
		}

		if needAlloc(ln) {
			ln.SetUsed(false)
		}
	}

	for _, l := range f.RegAlloc {
		if ls, ok := l.(ssa.LocalSlot); ok {
			ls.N.SetUsed(true)
		}
	}

	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if n, ok := v.Aux.(*ir.Name); ok {
				switch n.Class {
				case ir.PPARAMOUT:
					if n.IsOutputParamInRegisters() && v.Op == ssa.OpVarDef {
						// ignore VarDef, look for "real" uses.
						// TODO: maybe do this for PAUTO as well?
						continue
					}
					fallthrough
				case ir.PPARAM, ir.PAUTO:
					n.SetUsed(true)
				}
			}
		}
	}

	var mls *liveness.MergeLocalsState
	var leaders map[*ir.Name]int64
	if base.Debug.MergeLocals != 0 {
		mls = liveness.MergeLocals(fn, f)
		if base.Debug.MergeLocalsTrace > 0 && mls != nil {
			savedNP, savedP := mls.EstSavings()
			fmt.Fprintf(os.Stderr, "%s: %d bytes of stack space saved via stack slot merging (%d nonpointer %d pointer)\n", ir.FuncName(fn), savedNP+savedP, savedNP, savedP)
			if base.Debug.MergeLocalsTrace > 1 {
				fmt.Fprintf(os.Stderr, "=-= merge locals state for %v:\n%v",
					fn, mls)
			}
		}
		leaders = make(map[*ir.Name]int64)
	}

	// Use sort.SliceStable instead of sort.Slice so stack layout (and thus
	// compiler output) is less sensitive to frontend changes that
	// introduce or remove unused variables.
	sort.SliceStable(fn.Dcl, func(i, j int) bool {
		return cmpstackvarlt(fn.Dcl[i], fn.Dcl[j], mls)
	})

	if mls != nil {
		// Rewrite fn.Dcl to reposition followers (subsumed vars) to
		// be immediately following the leader var in their partition.
		followers := []*ir.Name{}
		newdcl := make([]*ir.Name, 0, len(fn.Dcl))
		for i := 0; i < len(fn.Dcl); i++ {
			n := fn.Dcl[i]
			if mls.Subsumed(n) {
				continue
			}
			newdcl = append(newdcl, n)
			if mls.IsLeader(n) {
				followers = mls.Followers(n, followers)
				// position followers immediately after leader
				newdcl = append(newdcl, followers...)
			}
		}
		fn.Dcl = newdcl
	}

	if base.Debug.MergeLocalsTrace > 1 && mls != nil {
		fmt.Fprintf(os.Stderr, "=-= sorted DCL for %v:\n", fn)
		for i, v := range fn.Dcl {
			if !ssa.IsMergeCandidate(v) {
				continue
			}
			fmt.Fprintf(os.Stderr, " %d: %q isleader=%v subsumed=%v used=%v sz=%d align=%d t=%s\n", i, v.Sym().Name, mls.IsLeader(v), mls.Subsumed(v), v.Used(), v.Type().Size(), v.Type().Alignment(), v.Type().String())
		}
	}
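
	// At this point fn.Dcl is fully sorted. As a hypothetical example,
	// for a merge-locals partition {a, b, c} with leader a, the rewrite
	// above leaves the slice as ..., a, b, c, ...; the loop below then
	// assigns a frame slot only to a, and the follower fixup afterwards
	// copies a's offset to b and c so that all three names share one slot.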

	// Reassign stack offsets of the locals that are used.
	lastHasPtr := false
	for i, n := range fn.Dcl {
		if n.Op() != ir.ONAME || n.Class != ir.PAUTO && !(n.Class == ir.PPARAMOUT && n.IsOutputParamInRegisters()) {
			// i.e., stack assign if AUTO, or if PARAMOUT in registers (which has no predefined spill locations)
			continue
		}
		if mls != nil && mls.Subsumed(n) {
			continue
		}
		if !n.Used() {
			fn.DebugInfo.(*ssa.FuncDebug).OptDcl = fn.Dcl[i:]
			fn.Dcl = fn.Dcl[:i]
			break
		}
		types.CalcSize(n.Type())
		w := n.Type().Size()
		if w >= types.MaxWidth || w < 0 {
			base.Fatalf("bad width")
		}
		if w == 0 && lastHasPtr {
			// Pad between a pointer-containing object and a zero-sized object.
			// This prevents a pointer to the zero-sized object from being interpreted
			// as a pointer to the pointer-containing object (and causing it
			// to be scanned when it shouldn't be). See issue 24993.
			w = 1
		}
		s.stksize += w
		s.stksize = types.RoundUp(s.stksize, n.Type().Alignment())
		if n.Type().Alignment() > int64(types.RegSize) {
			s.stkalign = n.Type().Alignment()
		}
		if n.Type().HasPointers() {
			s.stkptrsize = s.stksize
			lastHasPtr = true
		} else {
			lastHasPtr = false
		}
		n.SetFrameOffset(-s.stksize)
		if mls != nil && mls.IsLeader(n) {
			leaders[n] = -s.stksize
		}
	}

	if mls != nil {
		// Update offsets of followers (subsumed vars) to be the
		// same as the leader var in their partition.
		for i := 0; i < len(fn.Dcl); i++ {
			n := fn.Dcl[i]
			if !mls.Subsumed(n) {
				continue
			}
			leader := mls.Leader(n)
			off, ok := leaders[leader]
			if !ok {
				panic("internal error missing leader")
			}
			// Set the stack offset of this subsumed (followed) var
			// to be the same as the leader's.
			n.SetFrameOffset(off)
		}

		if base.Debug.MergeLocalsTrace > 1 {
			fmt.Fprintf(os.Stderr, "=-= stack layout for %v:\n", fn)
			for i, v := range fn.Dcl {
				if v.Op() != ir.ONAME || (v.Class != ir.PAUTO && !(v.Class == ir.PPARAMOUT && v.IsOutputParamInRegisters())) {
					continue
				}
				fmt.Fprintf(os.Stderr, " %d: %q frameoff %d isleader=%v subsumed=%v sz=%d align=%d t=%s\n", i, v.Sym().Name, v.FrameOffset(), mls.IsLeader(v), mls.Subsumed(v), v.Type().Size(), v.Type().Alignment(), v.Type().String())
			}
		}
	}

	s.stksize = types.RoundUp(s.stksize, s.stkalign)
	s.stkptrsize = types.RoundUp(s.stkptrsize, s.stkalign)
}
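
// A worked example (hypothetical types, 64-bit target with RegSize 8) of
// the offset arithmetic in AllocFrame, for two used autos sorted as a
// *byte p followed by an int32 x:
//
//	p: stksize = 0+8 = 8, rounded to 8; p gets offset -8; stkptrsize = 8
//	x: stksize = 8+4 = 12, rounded to 12; x gets offset -12
//	finally stksize is rounded up to stkalign (8), giving 16
//
// Offsets are negative because autos are addressed below the frame
// pointer, and stkptrsize covers the pointer-containing prefix of the
// frame that the garbage collector must scan.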

const maxStackSize = 1 << 30

// Compile builds an SSA backend function,
// uses it to generate a plist,
// and flushes that plist to machine code.
// worker indicates which of the backend workers is doing the processing.
func Compile(fn *ir.Func, worker int, profile *pgoir.Profile) {
	f := buildssa(fn, worker, inline.IsPgoHotFunc(fn, profile) || inline.HasPgoHotInline(fn))
	// Note: check arg size to fix issue 25507.
	if f.Frontend().(*ssafn).stksize >= maxStackSize || f.OwnAux.ArgWidth() >= maxStackSize {
		largeStackFramesMu.Lock()
		largeStackFrames = append(largeStackFrames, largeStack{locals: f.Frontend().(*ssafn).stksize, args: f.OwnAux.ArgWidth(), pos: fn.Pos()})
		largeStackFramesMu.Unlock()
		return
	}
	pp := objw.NewProgs(fn, worker)
	defer pp.Free()
	genssa(f, pp)
	// Check frame size again.
	// The check above included only the space needed for local variables.
	// After genssa, the space needed includes local variables and the callee arg region.
	// We must do this check prior to calling pp.Flush: if there are any
	// oversized stack frames, the assembler may emit inscrutable
	// complaints about invalid instructions.
	if pp.Text.To.Offset >= maxStackSize {
		largeStackFramesMu.Lock()
		locals := f.Frontend().(*ssafn).stksize
		largeStackFrames = append(largeStackFrames, largeStack{locals: locals, args: f.OwnAux.ArgWidth(), callee: pp.Text.To.Offset - locals, pos: fn.Pos()})
		largeStackFramesMu.Unlock()
		return
	}

	pp.Flush() // assemble, fill in boilerplate, etc.

	// If we're compiling the package init function, search for any
	// relocations that target global map init outline functions and
	// turn them into weak relocs.
	if fn.IsPackageInit() && base.Debug.WrapGlobalMapCtl != 1 {
		weakenGlobalMapInitRelocs(fn)
	}

	// fieldtrack must be called after pp.Flush. See issue 20014.
	fieldtrack(pp.Text.From.Sym, fn.FieldTrack)
}

// globalMapInitLsyms records the LSym of each map.init.NNN outlined
// map initializer function created by the compiler.
var globalMapInitLsyms map[*obj.LSym]struct{}

// RegisterMapInitLsym records "s" in the set of outlined map initializer
// functions.
func RegisterMapInitLsym(s *obj.LSym) {
	if globalMapInitLsyms == nil {
		globalMapInitLsyms = make(map[*obj.LSym]struct{})
	}
	globalMapInitLsyms[s] = struct{}{}
}

// weakenGlobalMapInitRelocs walks through all of the relocations on a
// given package init function "fn" and looks for relocs that target
// outlined global map initializer functions; if it finds any such
// relocs, it flags them as R_WEAK.
func weakenGlobalMapInitRelocs(fn *ir.Func) {
	if globalMapInitLsyms == nil {
		return
	}
	for i := range fn.LSym.R {
		tgt := fn.LSym.R[i].Sym
		if tgt == nil {
			continue
		}
		if _, ok := globalMapInitLsyms[tgt]; !ok {
			continue
		}
		if base.Debug.WrapGlobalMapDbg > 1 {
			fmt.Fprintf(os.Stderr, "=-= weakify fn %v reloc %d %+v\n", fn, i,
				fn.LSym.R[i])
		}
		// Set the R_WEAK bit; leave the rest of the reloc type intact.
		fn.LSym.R[i].Type |= objabi.R_WEAK
	}
}

// StackOffset returns the stack location of a LocalSlot relative to the
// stack pointer, suitable for use in a DWARF location entry. This has
// nothing to do with its offset within the user variable.
func StackOffset(slot ssa.LocalSlot) int32 {
	n := slot.N
	var off int64
	switch n.Class {
	case ir.PPARAM, ir.PPARAMOUT:
		if !n.IsOutputParamInRegisters() {
			off = n.FrameOffset() + base.Ctxt.Arch.FixedFrameSize
			break
		}
		fallthrough // PPARAMOUT in registers allocates like an AUTO
	case ir.PAUTO:
		off = n.FrameOffset()
		if base.Ctxt.Arch.FixedFrameSize == 0 {
			off -= int64(types.PtrSize)
		}
		if buildcfg.FramePointerEnabled {
			off -= int64(types.PtrSize)
		}
	}
	return int32(off + slot.Off)
}
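
// A hypothetical walkthrough of StackOffset: on a 64-bit target with
// FixedFrameSize 0 and the frame pointer enabled, an auto with
// FrameOffset -16 and slot.Off 4 computes -16 - 8 (FixedFrameSize == 0)
// - 8 (frame pointer) + 4 = -28; both PtrSize adjustments are applied
// before adding the offset of the slot within the variable.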

// fieldtrack adds R_USEFIELD relocations to fnsym to record any
// struct fields that it used.
func fieldtrack(fnsym *obj.LSym, tracked map[*obj.LSym]struct{}) {
	if fnsym == nil {
		return
	}
	if !buildcfg.Experiment.FieldTrack || len(tracked) == 0 {
		return
	}

	trackSyms := make([]*obj.LSym, 0, len(tracked))
	for sym := range tracked {
		trackSyms = append(trackSyms, sym)
	}
	sort.Slice(trackSyms, func(i, j int) bool { return trackSyms[i].Name < trackSyms[j].Name })
	for _, sym := range trackSyms {
		r := obj.Addrel(fnsym)
		r.Sym = sym
		r.Type = objabi.R_USEFIELD
	}
}

// largeStack is info about a function whose stack frame is too large (rare).
type largeStack struct {
	locals int64
	args   int64
	callee int64
	pos    src.XPos
}

var (
	largeStackFramesMu sync.Mutex // protects largeStackFrames
	largeStackFrames   []largeStack
)

// CheckLargeStacks reports an error, in source-position order, for every
// compiled function whose stack frame exceeded maxStackSize.
func CheckLargeStacks() {
	// Check whether any of the functions we have compiled have gigantic stack frames.
	sort.Slice(largeStackFrames, func(i, j int) bool {
		return largeStackFrames[i].pos.Before(largeStackFrames[j].pos)
	})
	for _, large := range largeStackFrames {
		if large.callee != 0 {
			base.ErrorfAt(large.pos, 0, "stack frame too large (>1GB): %d MB locals + %d MB args + %d MB callee", large.locals>>20, large.args>>20, large.callee>>20)
		} else {
			base.ErrorfAt(large.pos, 0, "stack frame too large (>1GB): %d MB locals + %d MB args", large.locals>>20, large.args>>20)
		}
	}
}
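
// For illustration only (hypothetical position and sizes), an oversized
// frame detected after genssa would be reported by CheckLargeStacks as
// something like:
//
//	foo.go:10: stack frame too large (>1GB): 1024 MB locals + 0 MB args + 16 MB callee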