// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssagen

import (
	"fmt"
	"internal/buildcfg"
	"os"
	"sort"
	"sync"

	"cmd/compile/internal/base"
	"cmd/compile/internal/inline"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/liveness"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/pgoir"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
)

// cmpstackvarlt reports whether the stack variable a sorts before b.
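// It establishes the stack layout order used by AllocFrame below.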
func cmpstackvarlt(a, b *ir.Name, mls *liveness.MergeLocalsState) bool {
	// Sort non-autos before autos.
	if needAlloc(a) != needAlloc(b) {
		return needAlloc(b)
	}

	// If both are non-auto (e.g., parameters, results), then sort by
	// frame offset (defined by ABI).
	if !needAlloc(a) {
		return a.FrameOffset() < b.FrameOffset()
	}

	// From here on, a and b are both autos (i.e., local variables).

	// Sort followers after leaders, if mls != nil.
	if mls != nil {
		aFollow := mls.Subsumed(a)
		bFollow := mls.Subsumed(b)
		if aFollow != bFollow {
			return bFollow
		}
	}

	// Sort used before unused (so AllocFrame can truncate unused
	// variables).
	if a.Used() != b.Used() {
		return a.Used()
	}
	// Sort pointer-typed variables before non-pointer-typed ones.
	// This keeps the stack's GC bitmap compact.
	ap := a.Type().HasPointers()
	bp := b.Type().HasPointers()
	if ap != bp {
		return ap
	}

	// Group variables that need zeroing, so we can efficiently zero
	// them all together.
	ap = a.Needzero()
	bp = b.Needzero()
	if ap != bp {
		return ap
	}

	// Sort variables in descending alignment order, so we can optimally
	// pack variables into the frame.
	if a.Type().Alignment() != b.Type().Alignment() {
		return a.Type().Alignment() > b.Type().Alignment()
	}

	// Sort normal variables before open-coded-defer slots, so that the
	// latter are grouped together and near the top of the frame (to
	// minimize varint encoding of their varp offset).
	if a.OpenDeferSlot() != b.OpenDeferSlot() {
		return a.OpenDeferSlot()
	}

	// If a and b are both open-coded defer slots, then order them by
	// index in descending order, so they'll be laid out in the frame in
	// ascending order.
	//
	// Their index was saved in FrameOffset in state.openDeferSave.
	if a.OpenDeferSlot() {
		return a.FrameOffset() > b.FrameOffset()
	}

	// Tie breaker for stable results.
	return a.Sym().Name < b.Sym().Name
}

// needAlloc reports whether n is within the current frame, for which we need
// to allocate space. In particular, it excludes arguments and results, which
// are in the caller's frame.
func needAlloc(n *ir.Name) bool {
	if n.Op() != ir.ONAME {
		base.FatalfAt(n.Pos(), "%v has unexpected Op %v", n, n.Op())
	}

	switch n.Class {
	case ir.PAUTO:
		return true
	case ir.PPARAM:
		return false
	case ir.PPARAMOUT:
		return n.IsOutputParamInRegisters()
	default:
		base.FatalfAt(n.Pos(), "%v has unexpected Class %v", n, n.Class)
		return false
	}
}

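// AllocFrame computes the stack frame layout for f: it sorts fn.Dcl into
// layout order, assigns a frame offset to each local variable that is
// actually used, and records the resulting frame size (stksize), the size
// of its pointer-containing prefix (stkptrsize), and the frame alignment
// (stkalign).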
func (s *ssafn) AllocFrame(f *ssa.Func) {
	s.stksize = 0
	s.stkptrsize = 0
	s.stkalign = int64(types.RegSize)
	fn := s.curfn

	// Mark the PAUTO's unused.
	for _, ln := range fn.Dcl {
		if ln.OpenDeferSlot() {
			// Open-coded defer slots have indices that were assigned
			// upfront during SSA construction, but the defer statement can
			// later get removed during deadcode elimination (#61895). To
			// keep their relative offsets correct, treat them all as used.
			continue
		}

		if needAlloc(ln) {
			ln.SetUsed(false)
		}
	}

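	// Mark any local slots assigned by the register allocator as used.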
	for _, l := range f.RegAlloc {
		if ls, ok := l.(ssa.LocalSlot); ok {
			ls.N.SetUsed(true)
		}
	}

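	// Mark names referenced by SSA values as used.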
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if n, ok := v.Aux.(*ir.Name); ok {
				switch n.Class {
				case ir.PPARAMOUT:
					if n.IsOutputParamInRegisters() && v.Op == ssa.OpVarDef {
						// ignore VarDef, look for "real" uses.
						// TODO: maybe do this for PAUTO as well?
						continue
					}
					fallthrough
				case ir.PPARAM, ir.PAUTO:
					n.SetUsed(true)
				}
			}
		}
	}

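	// If stack slot merging is enabled, compute a merge plan: locals with
	// non-overlapping lifetimes are partitioned so that a single "leader"
	// variable provides the stack slot for the "follower" (subsumed)
	// variables in its partition. The leaders map records each leader's
	// final frame offset so follower offsets can be fixed up after layout.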
	var mls *liveness.MergeLocalsState
	var leaders map[*ir.Name]int64
	if base.Debug.MergeLocals != 0 {
		mls = liveness.MergeLocals(fn, f)
		if base.Debug.MergeLocalsTrace > 0 && mls != nil {
			savedNP, savedP := mls.EstSavings()
			fmt.Fprintf(os.Stderr, "%s: %d bytes of stack space saved via stack slot merging (%d nonpointer %d pointer)\n", ir.FuncName(fn), savedNP+savedP, savedNP, savedP)
			if base.Debug.MergeLocalsTrace > 1 {
				fmt.Fprintf(os.Stderr, "=-= merge locals state for %v:\n%v",
					fn, mls)
			}
		}
		leaders = make(map[*ir.Name]int64)
	}

	// Use sort.SliceStable instead of sort.Slice so stack layout (and thus
	// compiler output) is less sensitive to frontend changes that
	// introduce or remove unused variables.
	sort.SliceStable(fn.Dcl, func(i, j int) bool {
		return cmpstackvarlt(fn.Dcl[i], fn.Dcl[j], mls)
	})

	if mls != nil {
		// Rewrite fn.Dcl to reposition followers (subsumed vars) to
		// be immediately following the leader var in their partition.
		followers := []*ir.Name{}
		newdcl := make([]*ir.Name, 0, len(fn.Dcl))
		for i := 0; i < len(fn.Dcl); i++ {
			n := fn.Dcl[i]
			if mls.Subsumed(n) {
				continue
			}
			newdcl = append(newdcl, n)
			if mls.IsLeader(n) {
				followers = mls.Followers(n, followers)
				// position followers immediately after leader
				newdcl = append(newdcl, followers...)
			}
		}
		fn.Dcl = newdcl
	}

	if base.Debug.MergeLocalsTrace > 1 && mls != nil {
		fmt.Fprintf(os.Stderr, "=-= sorted DCL for %v:\n", fn)
		for i, v := range fn.Dcl {
			if !ssa.IsMergeCandidate(v) {
				continue
			}
			fmt.Fprintf(os.Stderr, " %d: %q isleader=%v subsumed=%v used=%v sz=%d align=%d t=%s\n", i, v.Sym().Name, mls.IsLeader(v), mls.Subsumed(v), v.Used(), v.Type().Size(), v.Type().Alignment(), v.Type().String())
		}
	}

	// Reassign stack offsets of the locals that are used.
	lastHasPtr := false
	for i, n := range fn.Dcl {
		if n.Op() != ir.ONAME || n.Class != ir.PAUTO && !(n.Class == ir.PPARAMOUT && n.IsOutputParamInRegisters()) {
			// i.e., stack-assign only AUTOs, and PARAMOUTs in
			// registers (which have no predefined spill locations).
			continue
		}
		if mls != nil && mls.Subsumed(n) {
			continue
		}
		if !n.Used() {
			fn.DebugInfo.(*ssa.FuncDebug).OptDcl = fn.Dcl[i:]
			fn.Dcl = fn.Dcl[:i]
			break
		}
		types.CalcSize(n.Type())
		w := n.Type().Size()
		if w >= types.MaxWidth || w < 0 {
			base.Fatalf("bad width")
		}
		if w == 0 && lastHasPtr {
			// Pad between a pointer-containing object and a zero-sized object.
			// This prevents a pointer to the zero-sized object from being interpreted
			// as a pointer to the pointer-containing object (and causing it
			// to be scanned when it shouldn't be). See issue 24993.
			w = 1
		}
		s.stksize += w
		s.stksize = types.RoundUp(s.stksize, n.Type().Alignment())
		if n.Type().Alignment() > int64(types.RegSize) {
			s.stkalign = n.Type().Alignment()
		}
		if n.Type().HasPointers() {
			s.stkptrsize = s.stksize
			lastHasPtr = true
		} else {
			lastHasPtr = false
		}
		n.SetFrameOffset(-s.stksize)
		if mls != nil && mls.IsLeader(n) {
			leaders[n] = -s.stksize
		}
	}

	if mls != nil {
		// Update offsets of followers (subsumed vars) to be the
		// same as the leader var in their partition.
		for i := 0; i < len(fn.Dcl); i++ {
			n := fn.Dcl[i]
			if !mls.Subsumed(n) {
				continue
			}
			leader := mls.Leader(n)
			off, ok := leaders[leader]
			if !ok {
				panic("internal error missing leader")
			}
			// Set the stack offset of this subsumed (followed)
			// var to be the same as its leader's.
			n.SetFrameOffset(off)
		}

		if base.Debug.MergeLocalsTrace > 1 {
			fmt.Fprintf(os.Stderr, "=-= stack layout for %v:\n", fn)
			for i, v := range fn.Dcl {
				if v.Op() != ir.ONAME || (v.Class != ir.PAUTO && !(v.Class == ir.PPARAMOUT && v.IsOutputParamInRegisters())) {
					continue
				}
				fmt.Fprintf(os.Stderr, " %d: %q frameoff %d isleader=%v subsumed=%v sz=%d align=%d t=%s\n", i, v.Sym().Name, v.FrameOffset(), mls.IsLeader(v), mls.Subsumed(v), v.Type().Size(), v.Type().Alignment(), v.Type().String())
			}
		}
	}

	s.stksize = types.RoundUp(s.stksize, s.stkalign)
	s.stkptrsize = types.RoundUp(s.stkptrsize, s.stkalign)
}

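// maxStackSize is the largest frame size (1 GiB) accepted for either
// locals or arguments; functions that exceed it are recorded in
// largeStackFrames and reported by CheckLargeStacks instead of being
// compiled.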
const maxStackSize = 1 << 30

// Compile builds an SSA backend function,
// uses it to generate a plist,
// and flushes that plist to machine code.
// worker indicates which of the backend workers is doing the processing.
func Compile(fn *ir.Func, worker int, profile *pgoir.Profile) {
	f := buildssa(fn, worker, inline.IsPgoHotFunc(fn, profile) || inline.HasPgoHotInline(fn))
	// Note: check arg size to fix issue 25507.
	if f.Frontend().(*ssafn).stksize >= maxStackSize || f.OwnAux.ArgWidth() >= maxStackSize {
		largeStackFramesMu.Lock()
		largeStackFrames = append(largeStackFrames, largeStack{locals: f.Frontend().(*ssafn).stksize, args: f.OwnAux.ArgWidth(), pos: fn.Pos()})
		largeStackFramesMu.Unlock()
		return
	}
	pp := objw.NewProgs(fn, worker)
	defer pp.Free()
	genssa(f, pp)
	// Check frame size again.
	// The check above included only the space needed for local variables.
	// After genssa, the space needed includes local variables and the callee arg region.
	// We must do this check prior to calling pp.Flush.
	// If there are any oversized stack frames,
	// the assembler may emit inscrutable complaints about invalid instructions.
	if pp.Text.To.Offset >= maxStackSize {
		largeStackFramesMu.Lock()
		locals := f.Frontend().(*ssafn).stksize
		largeStackFrames = append(largeStackFrames, largeStack{locals: locals, args: f.OwnAux.ArgWidth(), callee: pp.Text.To.Offset - locals, pos: fn.Pos()})
		largeStackFramesMu.Unlock()
		return
	}

	pp.Flush() // assemble, fill in boilerplate, etc.

	// If we're compiling the package init function, search for any
	// relocations that target global map init outline functions and
	// turn them into weak relocs.
	if fn.IsPackageInit() && base.Debug.WrapGlobalMapCtl != 1 {
		weakenGlobalMapInitRelocs(fn)
	}

	// fieldtrack must be called after pp.Flush. See issue 20014.
	fieldtrack(pp.Text.From.Sym, fn.FieldTrack)
}

// globalMapInitLsyms records the LSym of each map.init.NNN outlined
// map initializer function created by the compiler.
var globalMapInitLsyms map[*obj.LSym]struct{}

// RegisterMapInitLsym records "s" in the set of outlined map initializer
// functions.
func RegisterMapInitLsym(s *obj.LSym) {
	if globalMapInitLsyms == nil {
		globalMapInitLsyms = make(map[*obj.LSym]struct{})
	}
	globalMapInitLsyms[s] = struct{}{}
}

// weakenGlobalMapInitRelocs walks through all of the relocations on a
// given package init function "fn" and looks for relocs that target
// outlined global map initializer functions; if it finds any such
// relocs, it flags them as R_WEAK.
func weakenGlobalMapInitRelocs(fn *ir.Func) {
	if globalMapInitLsyms == nil {
		return
	}
	for i := range fn.LSym.R {
		tgt := fn.LSym.R[i].Sym
		if tgt == nil {
			continue
		}
		if _, ok := globalMapInitLsyms[tgt]; !ok {
			continue
		}
		if base.Debug.WrapGlobalMapDbg > 1 {
			fmt.Fprintf(os.Stderr, "=-= weakify fn %v reloc %d %+v\n", fn, i,
				fn.LSym.R[i])
		}
		// set the R_WEAK bit, leave rest of reloc type intact
		fn.LSym.R[i].Type |= objabi.R_WEAK
	}
}

// StackOffset returns the stack location of a LocalSlot relative to the
// stack pointer, suitable for use in a DWARF location entry. This has nothing
// to do with its offset in the user variable.
func StackOffset(slot ssa.LocalSlot) int32 {
	n := slot.N
	var off int64
	switch n.Class {
	case ir.PPARAM, ir.PPARAMOUT:
		if !n.IsOutputParamInRegisters() {
			off = n.FrameOffset() + base.Ctxt.Arch.FixedFrameSize
			break
		}
		fallthrough // PPARAMOUT in registers allocates like an AUTO
	case ir.PAUTO:
		off = n.FrameOffset()
		if base.Ctxt.Arch.FixedFrameSize == 0 {
			off -= int64(types.PtrSize)
		}
		if buildcfg.FramePointerEnabled {
			off -= int64(types.PtrSize)
		}
	}
	return int32(off + slot.Off)
}

// fieldtrack adds R_USEFIELD relocations to fnsym to record any
// struct fields that it used.
func fieldtrack(fnsym *obj.LSym, tracked map[*obj.LSym]struct{}) {
	if fnsym == nil {
		return
	}
	if !buildcfg.Experiment.FieldTrack || len(tracked) == 0 {
		return
	}

	trackSyms := make([]*obj.LSym, 0, len(tracked))
	for sym := range tracked {
		trackSyms = append(trackSyms, sym)
	}
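	// Sort by name so relocations are added in a deterministic order.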
	sort.Slice(trackSyms, func(i, j int) bool { return trackSyms[i].Name < trackSyms[j].Name })
	for _, sym := range trackSyms {
		r := obj.Addrel(fnsym)
		r.Sym = sym
		r.Type = objabi.R_USEFIELD
	}
}

// largeStack is info about a function whose stack frame is too large (rare).
type largeStack struct {
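	// locals, args, and callee are sizes in bytes.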
	locals int64
	args   int64
	callee int64
	pos    src.XPos
}

var (
	largeStackFramesMu sync.Mutex // protects largeStackFrames
	largeStackFrames   []largeStack
)

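// CheckLargeStacks reports an error for each compiled function whose
// stack frame exceeded maxStackSize, in source position order.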
func CheckLargeStacks() {
	// Check whether any of the functions we have compiled have gigantic stack frames.
	sort.Slice(largeStackFrames, func(i, j int) bool {
		return largeStackFrames[i].pos.Before(largeStackFrames[j].pos)
	})
	for _, large := range largeStackFrames {
		if large.callee != 0 {
			base.ErrorfAt(large.pos, 0, "stack frame too large (>1GB): %d MB locals + %d MB args + %d MB callee", large.locals>>20, large.args>>20, large.callee>>20)
		} else {
			base.ErrorfAt(large.pos, 0, "stack frame too large (>1GB): %d MB locals + %d MB args", large.locals>>20, large.args>>20)
		}
	}
}