// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// TODO: live at start of block instead?

package ssa

import (
	"cmd/compile/internal/ir"
	"cmd/compile/internal/types"
	"cmd/internal/src"
	"fmt"
)

type stackAllocState struct {
	f *Func

	// live is the output of stackalloc.
	// live[b.id] = live values at the end of block b.
	live [][]ID

	// The following slices are reused across multiple users
	// of stackAllocState.
	values    []stackValState
	interfere [][]ID // interfere[v.id] = values that interfere with v.
	names     []LocalSlot

	nArgSlot, // Number of Values sourced to arg slot
	nNotNeed, // Number of Values not needing a stack slot
	nNamedSlot, // Number of Values using a named stack slot
	nReuse, // Number of Values reusing a stack slot
	nAuto, // Number of autos allocated for stack slots
	nSelfInterfere int32 // Number of self-interferences
}

func newStackAllocState(f *Func) *stackAllocState {
	s := f.Cache.stackAllocState
	if s == nil {
		return new(stackAllocState)
	}
	if s.f != nil {
		f.fe.Fatalf(src.NoXPos, "newStackAllocState called without previous free")
	}
	return s
}

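// putStackAllocState scrubs s and returns it to the per-Func cache, so
// the next stackalloc run can reuse the backing arrays of the values,
// interfere, and names slices instead of reallocating them.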
func putStackAllocState(s *stackAllocState) {
	for i := range s.values {
		s.values[i] = stackValState{}
	}
	for i := range s.interfere {
		s.interfere[i] = nil
	}
	for i := range s.names {
		s.names[i] = LocalSlot{}
	}
	s.f.Cache.stackAllocState = s
	s.f = nil
	s.live = nil
	s.nArgSlot, s.nNotNeed, s.nNamedSlot, s.nReuse, s.nAuto, s.nSelfInterfere = 0, 0, 0, 0, 0, 0
}

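// stackValState is the per-Value state tracked by stackalloc: the value's
// type, the OpStoreReg that spills it (if any), whether it needs a stack
// slot at all, and whether it is a function argument.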
type stackValState struct {
	typ      *types.Type
	spill    *Value
	needSlot bool
	isArg    bool
}

// stackalloc allocates storage in the stack frame for
// all Values that did not get a register.
// Returns a map from block ID to the stack values live at the end of that block.
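//
// In effect, the allocation is a greedy coloring: liveness is computed
// for every slot-needing value, an interference graph is built from it,
// and each value then gets the first existing slot of its type that no
// interfering value occupies, with a fresh auto minted when none is free.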
func stackalloc(f *Func, spillLive [][]ID) [][]ID {
	if f.pass.debug > stackDebug {
		fmt.Println("before stackalloc")
		fmt.Println(f.String())
	}
	s := newStackAllocState(f)
	s.init(f, spillLive)
	defer putStackAllocState(s)

	s.stackalloc()
	if f.pass.stats > 0 {
		f.LogStat("stack_alloc_stats",
			s.nArgSlot, "arg_slots", s.nNotNeed, "slot_not_needed",
			s.nNamedSlot, "named_slots", s.nAuto, "auto_slots",
			s.nReuse, "reused_slots", s.nSelfInterfere, "self_interfering")
	}

	return s.live
}

func (s *stackAllocState) init(f *Func, spillLive [][]ID) {
	s.f = f

	// Initialize value information.
	if n := f.NumValues(); cap(s.values) >= n {
		s.values = s.values[:n]
	} else {
		s.values = make([]stackValState, n)
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			s.values[v.ID].typ = v.Type
			s.values[v.ID].needSlot = !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && f.getHome(v.ID) == nil && !v.rematerializeable() && !v.OnWasmStack
			s.values[v.ID].isArg = hasAnyArgOp(v)
			if f.pass.debug > stackDebug && s.values[v.ID].needSlot {
				fmt.Printf("%s needs a stack slot\n", v)
			}
			if v.Op == OpStoreReg {
				s.values[v.Args[0].ID].spill = v
			}
		}
	}

	// Compute liveness info for values needing a slot.
	s.computeLive(spillLive)

	// Build interference graph among values needing a slot.
	s.buildInterferenceGraph()
}

func (s *stackAllocState) stackalloc() {
	f := s.f

	// Build map from values to their names, if any.
	// A value may be associated with more than one name (e.g. after
	// the assignment i=j). This step picks one name per value arbitrarily.
	if n := f.NumValues(); cap(s.names) >= n {
		s.names = s.names[:n]
	} else {
		s.names = make([]LocalSlot, n)
	}
	names := s.names
	empty := LocalSlot{}
	for _, name := range f.Names {
		// Note: not "range f.NamedValues" above, because
		// that would be nondeterministic.
		for _, v := range f.NamedValues[*name] {
			if v.Op == OpArgIntReg || v.Op == OpArgFloatReg {
				aux := v.Aux.(*AuxNameOffset)
				// Never let an arg be bound to a differently named thing.
				if name.N != aux.Name || name.Off != aux.Offset {
					if f.pass.debug > stackDebug {
						fmt.Printf("stackalloc register arg %s skipping name %s\n", v, name)
					}
					continue
				}
			} else if name.N.Class == ir.PPARAM && v.Op != OpArg {
				// PPARAMs bind only to OpArg values.
				if f.pass.debug > stackDebug {
					fmt.Printf("stackalloc PPARAM name %s skipping non-Arg %s\n", name, v)
				}
				continue
			}

			if names[v.ID] == empty {
				if f.pass.debug > stackDebug {
					fmt.Printf("stackalloc value %s to name %s\n", v, *name)
				}
				names[v.ID] = *name
			}
		}
	}

	// Allocate args to their assigned locations.
	for _, v := range f.Entry.Values {
		if !hasAnyArgOp(v) {
			continue
		}
		if v.Aux == nil {
			f.Fatalf("%s has nil Aux\n", v.LongString())
		}
		if v.Op == OpArg {
			loc := LocalSlot{N: v.Aux.(*ir.Name), Type: v.Type, Off: v.AuxInt}
			if f.pass.debug > stackDebug {
				fmt.Printf("stackalloc OpArg %s to %s\n", v, loc)
			}
			f.setHome(v, loc)
			continue
		}
		// You might think this below would be the right idea, but you would be wrong.
		// It almost works; as of 105a6e9518 - 2021-04-23,
		// GOSSAHASH=11011011001011111 == cmd/compile/internal/noder.(*noder).embedded
		// is compiled incorrectly.  I believe the cause is one of those SSA-to-registers
		// puzzles that the register allocator untangles; in the event that a register
		// parameter does not end up bound to a name, "fixing" it is a bad idea.
		//
		//if f.DebugTest {
		//	if v.Op == OpArgIntReg || v.Op == OpArgFloatReg {
		//		aux := v.Aux.(*AuxNameOffset)
		//		loc := LocalSlot{N: aux.Name, Type: v.Type, Off: aux.Offset}
		//		if f.pass.debug > stackDebug {
		//			fmt.Printf("stackalloc Op%s %s to %s\n", v.Op, v, loc)
		//		}
		//		names[v.ID] = loc
		//		continue
		//	}
		//}

	}

	// For each type, we keep track of all the stack slots we
	// have allocated for that type. This map is keyed by
	// strings returned by types.LinkString. This guarantees
	// type equality, but also lets us match the same type represented
	// by two different types.Type structures. See issue 65783.
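	// For example, the same type reached through two distinct
	// *types.Type pointers still produces the same LinkString key,
	// so values of that type draw from one shared pool of slots.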
	locations := map[string][]LocalSlot{}

	// Each time we assign a stack slot to a value v, we remember
	// the slot we used via an index into locations[v.Type].
	slots := f.Cache.allocIntSlice(f.NumValues())
	defer f.Cache.freeIntSlice(slots)
	for i := range slots {
		slots[i] = -1
	}

	// Pick a stack slot for each value needing one.
	used := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(used)
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if !s.values[v.ID].needSlot {
				s.nNotNeed++
				continue
			}
			if hasAnyArgOp(v) {
				s.nArgSlot++
				continue // already picked
			}

			// If this is a named value, try to use the name as
			// the spill location.
			var name LocalSlot
			if v.Op == OpStoreReg {
				name = names[v.Args[0].ID]
			} else {
				name = names[v.ID]
			}
			if name.N != nil && v.Type.Compare(name.Type) == types.CMPeq {
				for _, id := range s.interfere[v.ID] {
					h := f.getHome(id)
					if h != nil && h.(LocalSlot).N == name.N && h.(LocalSlot).Off == name.Off {
						// A variable can interfere with itself.
						// It is rare, but it can happen.
						s.nSelfInterfere++
						goto noname
					}
				}
				if f.pass.debug > stackDebug {
					fmt.Printf("stackalloc %s to %s\n", v, name)
				}
				s.nNamedSlot++
				f.setHome(v, name)
				continue
			}

		noname:
			// Set of stack slots we could reuse.
			typeKey := v.Type.LinkString()
			locs := locations[typeKey]
			// Mark all positions in locs used by interfering values.
			for i := 0; i < len(locs); i++ {
				used[i] = false
			}
			for _, xid := range s.interfere[v.ID] {
				slot := slots[xid]
				if slot >= 0 {
					used[slot] = true
				}
			}
			// Find an unused stack slot.
			var i int
			for i = 0; i < len(locs); i++ {
				if !used[i] {
					s.nReuse++
					break
				}
			}
			// If there is no unused stack slot, allocate a new one.
			if i == len(locs) {
				s.nAuto++
				locs = append(locs, LocalSlot{N: f.NewLocal(v.Pos, v.Type), Type: v.Type, Off: 0})
				locations[typeKey] = locs
			}
			// Use the stack variable at that index for v.
			loc := locs[i]
			if f.pass.debug > stackDebug {
				fmt.Printf("stackalloc %s to %s\n", v, loc)
			}
			f.setHome(v, loc)
			slots[v.ID] = i
		}
	}
}

// computeLive computes a map from block ID to a list of
// stack-slot-needing value IDs live at the end of that block.
// TODO: this could be quadratic if lots of variables are live across lots of
// basic blocks. Figure out a way to make this function (or, more precisely, the user
// of this function) require only linear size & time.
func (s *stackAllocState) computeLive(spillLive [][]ID) {
	s.live = make([][]ID, s.f.NumBlocks())
	var phis []*Value
	live := s.f.newSparseSet(s.f.NumValues())
	defer s.f.retSparseSet(live)
	t := s.f.newSparseSet(s.f.NumValues())
	defer s.f.retSparseSet(t)

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
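	// Conceptually this solves the standard backward dataflow equations
	//
	//	liveOut(b) = union of liveIn(s) over all successors s of b
	//	liveIn(b)  = (liveOut(b) - defs(b)) + uses(b)
	//
	// iterated to a fixed point, with a phi's inputs counted as live
	// only on the corresponding predecessor edge.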
	po := s.f.postorder()
	for {
		changed := false
		for _, b := range po {
			// Start with known live values at the end of the block
			live.clear()
			live.addAll(s.live[b.ID])

			// Propagate backwards to the start of the block
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi for later.
					// Note: its args might need a stack slot even though
					// the phi itself doesn't. So don't use needSlot.
					if !v.Type.IsMemory() && !v.Type.IsVoid() {
						phis = append(phis, v)
					}
					continue
				}
				for _, a := range v.Args {
					if s.values[a.ID].needSlot {
						live.add(a.ID)
					}
				}
			}

			// for each predecessor of b, expand its list of live-at-end values
			// invariant: live contains the values live at the start of b (excluding phi inputs)
			for i, e := range b.Preds {
				p := e.b
				t.clear()
				t.addAll(s.live[p.ID])
				t.addAll(live.contents())
				t.addAll(spillLive[p.ID])
				for _, v := range phis {
					a := v.Args[i]
					if s.values[a.ID].needSlot {
						t.add(a.ID)
					}
					if spill := s.values[a.ID].spill; spill != nil {
						// TODO: remove? Subsumed by SpillUse?
						t.add(spill.ID)
					}
				}
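				// t was seeded with s.live[p.ID], so equal sizes mean
				// no new live values were found for p.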
				if t.size() == len(s.live[p.ID]) {
					continue
				}
				// grow p's live set
				s.live[p.ID] = append(s.live[p.ID][:0], t.contents()...)
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if s.f.pass.debug > stackDebug {
		for _, b := range s.f.Blocks {
			fmt.Printf("stacklive %s %v\n", b, s.live[b.ID])
		}
	}
}

func (f *Func) getHome(vid ID) Location {
	if int(vid) >= len(f.RegAlloc) {
		return nil
	}
	return f.RegAlloc[vid]
}

func (f *Func) setHome(v *Value, loc Location) {
	for v.ID >= ID(len(f.RegAlloc)) {
		f.RegAlloc = append(f.RegAlloc, nil)
	}
	f.RegAlloc[v.ID] = loc
}

func (s *stackAllocState) buildInterferenceGraph() {
	f := s.f
	if n := f.NumValues(); cap(s.interfere) >= n {
		s.interfere = s.interfere[:n]
	} else {
		s.interfere = make([][]ID, n)
	}
	live := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(live)
	for _, b := range f.Blocks {
		// Propagate liveness backwards to the start of the block.
		// Two values interfere if one is defined while the other is live.
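		// For example, if v is defined at a point where w is still live
		// and both need slots, sharing one slot would let the store that
		// defines v clobber w before w's last use.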
		live.clear()
		live.addAll(s.live[b.ID])
		for i := len(b.Values) - 1; i >= 0; i-- {
			v := b.Values[i]
			if s.values[v.ID].needSlot {
				live.remove(v.ID)
				for _, id := range live.contents() {
					// Note: args can have different types and still interfere
					// (with each other or with other values). See issue 23522.
					if s.values[v.ID].typ.Compare(s.values[id].typ) == types.CMPeq || hasAnyArgOp(v) || s.values[id].isArg {
						s.interfere[v.ID] = append(s.interfere[v.ID], id)
						s.interfere[id] = append(s.interfere[id], v.ID)
					}
				}
			}
			for _, a := range v.Args {
				if s.values[a.ID].needSlot {
					live.add(a.ID)
				}
			}
			if hasAnyArgOp(v) && s.values[v.ID].needSlot {
				// OpArg is an input argument which is pre-spilled.
				// We add back v.ID here because we want this value
				// to appear live even before this point. Being live
				// all the way to the start of the entry block prevents other
				// values from being allocated to the same slot and clobbering
				// the input value before we have a chance to load it.

				// TODO(register args) this is apparently not wrong for register args -- is it necessary?
				live.add(v.ID)
			}
		}
	}
	if f.pass.debug > stackDebug {
		for vid, i := range s.interfere {
			if len(i) > 0 {
				fmt.Printf("v%d interferes with", vid)
				for _, x := range i {
					fmt.Printf(" v%d", x)
				}
				fmt.Println()
			}
		}
	}
}

func hasAnyArgOp(v *Value) bool {
	return v.Op == OpArg || v.Op == OpArgIntReg || v.Op == OpArgFloatReg
}