1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ssa
6
7import "cmd/compile/internal/base"
8
9// tighten moves Values closer to the Blocks in which they are used.
10// This can reduce the amount of register spilling required,
11// if it doesn't also create more live values.
12// A Value can be moved to any block that
13// dominates all blocks in which it is used.
14func tighten(f *Func) {
15	if base.Flag.N != 0 && len(f.Blocks) < 10000 {
16		// Skip the optimization in -N mode, except for huge functions.
17		// Too many values live across blocks can cause pathological
18		// behavior in the register allocator (see issue 52180).
19		return
20	}
21
22	canMove := f.Cache.allocBoolSlice(f.NumValues())
23	defer f.Cache.freeBoolSlice(canMove)
24
25	// Compute the memory states of each block.
26	startMem := f.Cache.allocValueSlice(f.NumBlocks())
27	defer f.Cache.freeValueSlice(startMem)
28	endMem := f.Cache.allocValueSlice(f.NumBlocks())
29	defer f.Cache.freeValueSlice(endMem)
30	memState(f, startMem, endMem)
31
32	for _, b := range f.Blocks {
33		for _, v := range b.Values {
34			if v.Op.isLoweredGetClosurePtr() {
35				// Must stay in the entry block.
36				continue
37			}
38			switch v.Op {
39			case OpPhi, OpArg, OpArgIntReg, OpArgFloatReg, OpSelect0, OpSelect1, OpSelectN:
40				// Phis need to stay in their block.
41				// Arg must stay in the entry block.
42				// Tuple selectors must stay with the tuple generator.
43				// SelectN is typically, ultimately, a register.
44				continue
45			}
46			// Count arguments which will need a register.
47			narg := 0
48			for _, a := range v.Args {
49				// SP and SB are special registers and have no effect on
50				// the allocation of general-purpose registers.
51				if a.needRegister() && a.Op != OpSB && a.Op != OpSP {
52					narg++
53				}
54			}
55			if narg >= 2 && !v.Type.IsFlags() {
56				// Don't move values with more than one input, as that may
57				// increase register pressure.
58				// We make an exception for flags, as we want flag generators
59				// moved next to uses (because we only have 1 flag register).
60				continue
61			}
62			canMove[v.ID] = true
63		}
64	}
65
66	// Build data structure for fast least-common-ancestor queries.
67	lca := makeLCArange(f)
68
69	// For each moveable value, record the block that dominates all uses found so far.
70	target := f.Cache.allocBlockSlice(f.NumValues())
71	defer f.Cache.freeBlockSlice(target)
72
73	// Grab loop information.
74	// We use this to make sure we don't tighten a value into a (deeper) loop.
75	idom := f.Idom()
76	loops := f.loopnest()
77	loops.calculateDepths()
78
79	changed := true
80	for changed {
81		changed = false
82
83		// Reset target
84		for i := range target {
85			target[i] = nil
86		}
87
88		// Compute target locations (for moveable values only).
89		// target location = the least common ancestor of all uses in the dominator tree.
90		for _, b := range f.Blocks {
91			for _, v := range b.Values {
92				for i, a := range v.Args {
93					if !canMove[a.ID] {
94						continue
95					}
96					use := b
97					if v.Op == OpPhi {
98						use = b.Preds[i].b
99					}
100					if target[a.ID] == nil {
101						target[a.ID] = use
102					} else {
103						target[a.ID] = lca.find(target[a.ID], use)
104					}
105				}
106			}
107			for _, c := range b.ControlValues() {
108				if !canMove[c.ID] {
109					continue
110				}
111				if target[c.ID] == nil {
112					target[c.ID] = b
113				} else {
114					target[c.ID] = lca.find(target[c.ID], b)
115				}
116			}
117		}
118
119		// If the target location is inside a loop,
120		// move the target location up to just before the loop head.
121		for _, b := range f.Blocks {
122			origloop := loops.b2l[b.ID]
123			for _, v := range b.Values {
124				t := target[v.ID]
125				if t == nil {
126					continue
127				}
128				targetloop := loops.b2l[t.ID]
129				for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
130					t = idom[targetloop.header.ID]
131					target[v.ID] = t
132					targetloop = loops.b2l[t.ID]
133				}
134			}
135		}
136
137		// Move values to target locations.
138		for _, b := range f.Blocks {
139			for i := 0; i < len(b.Values); i++ {
140				v := b.Values[i]
141				t := target[v.ID]
142				if t == nil || t == b {
143					// v is not moveable, or is already in correct place.
144					continue
145				}
146				if mem := v.MemoryArg(); mem != nil {
147					if startMem[t.ID] != mem {
148						// We can't move a value with a memory arg unless the target block
149						// has that memory arg as its starting memory.
150						continue
151					}
152				}
153				if f.pass.debug > 0 {
154					b.Func.Warnl(v.Pos, "%v is moved", v.Op)
155				}
156				// Move v to the block which dominates its uses.
157				t.Values = append(t.Values, v)
158				v.Block = t
159				last := len(b.Values) - 1
160				b.Values[i] = b.Values[last]
161				b.Values[last] = nil
162				b.Values = b.Values[:last]
163				changed = true
164				i--
165			}
166		}
167	}
168}
169
170// phiTighten moves constants closer to phi users.
171// This pass avoids having lots of constants live for lots of the program.
172// See issue 16407.
173func phiTighten(f *Func) {
174	for _, b := range f.Blocks {
175		for _, v := range b.Values {
176			if v.Op != OpPhi {
177				continue
178			}
179			for i, a := range v.Args {
180				if !a.rematerializeable() {
181					continue // not a constant we can move around
182				}
183				if a.Block == b.Preds[i].b {
184					continue // already in the right place
185				}
186				// Make a copy of a, put in predecessor block.
187				v.SetArg(i, a.copyInto(b.Preds[i].b))
188			}
189		}
190	}
191}
192
193// memState computes the memory state at the beginning and end of each block of
194// the function. The memory state is represented by a value of mem type.
195// The returned result is stored in startMem and endMem, and endMem is nil for
196// blocks with no successors (Exit,Ret,RetJmp blocks). This algorithm is not
197// suitable for infinite loop blocks that do not contain any mem operations.
198// For example:
199// b1:
200//
201//	(some values)
202//
203// plain -> b2
204// b2: <- b1 b2
205// Plain -> b2
206//
207// Algorithm introduction:
208//  1. The start memory state of a block is InitMem, a Phi node of type mem or
209//     an incoming memory value.
210//  2. The start memory state of a block is consistent with the end memory state
211//     of its parent nodes. If the start memory state of a block is a Phi value,
212//     then the end memory state of its parent nodes is consistent with the
213//     corresponding argument value of the Phi node.
214//  3. The algorithm first obtains the memory state of some blocks in the tree
215//     in the first step. Then floods the known memory state to other nodes in
216//     the second step.
217func memState(f *Func, startMem, endMem []*Value) {
218	// This slice contains the set of blocks that have had their startMem set but this
219	// startMem value has not yet been propagated to the endMem of its predecessors
220	changed := make([]*Block, 0)
221	// First step, init the memory state of some blocks.
222	for _, b := range f.Blocks {
223		for _, v := range b.Values {
224			var mem *Value
225			if v.Op == OpPhi {
226				if v.Type.IsMemory() {
227					mem = v
228				}
229			} else if v.Op == OpInitMem {
230				mem = v // This is actually not needed.
231			} else if a := v.MemoryArg(); a != nil && a.Block != b {
232				// The only incoming memory value doesn't belong to this block.
233				mem = a
234			}
235			if mem != nil {
236				if old := startMem[b.ID]; old != nil {
237					if old == mem {
238						continue
239					}
240					f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, b, old, mem)
241				}
242				startMem[b.ID] = mem
243				changed = append(changed, b)
244			}
245		}
246	}
247
248	// Second step, floods the known memory state of some blocks to others.
249	for len(changed) != 0 {
250		top := changed[0]
251		changed = changed[1:]
252		mem := startMem[top.ID]
253		for i, p := range top.Preds {
254			pb := p.b
255			if endMem[pb.ID] != nil {
256				continue
257			}
258			if mem.Op == OpPhi && mem.Block == top {
259				endMem[pb.ID] = mem.Args[i]
260			} else {
261				endMem[pb.ID] = mem
262			}
263			if startMem[pb.ID] == nil {
264				startMem[pb.ID] = endMem[pb.ID]
265				changed = append(changed, pb)
266			}
267		}
268	}
269}
270