1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ssa
6
7import "cmd/internal/src"
8
9// branchelim tries to eliminate branches by
10// generating CondSelect instructions.
11//
12// Search for basic blocks that look like
13//
14//	bb0            bb0
15//	 | \          /   \
16//	 | bb1  or  bb1   bb2    <- trivial if/else blocks
17//	 | /          \   /
18//	bb2            bb3
19//
20// where the intermediate blocks are mostly empty (with no side-effects);
21// rewrite Phis in the postdominator as CondSelects.
22func branchelim(f *Func) {
23	// FIXME: add support for lowering CondSelects on more architectures
24	switch f.Config.arch {
25	case "arm64", "ppc64le", "ppc64", "amd64", "wasm", "loong64":
26		// implemented
27	default:
28		return
29	}
30
31	// Find all the values used in computing the address of any load.
32	// Typically these values have operations like AddPtr, Lsh64x64, etc.
33	loadAddr := f.newSparseSet(f.NumValues())
34	defer f.retSparseSet(loadAddr)
35	for _, b := range f.Blocks {
36		for _, v := range b.Values {
37			switch v.Op {
38			case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64:
39				loadAddr.add(v.Args[0].ID)
40			case OpMove:
41				loadAddr.add(v.Args[1].ID)
42			}
43		}
44	}
45	po := f.postorder()
46	for {
47		n := loadAddr.size()
48		for _, b := range po {
49			for i := len(b.Values) - 1; i >= 0; i-- {
50				v := b.Values[i]
51				if !loadAddr.contains(v.ID) {
52					continue
53				}
54				for _, a := range v.Args {
55					if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
56						loadAddr.add(a.ID)
57					}
58				}
59			}
60		}
61		if loadAddr.size() == n {
62			break
63		}
64	}
65
66	change := true
67	for change {
68		change = false
69		for _, b := range f.Blocks {
70			change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
71		}
72	}
73}
74
75func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
76	if loadAddr.contains(v.ID) {
77		// The result of the soon-to-be conditional move is used to compute a load address.
78		// We want to avoid generating a conditional move in this case
79		// because the load address would now be data-dependent on the condition.
80		// Previously it would only be control-dependent on the condition, which is faster
81		// if the branch predicts well (or possibly even if it doesn't, if the load will
82		// be an expensive cache miss).
83		// See issue #26306.
84		return false
85	}
86	if arch == "loong64" {
87		// We should not generate conditional moves if neither of the arguments is constant zero,
88		// because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the
89		// register pressure.
90		if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) &&
91			!(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) {
92			return false
93		}
94	}
95	// For now, stick to simple scalars that fit in registers
96	switch {
97	case v.Type.Size() > v.Block.Func.Config.RegSize:
98		return false
99	case v.Type.IsPtrShaped():
100		return true
101	case v.Type.IsInteger():
102		if arch == "amd64" && v.Type.Size() < 2 {
103			// amd64 doesn't support CMOV with byte registers
104			return false
105		}
106		return true
107	default:
108		return false
109	}
110}
111
112// elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
113// loadAddr is a set of values which are used to compute the address of a load.
114// Those values are exempt from CMOV generation.
115func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
116	// See if dom is an If with one arm that
117	// is trivial and succeeded by the other
118	// successor of dom.
119	if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
120		return false
121	}
122	var simple, post *Block
123	for i := range dom.Succs {
124		bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
125		if isLeafPlain(bb) && bb.Succs[0].Block() == other {
126			simple = bb
127			post = other
128			break
129		}
130	}
131	if simple == nil || len(post.Preds) != 2 || post == dom {
132		return false
133	}
134
135	// We've found our diamond CFG of blocks.
136	// Now decide if fusing 'simple' into dom+post
137	// looks profitable.
138
139	// Check that there are Phis, and that all of them
140	// can be safely rewritten to CondSelect.
141	hasphis := false
142	for _, v := range post.Values {
143		if v.Op == OpPhi {
144			hasphis = true
145			if !canCondSelect(v, f.Config.arch, loadAddr) {
146				return false
147			}
148		}
149	}
150	if !hasphis {
151		return false
152	}
153
154	// Pick some upper bound for the number of instructions
155	// we'd be willing to execute just to generate a dead
156	// argument to CondSelect. In the worst case, this is
157	// the number of useless instructions executed.
158	const maxfuseinsts = 2
159
160	if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
161		return false
162	}
163
164	// Replace Phi instructions in b with CondSelect instructions
165	swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
166	for _, v := range post.Values {
167		if v.Op != OpPhi {
168			continue
169		}
170		v.Op = OpCondSelect
171		if swap {
172			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
173		}
174		v.AddArg(dom.Controls[0])
175	}
176
177	// Put all of the instructions into 'dom'
178	// and update the CFG appropriately.
179	dom.Kind = post.Kind
180	dom.CopyControls(post)
181	dom.Aux = post.Aux
182	dom.Succs = append(dom.Succs[:0], post.Succs...)
183	for i := range dom.Succs {
184		e := dom.Succs[i]
185		e.b.Preds[e.i].b = dom
186	}
187
188	// Try really hard to preserve statement marks attached to blocks.
189	simplePos := simple.Pos
190	postPos := post.Pos
191	simpleStmt := simplePos.IsStmt() == src.PosIsStmt
192	postStmt := postPos.IsStmt() == src.PosIsStmt
193
194	for _, v := range simple.Values {
195		v.Block = dom
196	}
197	for _, v := range post.Values {
198		v.Block = dom
199	}
200
201	// findBlockPos determines if b contains a stmt-marked value
202	// that has the same line number as the Pos for b itself.
203	// (i.e. is the position on b actually redundant?)
204	findBlockPos := func(b *Block) bool {
205		pos := b.Pos
206		for _, v := range b.Values {
207			// See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
208			if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
209				return true
210			}
211		}
212		return false
213	}
214	if simpleStmt {
215		simpleStmt = !findBlockPos(simple)
216		if !simpleStmt && simplePos.SameFileAndLine(postPos) {
217			postStmt = false
218		}
219
220	}
221	if postStmt {
222		postStmt = !findBlockPos(post)
223	}
224
225	// If simpleStmt and/or postStmt are still true, then try harder
226	// to find the corresponding statement marks new homes.
227
228	// setBlockPos determines if b contains a can-be-statement value
229	// that has the same line number as the Pos for b itself, and
230	// puts a statement mark on it, and returns whether it succeeded
231	// in this operation.
232	setBlockPos := func(b *Block) bool {
233		pos := b.Pos
234		for _, v := range b.Values {
235			if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
236				v.Pos = v.Pos.WithIsStmt()
237				return true
238			}
239		}
240		return false
241	}
242	// If necessary and possible, add a mark to a value in simple
243	if simpleStmt {
244		if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
245			postStmt = false
246		}
247	}
248	// If necessary and possible, add a mark to a value in post
249	if postStmt {
250		postStmt = !setBlockPos(post)
251	}
252
253	// Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
254	// try the values in the successor block if it is uncomplicated.
255	if postStmt {
256		if dom.Pos.IsStmt() != src.PosIsStmt {
257			dom.Pos = postPos
258		} else {
259			// Try the successor block
260			if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
261				succ := dom.Succs[0].Block()
262				for _, v := range succ.Values {
263					if isPoorStatementOp(v.Op) {
264						continue
265					}
266					if postPos.SameFileAndLine(v.Pos) {
267						v.Pos = v.Pos.WithIsStmt()
268					}
269					postStmt = false
270					break
271				}
272				// If postStmt still true, tag the block itself if possible
273				if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
274					succ.Pos = postPos
275				}
276			}
277		}
278	}
279
280	dom.Values = append(dom.Values, simple.Values...)
281	dom.Values = append(dom.Values, post.Values...)
282
283	// Trash 'post' and 'simple'
284	clobberBlock(post)
285	clobberBlock(simple)
286
287	f.invalidateCFG()
288	return true
289}
290
291// is this a BlockPlain with one predecessor?
292func isLeafPlain(b *Block) bool {
293	return b.Kind == BlockPlain && len(b.Preds) == 1
294}
295
296func clobberBlock(b *Block) {
297	b.Values = nil
298	b.Preds = nil
299	b.Succs = nil
300	b.Aux = nil
301	b.ResetControls()
302	b.Likely = BranchUnknown
303	b.Kind = BlockInvalid
304}
305
306// elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
307// loadAddr is a set of values which are used to compute the address of a load.
308// Those values are exempt from CMOV generation.
309func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
310	// See if 'b' ends in an if/else: it should
311	// have two successors, both of which are BlockPlain
312	// and succeeded by the same block.
313	if b.Kind != BlockIf || b.Likely != BranchUnknown {
314		return false
315	}
316	yes, no := b.Succs[0].Block(), b.Succs[1].Block()
317	if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
318		return false
319	}
320	if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
321		return false
322	}
323	if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
324		return false
325	}
326	// block that postdominates the if/else
327	post := b.Succs[0].Block().Succs[0].Block()
328	if len(post.Preds) != 2 || post == b {
329		return false
330	}
331	hasphis := false
332	for _, v := range post.Values {
333		if v.Op == OpPhi {
334			hasphis = true
335			if !canCondSelect(v, f.Config.arch, loadAddr) {
336				return false
337			}
338		}
339	}
340	if !hasphis {
341		return false
342	}
343
344	// Don't generate CondSelects if branch is cheaper.
345	if !shouldElimIfElse(no, yes, post, f.Config.arch) {
346		return false
347	}
348
349	// now we're committed: rewrite each Phi as a CondSelect
350	swap := post.Preds[0].Block() != b.Succs[0].Block()
351	for _, v := range post.Values {
352		if v.Op != OpPhi {
353			continue
354		}
355		v.Op = OpCondSelect
356		if swap {
357			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
358		}
359		v.AddArg(b.Controls[0])
360	}
361
362	// Move the contents of all of these
363	// blocks into 'b' and update CFG edges accordingly
364	b.Kind = post.Kind
365	b.CopyControls(post)
366	b.Aux = post.Aux
367	b.Succs = append(b.Succs[:0], post.Succs...)
368	for i := range b.Succs {
369		e := b.Succs[i]
370		e.b.Preds[e.i].b = b
371	}
372	for i := range post.Values {
373		post.Values[i].Block = b
374	}
375	for i := range yes.Values {
376		yes.Values[i].Block = b
377	}
378	for i := range no.Values {
379		no.Values[i].Block = b
380	}
381	b.Values = append(b.Values, yes.Values...)
382	b.Values = append(b.Values, no.Values...)
383	b.Values = append(b.Values, post.Values...)
384
385	// trash post, yes, and no
386	clobberBlock(yes)
387	clobberBlock(no)
388	clobberBlock(post)
389
390	f.invalidateCFG()
391	return true
392}
393
394// shouldElimIfElse reports whether estimated cost of eliminating branch
395// is lower than threshold.
396func shouldElimIfElse(no, yes, post *Block, arch string) bool {
397	switch arch {
398	default:
399		return true
400	case "amd64":
401		const maxcost = 2
402		phi := 0
403		other := 0
404		for _, v := range post.Values {
405			if v.Op == OpPhi {
406				// Each phi results in CondSelect, which lowers into CMOV,
407				// CMOV has latency >1 on most CPUs.
408				phi++
409			}
410			for _, x := range v.Args {
411				if x.Block == no || x.Block == yes {
412					other++
413				}
414			}
415		}
416		cost := phi * 1
417		if phi > 1 {
418			// If we have more than 1 phi and some values in post have args
419			// in yes or no blocks, we may have to recalculate condition, because
420			// those args may clobber flags. For now assume that all operations clobber flags.
421			cost += other * 1
422		}
423		return cost < maxcost
424	}
425}
426
427// canSpeculativelyExecute reports whether every value in the block can
428// be evaluated without causing any observable side effects (memory
429// accesses, panics and so on) except for execution time changes. It
430// also ensures that the block does not contain any phis which we can't
431// speculatively execute.
432// Warning: this function cannot currently detect values that represent
433// instructions the execution of which need to be guarded with CPU
434// hardware feature checks. See issue #34950.
435func canSpeculativelyExecute(b *Block) bool {
436	// don't fuse memory ops, Phi ops, divides (can panic),
437	// or anything else with side-effects
438	for _, v := range b.Values {
439		if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() ||
440			v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
441			return false
442		}
443	}
444	return true
445}
446
447func isDivMod(op Op) bool {
448	switch op {
449	case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
450		OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
451		OpDiv32F, OpDiv64F,
452		OpMod8, OpMod8u, OpMod16, OpMod16u,
453		OpMod32, OpMod32u, OpMod64, OpMod64u:
454		return true
455	default:
456		return false
457	}
458}
459
460func isPtrArithmetic(op Op) bool {
461	// Pointer arithmetic can't be speculatively executed because the result
462	// may be an invalid pointer (if, for example, the condition is that the
463	// base pointer is not nil). See issue 56990.
464	switch op {
465	case OpOffPtr, OpAddPtr, OpSubPtr:
466		return true
467	default:
468		return false
469	}
470}
471