// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/types"
	"fmt"
)

// an edgeMem records a backedge, together with the memory
// phi functions at the target of the backedge that must
// be updated when a rescheduling check replaces the backedge.
type edgeMem struct {
	e Edge
	m *Value // phi for memory at dest of e
}

// a rewriteTarget is a value-argindex pair indicating
// where a rewrite is applied.  Note that this is for values,
// not for block controls, because block controls are not targets
// for the rewrites performed in inserting rescheduling checks.
type rewriteTarget struct {
	v *Value
	i int
}

type rewrite struct {
	before, after *Value          // before is the expected value before rewrite, after is the new value installed.
	rewrites      []rewriteTarget // all the targets for this rewrite.
}

func (r *rewrite) String() string {
	s := "\n\tbefore=" + r.before.String() + ", after=" + r.after.String()
	for _, rw := range r.rewrites {
		s += ", (i=" + fmt.Sprint(rw.i) + ", v=" + rw.v.LongString() + ")"
	}
	s += "\n"
	return s
}

// insertLoopReschedChecks inserts rescheduling checks on loop backedges.
func insertLoopReschedChecks(f *Func) {
	// TODO: when split information is recorded in export data, insert checks only on backedges that can be reached on a split-call-free path.

	// Loop reschedule checks compare the stack pointer with
	// the per-g stack bound.  If the pointer appears invalid,
	// that means a reschedule check is needed.
	//
	// Steps:
	// 1. Locate backedges.
	// 2. Record memory definitions at block end so that
	//    the SSA graph for mem can be properly modified.
	// 3. Ensure that phi functions that will be needed for mem
	//    are present in the graph, initially with trivial inputs.
	// 4. Record all to-be-modified uses of mem;
	//    apply modifications (split into two steps to simplify and
	//    avoid nagging order-dependencies).
	// 5. Rewrite backedges to include reschedule check,
	//    and modify destination phi function appropriately with new
	//    definitions for mem.
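	//
	// In source-level terms the transformation is roughly the following
	// (a sketch only; the actual check is constructed directly in SSA below):
	//
	//    loop:                    loop:
	//        ...                      ...
	//        goto loop    =>          if SP < g.limit { goschedguarded() }
	//                                 goto loop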

	if f.NoSplit { // nosplit functions don't reschedule.
		return
	}

	backedges := backedges(f)
	if len(backedges) == 0 { // no backedges means no rescheduling checks.
		return
	}

	lastMems := findLastMems(f)
	defer f.Cache.freeValueSlice(lastMems)

	idom := f.Idom()
	po := f.postorder()
	// The ordering in the dominator tree matters; it's important that
	// the walk of the dominator tree also be a preorder (i.e., a node is
	// visited only after all its non-backedge predecessors have been visited).
	sdom := newSparseOrderedTree(f, idom, po)

	if f.pass.debug > 1 {
		fmt.Printf("before %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}

	tofixBackedges := []edgeMem{}

	for _, e := range backedges { // TODO: could filter here by calls in loops, if declared and inferred nosplit are recorded in export data.
		tofixBackedges = append(tofixBackedges, edgeMem{e, nil})
	}

	// It's possible that there is no memory state (no global/pointer loads/stores or calls)
	if lastMems[f.Entry.ID] == nil {
		lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem)
	}

	memDefsAtBlockEnds := f.Cache.allocValueSlice(f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block.
	defer f.Cache.freeValueSlice(memDefsAtBlockEnds)

	// Propagate last mem definitions forward through successor blocks.
	for i := len(po) - 1; i >= 0; i-- {
		b := po[i]
		mem := lastMems[b.ID]
		for j := 0; mem == nil; j++ { // if there's no def, then there's no phi, so the visible mem is identical in all predecessors.
			// loop because there might be backedges that haven't been visited yet.
			mem = memDefsAtBlockEnds[b.Preds[j].b.ID]
		}
		memDefsAtBlockEnds[b.ID] = mem
		if f.pass.debug > 2 {
			fmt.Printf("memDefsAtBlockEnds[%s] = %s\n", b, mem)
		}
	}

	// Maps from block to newly-inserted phi function in block.
	newmemphis := make(map[*Block]rewrite)

	// Insert phi functions as necessary for future changes to flow graph.
	for i, emc := range tofixBackedges {
		e := emc.e
		h := e.b

		// find the phi function for the memory input at "h", if there is one.
		var headerMemPhi *Value // look for header mem phi

		for _, v := range h.Values {
			if v.Op == OpPhi && v.Type.IsMemory() {
				headerMemPhi = v
			}
		}

		if headerMemPhi == nil {
			// if the header has no mem phi, make a trivial one whose input is the mem def from the dominator
			mem0 := memDefsAtBlockEnds[idom[h.ID].ID]
			headerMemPhi = newPhiFor(h, mem0)
			newmemphis[h] = rewrite{before: mem0, after: headerMemPhi}
			addDFphis(mem0, h, h, f, memDefsAtBlockEnds, newmemphis, sdom)
		}
		tofixBackedges[i].m = headerMemPhi
	}
	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("before b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// dfPhiTargets notes inputs to phis in dominance frontiers that should not
	// be rewritten as part of the dominated children of some outer rewrite.
	dfPhiTargets := make(map[rewriteTarget]bool)

	rewriteNewPhis(f.Entry, f.Entry, f, memDefsAtBlockEnds, newmemphis, dfPhiTargets, sdom)

	if f.pass.debug > 0 {
		for b, r := range newmemphis {
			fmt.Printf("after b=%s, rewrite=%s\n", b, r.String())
		}
	}

	// Apply collected rewrites.
	for _, r := range newmemphis {
		for _, rw := range r.rewrites {
			rw.v.SetArg(rw.i, r.after)
		}
	}

	// Rewrite backedges to include reschedule checks.
	for _, emc := range tofixBackedges {
		e := emc.e
		headerMemPhi := emc.m
		h := e.b
		i := e.i
		p := h.Preds[i]
		bb := p.b
		mem0 := headerMemPhi.Args[i]
		// bb is the source of backedge e into header h (bb.Succs[p.i] leads to h).
		// Because we're going to insert a rarely-taken call on this path, make
		// sure the looping edge still looks likely.
		likely := BranchLikely
		if p.i != 0 {
			likely = BranchUnlikely
		}
		if bb.Kind != BlockPlain { // backedges can be unconditional. e.g., if x { something; continue }
			bb.Likely = likely
		}

		// rewrite edge to include reschedule check
		// existing edges:
		//
		// bb.Succs[p.i] == Edge{h, i}
		// h.Preds[i] == p == Edge{bb,p.i}
		//
		// new block(s):
		// test:
		//    if sp < g.limit { goto sched }
		//    goto join
		// sched:
		//    mem1 := call resched (mem0)
		//    goto join
		// join:
		//    mem2 := phi(mem0, mem1)
		//    goto h
		//
		// and correct arg i of headerMemPhi
		//
		// EXCEPT: join block containing only phi functions is bad
		// for the register allocator.  Therefore, there is no
		// join, and branches targeting join must instead target
		// the header, and the other phi functions within header are
		// adjusted for the additional input.

		test := f.NewBlock(BlockIf)
		sched := f.NewBlock(BlockPlain)

		test.Pos = bb.Pos
		sched.Pos = bb.Pos

		// if sp < g.limit { goto sched }
		// goto header

		cfgtypes := &f.Config.Types
		pt := cfgtypes.Uintptr
		g := test.NewValue1(bb.Pos, OpGetG, pt, mem0)
		sp := test.NewValue0(bb.Pos, OpSP, pt)
		cmpOp := OpLess64U
		if pt.Size() == 4 {
			cmpOp = OpLess32U
		}
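		// The limit lives at a fixed offset inside the runtime's g struct:
		// past stack.lo and stack.hi, i.e. 2*pt.Size() bytes in, which (assuming
		// the usual g layout) is the stackguard0 field that the runtime also
		// poisons to request preemption.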
		limaddr := test.NewValue1I(bb.Pos, OpOffPtr, pt, 2*pt.Size(), g)
		lim := test.NewValue2(bb.Pos, OpLoad, pt, limaddr, mem0)
		cmp := test.NewValue2(bb.Pos, cmpOp, cfgtypes.Bool, sp, lim)
		test.SetControl(cmp)

		// if true, goto sched
		test.AddEdgeTo(sched)

		// if false, rewrite edge to header.
		// do NOT remove+add, because that will perturb all the other phi functions
		// as well as messing up other edges to the header.
		test.Succs = append(test.Succs, Edge{h, i})
		h.Preds[i] = Edge{test, 1}
		headerMemPhi.SetArg(i, mem0)

		test.Likely = BranchUnlikely

		// sched:
		//    mem1 := call resched (mem0)
		//    goto header
		resched := f.fe.Syslook("goschedguarded")
		call := sched.NewValue1A(bb.Pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(resched, bb.Func.ABIDefault.ABIAnalyzeTypes(nil, nil)), mem0)
		mem1 := sched.NewValue1I(bb.Pos, OpSelectN, types.TypeMem, 0, call)
		sched.AddEdgeTo(h)
		headerMemPhi.AddArg(mem1)

		bb.Succs[p.i] = Edge{test, 0}
		test.Preds = append(test.Preds, Edge{bb, p.i})

		// Must correct all the other phi functions in the header for new incoming edge.
		// Except for mem phis, it will be the same value seen on the original
		// backedge at index i.
		for _, v := range h.Values {
			if v.Op == OpPhi && v != headerMemPhi {
				v.AddArg(v.Args[i])
			}
		}
	}

	f.invalidateCFG()

	if f.pass.debug > 1 {
		sdom = newSparseTree(f, f.Idom())
		fmt.Printf("after %s = %s\n", f.Name, sdom.treestructure(f.Entry))
	}
}

// newPhiFor inserts a new Phi function into b,
// with all inputs set to v.
func newPhiFor(b *Block, v *Value) *Value {
	phiV := b.NewValue0(b.Pos, OpPhi, v.Type)

	for range b.Preds {
		phiV.AddArg(v)
	}
	return phiV
}

// rewriteNewPhis updates newphis[h] to record all places where the new phi function inserted
// in block h will replace a previous definition.  Block b is the block currently being processed;
// if b has its own phi definition then it takes the place of h.
// defsForUses provides information about other definitions of the variable that are present
// (and if nil, indicates that the variable is no longer live).
// sdom must yield a preorder of the flow graph if recursively walked, root-to-children.
// The result of newSparseOrderedTree with order supplied by a dfs-postorder satisfies this
// requirement.
func rewriteNewPhis(h, b *Block, f *Func, defsForUses []*Value, newphis map[*Block]rewrite, dfPhiTargets map[rewriteTarget]bool, sdom SparseTree) {
	// If b is a block with a new phi, then a new rewrite applies below it in the dominator tree.
	if _, ok := newphis[b]; ok {
		h = b
	}
	change := newphis[h]
	x := change.before
	y := change.after

	// Apply rewrites to this block
	if x != nil { // don't waste time on the common case of no definition.
		p := &change.rewrites
		for _, v := range b.Values {
			if v == y { // don't rewrite self -- phi inputs are handled below.
				continue
			}
			for i, w := range v.Args {
				if w != x {
					continue
				}
				tgt := rewriteTarget{v, i}

				// It's possible dominated control flow will rewrite this instead.
				// Visiting in preorder (a property of how sdom was constructed)
				// ensures that these are seen in the proper order.
				if dfPhiTargets[tgt] {
					continue
				}
				*p = append(*p, tgt)
				if f.pass.debug > 1 {
					fmt.Printf("added block target for h=%v, b=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
						h, b, x, y, v, i)
				}
			}
		}

		// Rewrite appropriate inputs of phis reached in successors
		// in dominance frontier, self, and dominated.
		// If the variable def reaching uses in b is itself defined in b, then the new phi function
		// does not reach the successors of b.  (This assumes a bit about the structure of the
		// phi use-def graph, but it's true for memory.)
		if dfu := defsForUses[b.ID]; dfu != nil && dfu.Block != b {
			for _, e := range b.Succs {
				s := e.b

				for _, v := range s.Values {
					if v.Op == OpPhi && v.Args[e.i] == x {
						tgt := rewriteTarget{v, e.i}
						*p = append(*p, tgt)
						dfPhiTargets[tgt] = true
						if f.pass.debug > 1 {
							fmt.Printf("added phi target for h=%v, b=%v, s=%v, x=%v, y=%v, tgt.v=%s, tgt.i=%d\n",
								h, b, s, x, y, v.LongString(), e.i)
						}
						break
					}
				}
			}
		}
		newphis[h] = change
	}

	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		rewriteNewPhis(h, c, f, defsForUses, newphis, dfPhiTargets, sdom) // TODO: convert to explicit stack from recursion.
	}
}

// addDFphis creates new trivial phis that are necessary to correctly reflect (within SSA)
// a new definition for variable "x" inserted at h (usually but not necessarily a phi).
// These new phis can only occur at the dominance frontier of h; block s is in the dominance
// frontier of h if h does not strictly dominate s and if s is a successor of a block b where
// either b = h or h strictly dominates b.
// These newly created phis are themselves new definitions that may require addition of their
// own trivial phi functions in their own dominance frontier, and this is handled recursively.
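// For example (illustrative only): if h is a loop header that dominates every
// block in the loop body but not the loop's exit block s, then any body block b
// that branches to s puts s in h's dominance frontier, so a definition inserted
// at h may require a fresh trivial phi in s.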
func addDFphis(x *Value, h, b *Block, f *Func, defForUses []*Value, newphis map[*Block]rewrite, sdom SparseTree) {
	oldv := defForUses[b.ID]
	if oldv != x { // either a new definition replacing x, or nil if it is proven that there are no uses reachable from b
		return
	}
	idom := f.Idom()
outer:
	for _, e := range b.Succs {
		s := e.b
		// check phi functions in the dominance frontier
		if sdom.isAncestor(h, s) {
			continue // h dominates s, successor of b, therefore s is not in the frontier.
		}
		if _, ok := newphis[s]; ok {
			continue // successor s of b already has a new phi function, so there is no need to add another.
		}
		if x != nil {
			for _, v := range s.Values {
				if v.Op == OpPhi && v.Args[e.i] == x {
					continue outer // successor s of b has an old phi function, so there is no need to add another.
				}
			}
		}

		old := defForUses[idom[s.ID].ID] // new phi function is correct-but-redundant, combining value "old" on all inputs.
		headerPhi := newPhiFor(s, old)
		// the new phi will replace "old" in block s and all blocks dominated by s.
		newphis[s] = rewrite{before: old, after: headerPhi} // record new phi, to have inputs labeled "old" rewritten to "headerPhi"
		addDFphis(old, s, s, f, defForUses, newphis, sdom)  // the new definition may also create new phi functions.
	}
	for c := sdom[b.ID].child; c != nil; c = sdom[c.ID].sibling {
		addDFphis(x, h, c, f, defForUses, newphis, sdom) // TODO: convert to explicit stack from recursion.
	}
}

// findLastMems maps block ids to last memory-output op in a block, if any.
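// The returned slice is indexed by block ID and is allocated from f.Cache;
// the caller is responsible for returning it with f.Cache.freeValueSlice.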
func findLastMems(f *Func) []*Value {

	var stores []*Value
	lastMems := f.Cache.allocValueSlice(f.NumBlocks())
	storeUse := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(storeUse)
	for _, b := range f.Blocks {
		// Find all the stores in this block. Categorize their uses:
		//  storeUse contains stores which are used by a subsequent store.
		storeUse.clear()
		stores = stores[:0]
		var memPhi *Value
		for _, v := range b.Values {
			if v.Op == OpPhi {
				if v.Type.IsMemory() {
					memPhi = v
				}
				continue
			}
			if v.Type.IsMemory() {
				stores = append(stores, v)
				for _, a := range v.Args {
					if a.Block == b && a.Type.IsMemory() {
						storeUse.add(a.ID)
					}
				}
			}
		}
		if len(stores) == 0 {
			lastMems[b.ID] = memPhi
			continue
		}

		// find last store in the block
		var last *Value
		for _, v := range stores {
			if storeUse.contains(v.ID) {
				continue
			}
			if last != nil {
				b.Fatalf("two final stores - simultaneous live stores %s %s", last, v)
			}
			last = v
		}
		if last == nil {
			b.Fatalf("no last store found - cycle?")
		}

		// If this is a tuple containing a mem, select just
		// the mem. This will generate ops we don't need, but
		// it's the easiest thing to do.
		if last.Type.IsTuple() {
			last = b.NewValue1(last.Pos, OpSelect1, types.TypeMem, last)
		} else if last.Type.IsResults() {
			last = b.NewValue1I(last.Pos, OpSelectN, types.TypeMem, int64(last.Type.NumFields()-1), last)
		}

		lastMems[b.ID] = last
	}
	return lastMems
}

// markKind marks the traversal state of a block during the DFS in backedges.
type markKind uint8

const (
	notFound    markKind = iota // block has not been discovered yet
	notExplored                 // discovered and in queue, outedges not processed yet
	explored                    // discovered and in queue, outedges processed
	done                        // all done, in output ordering
)

type backedgesState struct {
	b *Block
	i int
}

// backedges returns a slice of successor edges that are back
// edges.  For reducible loops, edge.b is the header.
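// For example (illustrative only): if block b3 inside a loop branches back to
// the loop header b2, the DFS below reaches b2 while b2 is still notExplored
// (it is on the stack), so the successor edge b3 -> b2 is reported as a backedge.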
func backedges(f *Func) []Edge {
	edges := []Edge{}
	mark := make([]markKind, f.NumBlocks())
	stack := []backedgesState{}

	mark[f.Entry.ID] = notExplored
	stack = append(stack, backedgesState{f.Entry, 0})

	for len(stack) > 0 {
		l := len(stack)
		x := stack[l-1]
		if x.i < len(x.b.Succs) {
			e := x.b.Succs[x.i]
			stack[l-1].i++
			s := e.b
			if mark[s.ID] == notFound {
				mark[s.ID] = notExplored
				stack = append(stack, backedgesState{s, 0})
			} else if mark[s.ID] == notExplored {
				edges = append(edges, e)
			}
		} else {
			mark[x.b.ID] = done
			stack = stack[0 : l-1]
		}
	}
	return edges
}