1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Register allocation.
6//
7// We use a version of a linear scan register allocator. We treat the
8// whole function as a single long basic block and run through
9// it using a greedy register allocator. Then all merge edges
10// (those targeting a block with len(Preds)>1) are processed to
11// shuffle data into the place that the target of the edge expects.
12//
13// The greedy allocator moves values into registers just before they
14// are used, spills registers only when necessary, and spills the
15// value whose next use is farthest in the future.
16//
17// The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
19// such predecessor provides the starting register state for a block.
20//
21// It also requires that there are no critical edges (critical =
22// comes from a block with >1 successor and goes to a block with >1
23// predecessor).  This makes it easy to add fixup code on merge edges -
24// the source of a merge edge has only one successor, so we can add
25// fixup code to the end of that block.
26
27// Spilling
28//
// During the normal course of allocation, we might throw a still-live
// value out of all registers. When that value is subsequently used, we must
// load it back from a slot on the stack. We must also issue an instruction to
// initialize that stack location with a copy of the value (v in the example below).
33//
34// pre-regalloc:
35//   (1) v = Op ...
36//   (2) x = Op ...
37//   (3) ... = Op v ...
38//
39// post-regalloc:
40//   (1) v = Op ...    : AX // computes v, store result in AX
41//       s = StoreReg v     // spill v to a stack slot
42//   (2) x = Op ...    : AX // some other op uses AX
43//       c = LoadReg s : CX // restore v from stack slot
44//   (3) ... = Op c ...     // use the restored value
45//
46// Allocation occurs normally until we reach (3) and we realize we have
47// a use of v and it isn't in any register. At that point, we allocate
48// a spill (a StoreReg) for v. We can't determine the correct place for
49// the spill at this point, so we allocate the spill as blockless initially.
50// The restore is then generated to load v back into a register so it can
51// be used. Subsequent uses of v will use the restored value c instead.
52//
53// What remains is the question of where to schedule the spill.
54// During allocation, we keep track of the dominator of all restores of v.
55// The spill of v must dominate that block. The spill must also be issued at
56// a point where v is still in a register.
57//
58// To find the right place, start at b, the block which dominates all restores.
59//  - If b is v.Block, then issue the spill right after v.
60//    It is known to be in a register at that point, and dominates any restores.
61//  - Otherwise, if v is in a register at the start of b,
62//    put the spill of v at the start of b.
63//  - Otherwise, set b = immediate dominator of b, and repeat.
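//
// A rough sketch of that search in Go-like pseudocode (the helper names here
// are illustrative only; the real placement logic runs after the main
// allocation loop):
//
//	b := blockDominatingAllRestores(v)
//	for {
//		if b == v.Block {
//			// Issue the spill right after v.
//			break
//		}
//		if inRegisterAtStartOf(v, b) {
//			// Issue the spill at the start of b.
//			break
//		}
//		b = idom(b) // walk up the dominator tree
//	}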
64//
65// Phi values are special, as always. We define two kinds of phis, those
66// where the merge happens in a register (a "register" phi) and those where
67// the merge happens in a stack location (a "stack" phi).
68//
69// A register phi must have the phi and all of its inputs allocated to the
70// same register. Register phis are spilled similarly to regular ops.
71//
72// A stack phi must have the phi and all of its inputs allocated to the same
73// stack location. Stack phis start out life already spilled - each phi
74// input must be a store (using StoreReg) at the end of the corresponding
75// predecessor block.
76//     b1: y = ... : AX        b2: z = ... : BX
77//         y2 = StoreReg y         z2 = StoreReg z
78//         goto b3                 goto b3
79//     b3: x = phi(y2, z2)
80// The stack allocator knows that StoreReg args of stack-allocated phis
81// must be allocated to the same stack slot as the phi that uses them.
82// x is now a spilled value and a restore must appear before its first use.
83
84// TODO
85
86// Use an affinity graph to mark two values which should use the
87// same register. This affinity graph will be used to prefer certain
88// registers for allocation. This affinity helps eliminate moves that
89// are required for phi implementations and helps generate allocations
90// for 2-register architectures.
91
92// Note: regalloc generates a not-quite-SSA output. If we have:
93//
94//             b1: x = ... : AX
95//                 x2 = StoreReg x
96//                 ... AX gets reused for something else ...
97//                 if ... goto b3 else b4
98//
99//   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
100//       ... use x3 ...                 ... use x4 ...
101//
102//             b2: ... use x3 ...
103//
104// If b3 is the primary predecessor of b2, then we use x3 in b2 and
// add an x4:CX->BX copy at the end of b4.
106// But the definition of x3 doesn't dominate b2.  We should really
107// insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
108// SSA form. For now, we ignore this problem as remaining in strict
109// SSA form isn't needed after regalloc. We'll just leave the use
110// of x3 not dominated by the definition of x3, and the CX->BX copy
111// will have no use (so don't run deadcode after regalloc!).
112// TODO: maybe we should introduce these extra phis?
113
114package ssa
115
116import (
117	"cmd/compile/internal/base"
118	"cmd/compile/internal/ir"
119	"cmd/compile/internal/types"
120	"cmd/internal/src"
121	"cmd/internal/sys"
122	"fmt"
123	"internal/buildcfg"
124	"math/bits"
125	"unsafe"
126)
127
128const (
129	moveSpills = iota
130	logSpills
131	regDebug
132	stackDebug
133)
134
135// distance is a measure of how far into the future values are used.
136// distance is measured in units of instructions.
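// These deltas are applied when liveness information is propagated across
// block edges (see computeLive): a use reached through a likely branch edge
// counts as only likelyDistance instructions away, while one reached through
// an unlikely edge counts as unlikelyDistance away, so the allocator prefers
// to keep values needed on likely paths in registers.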
137const (
138	likelyDistance   = 1
139	normalDistance   = 10
140	unlikelyDistance = 100
141)
142
143// regalloc performs register allocation on f. It sets f.RegAlloc
144// to the resulting allocation.
145func regalloc(f *Func) {
146	var s regAllocState
147	s.init(f)
148	s.regalloc(f)
149	s.close()
150}
151
152type register uint8
153
154const noRegister register = 255
155
156// For bulk initializing
157var noRegisters [32]register = [32]register{
158	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
159	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
160	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
161	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
162}
163
164// A regMask encodes a set of machine registers.
165// TODO: regMask -> regSet?
166type regMask uint64
167
168func (m regMask) String() string {
169	s := ""
170	for r := register(0); m != 0; r++ {
171		if m>>r&1 == 0 {
172			continue
173		}
174		m &^= regMask(1) << r
175		if s != "" {
176			s += " "
177		}
178		s += fmt.Sprintf("r%d", r)
179	}
180	return s
181}
182
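// RegMaskString is like regMask.String, but formats m using the
// architecture-specific register names in s.registers.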
183func (s *regAllocState) RegMaskString(m regMask) string {
184	str := ""
185	for r := register(0); m != 0; r++ {
186		if m>>r&1 == 0 {
187			continue
188		}
189		m &^= regMask(1) << r
190		if str != "" {
191			str += " "
192		}
193		str += s.registers[r].String()
194	}
195	return str
196}
197
198// countRegs returns the number of set bits in the register mask.
199func countRegs(r regMask) int {
200	return bits.OnesCount64(uint64(r))
201}
202
203// pickReg picks an arbitrary register from the register mask.
204func pickReg(r regMask) register {
205	if r == 0 {
206		panic("can't pick a register from an empty set")
207	}
208	// pick the lowest one
209	return register(bits.TrailingZeros64(uint64(r)))
210}
211
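// A use records a single upcoming use of a value. A value's uses are kept in
// a linked list sorted by nondecreasing distance from the start of the
// current block.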
212type use struct {
213	dist int32    // distance from start of the block to a use of a value
214	pos  src.XPos // source position of the use
215	next *use     // linked list of uses of a value in nondecreasing dist order
216}
217
218// A valState records the register allocation state for a (pre-regalloc) value.
219type valState struct {
220	regs              regMask // the set of registers holding a Value (usually just one)
221	uses              *use    // list of uses in this block
222	spill             *Value  // spilled copy of the Value (if any)
223	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
224	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
226	rematerializeable bool    // cached value of v.rematerializeable()
227}
228
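// A regState records what a single machine register currently holds.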
229type regState struct {
230	v *Value // Original (preregalloc) Value stored in this register.
231	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
232	// If a register is unused, v==c==nil
233}
234
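// A regAllocState holds the state of the register allocator while it
// processes a single function.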
235type regAllocState struct {
236	f *Func
237
238	sdom        SparseTree
239	registers   []Register
240	numRegs     register
241	SPReg       register
242	SBReg       register
243	GReg        register
244	allocatable regMask
245
246	// live values at the end of each block.  live[b.ID] is a list of value IDs
247	// which are live at the end of b, together with a count of how many instructions
248	// forward to the next use.
249	live [][]liveInfo
250	// desired register assignments at the end of each block.
251	// Note that this is a static map computed before allocation occurs. Dynamic
252	// register desires (from partially completed allocations) will trump
253	// this information.
254	desired []desiredState
255
256	// current state of each (preregalloc) Value
257	values []valState
258
259	// ID of SP, SB values
260	sp, sb ID
261
262	// For each Value, map from its value ID back to the
263	// preregalloc Value it was derived from.
264	orig []*Value
265
266	// current state of each register
267	regs []regState
268
269	// registers that contain values which can't be kicked out
270	nospill regMask
271
272	// mask of registers currently in use
273	used regMask
274
275	// mask of registers used since the start of the current block
276	usedSinceBlockStart regMask
277
278	// mask of registers used in the current instruction
279	tmpused regMask
280
281	// current block we're working on
282	curBlock *Block
283
284	// cache of use records
285	freeUseRecords *use
286
287	// endRegs[blockid] is the register state at the end of each block.
288	// encoded as a set of endReg records.
289	endRegs [][]endReg
290
291	// startRegs[blockid] is the register state at the start of merge blocks.
292	// saved state does not include the state of phi ops in the block.
293	startRegs [][]startReg
294
295	// startRegsMask is a mask of the registers in startRegs[curBlock.ID].
	// Registers dropped from startRegsMask are later synchronized back to
297	// startRegs by dropping from there as well.
298	startRegsMask regMask
299
300	// spillLive[blockid] is the set of live spills at the end of each block
301	spillLive [][]ID
302
303	// a set of copies we generated to move things around, and
304	// whether it is used in shuffle. Unused copies will be deleted.
305	copies map[*Value]bool
306
307	loopnest *loopnest
308
309	// choose a good order in which to visit blocks for allocation purposes.
310	visitOrder []*Block
311
312	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
313	blockOrder []int32
314
315	// whether to insert instructions that clobber dead registers at call sites
316	doClobber bool
317}
318
319type endReg struct {
320	r register
321	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
322	c *Value // cached version of the value
323}
324
325type startReg struct {
326	r   register
327	v   *Value   // pre-regalloc value needed in this register
328	c   *Value   // cached version of the value
329	pos src.XPos // source position of use of this register
330}
331
332// freeReg frees up register r. Any current user of r is kicked out.
333func (s *regAllocState) freeReg(r register) {
334	v := s.regs[r].v
335	if v == nil {
336		s.f.Fatalf("tried to free an already free register %d\n", r)
337	}
338
339	// Mark r as unused.
340	if s.f.pass.debug > regDebug {
341		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
342	}
343	s.regs[r] = regState{}
344	s.values[v.ID].regs &^= regMask(1) << r
345	s.used &^= regMask(1) << r
346}
347
348// freeRegs frees up all registers listed in m.
349func (s *regAllocState) freeRegs(m regMask) {
350	for m&s.used != 0 {
351		s.freeReg(pickReg(m & s.used))
352	}
353}
354
355// clobberRegs inserts instructions that clobber registers listed in m.
356func (s *regAllocState) clobberRegs(m regMask) {
	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers, so only clobber them
358	for m != 0 {
359		r := pickReg(m)
360		m &^= 1 << r
361		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
362		s.f.setHome(x, &s.registers[r])
363	}
364}
365
366// setOrig records that c's original value is the same as
367// v's original value.
368func (s *regAllocState) setOrig(c *Value, v *Value) {
369	if int(c.ID) >= cap(s.orig) {
370		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
371		copy(x, s.orig)
372		s.f.Cache.freeValueSlice(s.orig)
373		s.orig = x
374	}
375	for int(c.ID) >= len(s.orig) {
376		s.orig = append(s.orig, nil)
377	}
378	if s.orig[c.ID] != nil {
379		s.f.Fatalf("orig value set twice %s %s", c, v)
380	}
381	s.orig[c.ID] = s.orig[v.ID]
382}
383
384// assignReg assigns register r to hold c, a copy of v.
385// r must be unused.
386func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
387	if s.f.pass.debug > regDebug {
388		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
389	}
390	if s.regs[r].v != nil {
391		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
392	}
393
394	// Update state.
395	s.regs[r] = regState{v, c}
396	s.values[v.ID].regs |= regMask(1) << r
397	s.used |= regMask(1) << r
398	s.f.setHome(c, &s.registers[r])
399}
400
401// allocReg chooses a register from the set of registers in mask.
402// If there is no unused register, a Value will be kicked out of
403// a register to make room.
404func (s *regAllocState) allocReg(mask regMask, v *Value) register {
405	if v.OnWasmStack {
406		return noRegister
407	}
408
409	mask &= s.allocatable
410	mask &^= s.nospill
411	if mask == 0 {
412		s.f.Fatalf("no register available for %s", v.LongString())
413	}
414
415	// Pick an unused register if one is available.
416	if mask&^s.used != 0 {
417		r := pickReg(mask &^ s.used)
418		s.usedSinceBlockStart |= regMask(1) << r
419		return r
420	}
421
422	// Pick a value to spill. Spill the value with the
423	// farthest-in-the-future use.
424	// TODO: Prefer registers with already spilled Values?
425	// TODO: Modify preference using affinity graph.
426	// TODO: if a single value is in multiple registers, spill one of them
427	// before spilling a value in just a single register.
428
429	// Find a register to spill. We spill the register containing the value
430	// whose next use is as far in the future as possible.
431	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
432	var r register
433	maxuse := int32(-1)
434	for t := register(0); t < s.numRegs; t++ {
435		if mask>>t&1 == 0 {
436			continue
437		}
438		v := s.regs[t].v
439		if n := s.values[v.ID].uses.dist; n > maxuse {
440			// v's next use is farther in the future than any value
441			// we've seen so far. A new best spill candidate.
442			r = t
443			maxuse = n
444		}
445	}
446	if maxuse == -1 {
447		s.f.Fatalf("couldn't find register to spill")
448	}
449
450	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
451		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
452		// So if there is still a free register, the allocation should have picked that one in the first place instead of
453		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
454		s.freeReg(r)
455		return r
456	}
457
458	// Try to move it around before kicking out, if there is a free register.
459	// We generate a Copy and record it. It will be deleted if never used.
460	v2 := s.regs[r].v
461	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
462	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
463		s.usedSinceBlockStart |= regMask(1) << r
464		r2 := pickReg(m)
465		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
466		s.copies[c] = false
467		if s.f.pass.debug > regDebug {
468			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
469		}
470		s.setOrig(c, v2)
471		s.assignReg(r2, v2, c)
472	}
473
	// If the evicted register isn't used between the start of the block
	// and now, then there is no reason to even request it on entry. We can
	// drop it from startRegs in that case.
	if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
		if s.startRegsMask&(regMask(1)<<r) != 0 {
479			if s.f.pass.debug > regDebug {
480				fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
481			}
482			s.startRegsMask &^= regMask(1) << r
483		}
484	}
485
486	s.freeReg(r)
487	s.usedSinceBlockStart |= regMask(1) << r
488	return r
489}
490
491// makeSpill returns a Value which represents the spilled value of v.
492// b is the block in which the spill is used.
493func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
494	vi := &s.values[v.ID]
495	if vi.spill != nil {
496		// Final block not known - keep track of subtree where restores reside.
497		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
498		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
499		return vi.spill
500	}
501	// Make a spill for v. We don't know where we want
502	// to put it yet, so we leave it blockless for now.
503	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
504	// We also don't know what the spill's arg will be.
505	// Leave it argless for now.
506	s.setOrig(spill, v)
507	vi.spill = spill
508	vi.restoreMin = s.sdom[b.ID].entry
509	vi.restoreMax = s.sdom[b.ID].exit
510	return spill
511}
512
513// allocValToReg allocates v to a register selected from regMask and
514// returns the register copy of v. Any previous user is kicked out and spilled
515// (if necessary). Load code is added at the current pc. If nospill is set the
516// allocated register is marked nospill so the assignment cannot be
517// undone until the caller allows it by clearing nospill. Returns a
518// *Value which is either v or a copy of v allocated to the chosen register.
519func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
520	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
521		c := v.copyIntoWithXPos(s.curBlock, pos)
522		c.OnWasmStack = true
523		s.setOrig(c, v)
524		return c
525	}
526	if v.OnWasmStack {
527		return v
528	}
529
530	vi := &s.values[v.ID]
531	pos = pos.WithNotStmt()
532	// Check if v is already in a requested register.
533	if mask&vi.regs != 0 {
534		r := pickReg(mask & vi.regs)
535		if s.regs[r].v != v || s.regs[r].c == nil {
536			panic("bad register state")
537		}
538		if nospill {
539			s.nospill |= regMask(1) << r
540		}
541		s.usedSinceBlockStart |= regMask(1) << r
542		return s.regs[r].c
543	}
544
545	var r register
546	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
547	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
548	if !onWasmStack {
549		// Allocate a register.
550		r = s.allocReg(mask, v)
551	}
552
553	// Allocate v to the new register.
554	var c *Value
555	if vi.regs != 0 {
556		// Copy from a register that v is already in.
557		r2 := pickReg(vi.regs)
558		if s.regs[r2].v != v {
559			panic("bad register state")
560		}
561		s.usedSinceBlockStart |= regMask(1) << r2
562		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
563	} else if v.rematerializeable() {
564		// Rematerialize instead of loading from the spill location.
565		c = v.copyIntoWithXPos(s.curBlock, pos)
566	} else {
567		// Load v from its spill location.
568		spill := s.makeSpill(v, s.curBlock)
569		if s.f.pass.debug > logSpills {
570			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
571		}
572		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
573	}
574
575	s.setOrig(c, v)
576
577	if onWasmStack {
578		c.OnWasmStack = true
579		return c
580	}
581
582	s.assignReg(r, v, c)
583	if c.Op == OpLoadReg && s.isGReg(r) {
584		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
585	}
586	if nospill {
587		s.nospill |= regMask(1) << r
588	}
589	return c
590}
591
// isLeaf reports whether f makes no function calls, not counting tail calls.
593func isLeaf(f *Func) bool {
594	for _, b := range f.Blocks {
595		for _, v := range b.Values {
596			if v.Op.IsCall() && !v.Op.IsTailCall() {
597				// tail call is not counted as it does not save the return PC or need a frame
598				return false
599			}
600		}
601	}
602	return true
603}
604
605// needRegister reports whether v needs a register.
606func (v *Value) needRegister() bool {
607	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
608}
609
610func (s *regAllocState) init(f *Func) {
611	s.f = f
612	s.f.RegAlloc = s.f.Cache.locs[:0]
613	s.registers = f.Config.registers
614	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
615		s.f.Fatalf("bad number of registers: %d", nr)
616	} else {
617		s.numRegs = register(nr)
618	}
619	// Locate SP, SB, and g registers.
620	s.SPReg = noRegister
621	s.SBReg = noRegister
622	s.GReg = noRegister
623	for r := register(0); r < s.numRegs; r++ {
624		switch s.registers[r].String() {
625		case "SP":
626			s.SPReg = r
627		case "SB":
628			s.SBReg = r
629		case "g":
630			s.GReg = r
631		}
632	}
633	// Make sure we found all required registers.
634	switch noRegister {
635	case s.SPReg:
636		s.f.Fatalf("no SP register found")
637	case s.SBReg:
638		s.f.Fatalf("no SB register found")
639	case s.GReg:
640		if f.Config.hasGReg {
641			s.f.Fatalf("no g register found")
642		}
643	}
644
645	// Figure out which registers we're allowed to use.
646	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
647	s.allocatable &^= 1 << s.SPReg
648	s.allocatable &^= 1 << s.SBReg
649	if s.f.Config.hasGReg {
650		s.allocatable &^= 1 << s.GReg
651	}
652	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
653		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
654	}
655	if s.f.Config.LinkReg != -1 {
656		if isLeaf(f) {
657			// Leaf functions don't save/restore the link register.
658			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
659		}
660	}
661	if s.f.Config.ctxt.Flag_dynlink {
662		switch s.f.Config.arch {
663		case "386":
664			// nothing to do.
665			// Note that for Flag_shared (position independent code)
666			// we do need to be careful, but that carefulness is hidden
667			// in the rewrite rules so we always have a free register
668			// available for global load/stores. See _gen/386.rules (search for Flag_shared).
669		case "amd64":
670			s.allocatable &^= 1 << 15 // R15
671		case "arm":
672			s.allocatable &^= 1 << 9 // R9
673		case "arm64":
674			// nothing to do
675		case "loong64": // R2 (aka TP) already reserved.
676			// nothing to do
677		case "ppc64le": // R2 already reserved.
678			// nothing to do
679		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
680			// nothing to do
681		case "s390x":
682			s.allocatable &^= 1 << 11 // R11
683		default:
684			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
685		}
686	}
687
688	// Linear scan register allocation can be influenced by the order in which blocks appear.
689	// Decouple the register allocation order from the generated block order.
690	// This also creates an opportunity for experiments to find a better order.
691	s.visitOrder = layoutRegallocOrder(f)
692
693	// Compute block order. This array allows us to distinguish forward edges
694	// from backward edges and compute how far they go.
695	s.blockOrder = make([]int32, f.NumBlocks())
696	for i, b := range s.visitOrder {
697		s.blockOrder[b.ID] = int32(i)
698	}
699
700	s.regs = make([]regState, s.numRegs)
701	nv := f.NumValues()
702	if cap(s.f.Cache.regallocValues) >= nv {
703		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
704	} else {
705		s.f.Cache.regallocValues = make([]valState, nv)
706	}
707	s.values = s.f.Cache.regallocValues
708	s.orig = s.f.Cache.allocValueSlice(nv)
709	s.copies = make(map[*Value]bool)
710	for _, b := range s.visitOrder {
711		for _, v := range b.Values {
712			if v.needRegister() {
713				s.values[v.ID].needReg = true
714				s.values[v.ID].rematerializeable = v.rematerializeable()
715				s.orig[v.ID] = v
716			}
717			// Note: needReg is false for values returning Tuple types.
718			// Instead, we mark the corresponding Selects as needReg.
719		}
720	}
721	s.computeLive()
722
723	s.endRegs = make([][]endReg, f.NumBlocks())
724	s.startRegs = make([][]startReg, f.NumBlocks())
725	s.spillLive = make([][]ID, f.NumBlocks())
726	s.sdom = f.Sdom()
727
728	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
729	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
730		canLiveOnStack := f.newSparseSet(f.NumValues())
731		defer f.retSparseSet(canLiveOnStack)
732		for _, b := range f.Blocks {
733			// New block. Clear candidate set.
734			canLiveOnStack.clear()
735			for _, c := range b.ControlValues() {
736				if c.Uses == 1 && !opcodeTable[c.Op].generic {
737					canLiveOnStack.add(c.ID)
738				}
739			}
740			// Walking backwards.
741			for i := len(b.Values) - 1; i >= 0; i-- {
742				v := b.Values[i]
743				if canLiveOnStack.contains(v.ID) {
744					v.OnWasmStack = true
745				} else {
					// Value cannot live on the stack. Values are not allowed to be reordered, so clear the candidate set.
747					canLiveOnStack.clear()
748				}
749				for _, arg := range v.Args {
750					// Value can live on the stack if:
751					// - it is only used once
752					// - it is used in the same basic block
753					// - it is not a "mem" value
754					// - it is a WebAssembly op
755					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
756						canLiveOnStack.add(arg.ID)
757					}
758				}
759			}
760		}
761	}
762
763	// The clobberdeadreg experiment inserts code to clobber dead registers
764	// at call sites.
765	// Ignore huge functions to avoid doing too much work.
766	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
767		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
768		s.doClobber = true
769	}
770}
771
772func (s *regAllocState) close() {
773	s.f.Cache.freeValueSlice(s.orig)
774}
775
// addUse adds a use record for id at distance dist from the start of the block.
777// All calls to addUse must happen with nonincreasing dist.
778func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
779	r := s.freeUseRecords
780	if r != nil {
781		s.freeUseRecords = r.next
782	} else {
783		r = &use{}
784	}
785	r.dist = dist
786	r.pos = pos
787	r.next = s.values[id].uses
788	s.values[id].uses = r
789	if r.next != nil && dist > r.next.dist {
790		s.f.Fatalf("uses added in wrong order")
791	}
792}
793
794// advanceUses advances the uses of v's args from the state before v to the state after v.
795// Any values which have no more uses are deallocated from registers.
796func (s *regAllocState) advanceUses(v *Value) {
797	for _, a := range v.Args {
798		if !s.values[a.ID].needReg {
799			continue
800		}
801		ai := &s.values[a.ID]
802		r := ai.uses
803		ai.uses = r.next
804		if r.next == nil {
805			// Value is dead, free all registers that hold it.
806			s.freeRegs(ai.regs)
807		}
808		r.next = s.freeUseRecords
809		s.freeUseRecords = r
810	}
811}
812
813// liveAfterCurrentInstruction reports whether v is live after
814// the current instruction is completed.  v must be used by the
815// current instruction.
816func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
817	u := s.values[v.ID].uses
818	if u == nil {
819		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
820	}
821	d := u.dist
822	for u != nil && u.dist == d {
823		u = u.next
824	}
825	return u != nil && u.dist > d
826}
827
// setState sets the state of the registers to that encoded in regs.
829func (s *regAllocState) setState(regs []endReg) {
830	s.freeRegs(s.used)
831	for _, x := range regs {
832		s.assignReg(x.r, x.v, x.c)
833	}
834}
835
836// compatRegs returns the set of registers which can store a type t.
837func (s *regAllocState) compatRegs(t *types.Type) regMask {
838	var m regMask
839	if t.IsTuple() || t.IsFlags() {
840		return 0
841	}
842	if t.IsFloat() || t == types.TypeInt128 {
843		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
844			m = s.f.Config.fp32RegMask
845		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
846			m = s.f.Config.fp64RegMask
847		} else {
848			m = s.f.Config.fpRegMask
849		}
850	} else {
851		m = s.f.Config.gpRegMask
852	}
853	return m & s.allocatable
854}
855
// regspec returns the regInfo for v's operation.
857func (s *regAllocState) regspec(v *Value) regInfo {
858	op := v.Op
859	if op == OpConvert {
860		// OpConvert is a generic op, so it doesn't have a
861		// register set in the static table. It can use any
862		// allocatable integer register.
863		m := s.allocatable & s.f.Config.gpRegMask
864		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
865	}
866	if op == OpArgIntReg {
867		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
868		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
869	}
870	if op == OpArgFloatReg {
871		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
872		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
873	}
874	if op.IsCall() {
875		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
876			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
877		}
878	}
879	if op == OpMakeResult && s.f.OwnAux.reg != nil {
880		return *s.f.OwnAux.ResultReg(s.f.Config)
881	}
882	return opcodeTable[op].reg
883}
884
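// isGReg reports whether r is the g register.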
885func (s *regAllocState) isGReg(r register) bool {
886	return s.f.Config.hasGReg && s.GReg == r
887}
888
// tmpVal is a dummy value used to represent the value being held in a temporary register.
890var tmpVal Value
891
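// regalloc performs the main allocation pass: it visits blocks in
// s.visitOrder, rebuilds each block's value list with the needed loads,
// spills, and copies, and records the register state at block boundaries
// (startRegs/endRegs) for later merge-edge fixup.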
892func (s *regAllocState) regalloc(f *Func) {
893	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
894	defer f.retSparseSet(regValLiveSet)
895	var oldSched []*Value
896	var phis []*Value
897	var phiRegs []register
898	var args []*Value
899
900	// Data structure used for computing desired registers.
901	var desired desiredState
902
903	// Desired registers for inputs & outputs for each instruction in the block.
904	type dentry struct {
905		out [4]register    // desired output registers
906		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
907	}
908	var dinfo []dentry
909
910	if f.Entry != f.Blocks[0] {
911		f.Fatalf("entry block must be first")
912	}
913
914	for _, b := range s.visitOrder {
915		if s.f.pass.debug > regDebug {
916			fmt.Printf("Begin processing block %v\n", b)
917		}
918		s.curBlock = b
919		s.startRegsMask = 0
920		s.usedSinceBlockStart = 0
921
922		// Initialize regValLiveSet and uses fields for this block.
923		// Walk backwards through the block doing liveness analysis.
924		regValLiveSet.clear()
925		for _, e := range s.live[b.ID] {
926			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
927			regValLiveSet.add(e.ID)
928		}
929		for _, v := range b.ControlValues() {
930			if s.values[v.ID].needReg {
931				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
932				regValLiveSet.add(v.ID)
933			}
934		}
935		for i := len(b.Values) - 1; i >= 0; i-- {
936			v := b.Values[i]
937			regValLiveSet.remove(v.ID)
938			if v.Op == OpPhi {
939				// Remove v from the live set, but don't add
940				// any inputs. This is the state the len(b.Preds)>1
941				// case below desires; it wants to process phis specially.
942				continue
943			}
944			if opcodeTable[v.Op].call {
945				// Function call clobbers all the registers but SP and SB.
946				regValLiveSet.clear()
947				if s.sp != 0 && s.values[s.sp].uses != nil {
948					regValLiveSet.add(s.sp)
949				}
950				if s.sb != 0 && s.values[s.sb].uses != nil {
951					regValLiveSet.add(s.sb)
952				}
953			}
954			for _, a := range v.Args {
955				if !s.values[a.ID].needReg {
956					continue
957				}
958				s.addUse(a.ID, int32(i), v.Pos)
959				regValLiveSet.add(a.ID)
960			}
961		}
962		if s.f.pass.debug > regDebug {
963			fmt.Printf("use distances for %s\n", b)
964			for i := range s.values {
965				vi := &s.values[i]
966				u := vi.uses
967				if u == nil {
968					continue
969				}
970				fmt.Printf("  v%d:", i)
971				for u != nil {
972					fmt.Printf(" %d", u.dist)
973					u = u.next
974				}
975				fmt.Println()
976			}
977		}
978
979		// Make a copy of the block schedule so we can generate a new one in place.
980		// We make a separate copy for phis and regular values.
981		nphi := 0
982		for _, v := range b.Values {
983			if v.Op != OpPhi {
984				break
985			}
986			nphi++
987		}
988		phis = append(phis[:0], b.Values[:nphi]...)
989		oldSched = append(oldSched[:0], b.Values[nphi:]...)
990		b.Values = b.Values[:0]
991
992		// Initialize start state of block.
993		if b == f.Entry {
994			// Regalloc state is empty to start.
995			if nphi > 0 {
996				f.Fatalf("phis in entry block")
997			}
998		} else if len(b.Preds) == 1 {
999			// Start regalloc state with the end state of the previous block.
1000			s.setState(s.endRegs[b.Preds[0].b.ID])
1001			if nphi > 0 {
1002				f.Fatalf("phis in single-predecessor block")
1003			}
1004			// Drop any values which are no longer live.
1005			// This may happen because at the end of p, a value may be
1006			// live but only used by some other successor of p.
1007			for r := register(0); r < s.numRegs; r++ {
1008				v := s.regs[r].v
1009				if v != nil && !regValLiveSet.contains(v.ID) {
1010					s.freeReg(r)
1011				}
1012			}
1013		} else {
1014			// This is the complicated case. We have more than one predecessor,
1015			// which means we may have Phi ops.
1016
			// Start with the final register state of the predecessor with the fewest live spills.
			// This choice is based on two observations:
			// 1. Fewer live spills indicate lower register pressure along that path,
			//    so this block's values are more likely to end up in registers.
			// 2. It tends to avoid predecessors containing function calls, since those
			//    usually generate many spills and lose the previous allocation state.
1024			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
1025			// an impact on the code size and compiler speed. But it is not easy to find a simple
1026			// and efficient method that combines multiple factors.
1027			idx := -1
1028			for i, p := range b.Preds {
1029				// If the predecessor has not been visited yet, skip it because its end state
				// (endRegs and spillLive) has not been computed yet.
1031				pb := p.b
1032				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1033					continue
1034				}
1035				if idx == -1 {
1036					idx = i
1037					continue
1038				}
1039				pSel := b.Preds[idx].b
1040				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1041					idx = i
1042				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
					// Break the tie with branch-likeliness information. After the critical-edge
					// pass, pb and pSel must be plain blocks, so check the edge into pb
					// (pb.Preds) instead of the edge pb->b.
					// TODO: improve the prediction of the likely predecessor. The method below
					// is only suitable for the simplest cases. For complex cases the prediction
					// may be inaccurate, but that does not affect the correctness of the program.
					// According to the layout algorithm, the predecessor with the smaller
					// blockOrder is the true branch, and test results show that choosing the
					// predecessor with the smaller blockOrder is better than making no choice.
1053					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1054						idx = i
1055					}
1056				}
1057			}
1058			if idx < 0 {
1059				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1060			}
1061			p := b.Preds[idx].b
1062			s.setState(s.endRegs[p.ID])
1063
1064			if s.f.pass.debug > regDebug {
1065				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1066				for _, x := range s.endRegs[p.ID] {
1067					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1068				}
1069			}
1070
1071			// Decide on registers for phi ops. Use the registers determined
1072			// by the primary predecessor if we can.
1073			// TODO: pick best of (already processed) predecessors?
1074			// Majority vote? Deepest nesting level?
1075			phiRegs = phiRegs[:0]
1076			var phiUsed regMask
1077
1078			for _, v := range phis {
1079				if !s.values[v.ID].needReg {
1080					phiRegs = append(phiRegs, noRegister)
1081					continue
1082				}
1083				a := v.Args[idx]
1084				// Some instructions target not-allocatable registers.
1085				// They're not suitable for further (phi-function) allocation.
1086				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1087				if m != 0 {
1088					r := pickReg(m)
1089					phiUsed |= regMask(1) << r
1090					phiRegs = append(phiRegs, r)
1091				} else {
1092					phiRegs = append(phiRegs, noRegister)
1093				}
1094			}
1095
1096			// Second pass - deallocate all in-register phi inputs.
1097			for i, v := range phis {
1098				if !s.values[v.ID].needReg {
1099					continue
1100				}
1101				a := v.Args[idx]
1102				r := phiRegs[i]
1103				if r == noRegister {
1104					continue
1105				}
1106				if regValLiveSet.contains(a.ID) {
1107					// Input value is still live (it is used by something other than Phi).
1108					// Try to move it around before kicking out, if there is a free register.
1109					// We generate a Copy in the predecessor block and record it. It will be
1110					// deleted later if never used.
1111					//
1112					// Pick a free register. At this point some registers used in the predecessor
1113					// block may have been deallocated. Those are the ones used for Phis. Exclude
1114					// them (and they are not going to be helpful anyway).
1115					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1116					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1117						r2 := pickReg(m)
1118						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1119						s.copies[c] = false
1120						if s.f.pass.debug > regDebug {
1121							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1122						}
1123						s.setOrig(c, a)
1124						s.assignReg(r2, a, c)
1125						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1126					}
1127				}
1128				s.freeReg(r)
1129			}
1130
1131			// Copy phi ops into new schedule.
1132			b.Values = append(b.Values, phis...)
1133
1134			// Third pass - pick registers for phis whose input
1135			// was not in a register in the primary predecessor.
1136			for i, v := range phis {
1137				if !s.values[v.ID].needReg {
1138					continue
1139				}
1140				if phiRegs[i] != noRegister {
1141					continue
1142				}
1143				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1144				// If one of the other inputs of v is in a register, and the register is available,
1145				// select this register, which can save some unnecessary copies.
1146				for i, pe := range b.Preds {
1147					if i == idx {
1148						continue
1149					}
1150					ri := noRegister
1151					for _, er := range s.endRegs[pe.b.ID] {
1152						if er.v == s.orig[v.Args[i].ID] {
1153							ri = er.r
1154							break
1155						}
1156					}
1157					if ri != noRegister && m>>ri&1 != 0 {
1158						m = regMask(1) << ri
1159						break
1160					}
1161				}
1162				if m != 0 {
1163					r := pickReg(m)
1164					phiRegs[i] = r
1165					phiUsed |= regMask(1) << r
1166				}
1167			}
1168
1169			// Set registers for phis. Add phi spill code.
1170			for i, v := range phis {
1171				if !s.values[v.ID].needReg {
1172					continue
1173				}
1174				r := phiRegs[i]
1175				if r == noRegister {
1176					// stack-based phi
1177					// Spills will be inserted in all the predecessors below.
1178					s.values[v.ID].spill = v // v starts life spilled
1179					continue
1180				}
1181				// register-based phi
1182				s.assignReg(r, v, v)
1183			}
1184
1185			// Deallocate any values which are no longer live. Phis are excluded.
1186			for r := register(0); r < s.numRegs; r++ {
1187				if phiUsed>>r&1 != 0 {
1188					continue
1189				}
1190				v := s.regs[r].v
1191				if v != nil && !regValLiveSet.contains(v.ID) {
1192					s.freeReg(r)
1193				}
1194			}
1195
1196			// Save the starting state for use by merge edges.
1197			// We append to a stack allocated variable that we'll
1198			// later copy into s.startRegs in one fell swoop, to save
1199			// on allocations.
1200			regList := make([]startReg, 0, 32)
1201			for r := register(0); r < s.numRegs; r++ {
1202				v := s.regs[r].v
1203				if v == nil {
1204					continue
1205				}
1206				if phiUsed>>r&1 != 0 {
1207					// Skip registers that phis used, we'll handle those
1208					// specially during merge edge processing.
1209					continue
1210				}
1211				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1212				s.startRegsMask |= regMask(1) << r
1213			}
1214			s.startRegs[b.ID] = make([]startReg, len(regList))
1215			copy(s.startRegs[b.ID], regList)
1216
1217			if s.f.pass.debug > regDebug {
1218				fmt.Printf("after phis\n")
1219				for _, x := range s.startRegs[b.ID] {
1220					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
1221				}
1222			}
1223		}
1224
1225		// Allocate space to record the desired registers for each value.
1226		if l := len(oldSched); cap(dinfo) < l {
1227			dinfo = make([]dentry, l)
1228		} else {
1229			dinfo = dinfo[:l]
1230			for i := range dinfo {
1231				dinfo[i] = dentry{}
1232			}
1233		}
1234
1235		// Load static desired register info at the end of the block.
1236		desired.copy(&s.desired[b.ID])
1237
1238		// Check actual assigned registers at the start of the next block(s).
1239		// Dynamically assigned registers will trump the static
1240		// desired registers computed during liveness analysis.
1241		// Note that we do this phase after startRegs is set above, so that
1242		// we get the right behavior for a block which branches to itself.
1243		for _, e := range b.Succs {
1244			succ := e.b
1245			// TODO: prioritize likely successor?
1246			for _, x := range s.startRegs[succ.ID] {
1247				desired.add(x.v.ID, x.r)
1248			}
1249			// Process phi ops in succ.
1250			pidx := e.i
1251			for _, v := range succ.Values {
1252				if v.Op != OpPhi {
1253					break
1254				}
1255				if !s.values[v.ID].needReg {
1256					continue
1257				}
1258				rp, ok := s.f.getHome(v.ID).(*Register)
1259				if !ok {
1260					// If v is not assigned a register, pick a register assigned to one of v's inputs.
1261					// Hopefully v will get assigned that register later.
1262					// If the inputs have allocated register information, add it to desired,
1263					// which may reduce spill or copy operations when the register is available.
1264					for _, a := range v.Args {
1265						rp, ok = s.f.getHome(a.ID).(*Register)
1266						if ok {
1267							break
1268						}
1269					}
1270					if !ok {
1271						continue
1272					}
1273				}
1274				desired.add(v.Args[pidx].ID, register(rp.num))
1275			}
1276		}
1277		// Walk values backwards computing desired register info.
1278		// See computeLive for more comments.
1279		for i := len(oldSched) - 1; i >= 0; i-- {
1280			v := oldSched[i]
1281			prefs := desired.remove(v.ID)
1282			regspec := s.regspec(v)
1283			desired.clobber(regspec.clobbers)
1284			for _, j := range regspec.inputs {
1285				if countRegs(j.regs) != 1 {
1286					continue
1287				}
1288				desired.clobber(j.regs)
1289				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1290			}
1291			if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1292				if opcodeTable[v.Op].commutative {
1293					desired.addList(v.Args[1].ID, prefs)
1294				}
1295				desired.addList(v.Args[0].ID, prefs)
1296			}
1297			// Save desired registers for this value.
1298			dinfo[i].out = prefs
1299			for j, a := range v.Args {
1300				if j >= len(dinfo[i].in) {
1301					break
1302				}
1303				dinfo[i].in[j] = desired.get(a.ID)
1304			}
1305		}
1306
1307		// Process all the non-phi values.
1308		for idx, v := range oldSched {
1309			tmpReg := noRegister
1310			if s.f.pass.debug > regDebug {
1311				fmt.Printf("  processing %s\n", v.LongString())
1312			}
1313			regspec := s.regspec(v)
1314			if v.Op == OpPhi {
1315				f.Fatalf("phi %s not at start of block", v)
1316			}
1317			if v.Op == OpSP {
1318				s.assignReg(s.SPReg, v, v)
1319				b.Values = append(b.Values, v)
1320				s.advanceUses(v)
1321				s.sp = v.ID
1322				continue
1323			}
1324			if v.Op == OpSB {
1325				s.assignReg(s.SBReg, v, v)
1326				b.Values = append(b.Values, v)
1327				s.advanceUses(v)
1328				s.sb = v.ID
1329				continue
1330			}
1331			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1332				if s.values[v.ID].needReg {
1333					if v.Op == OpSelectN {
1334						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1335					} else {
1336						var i = 0
1337						if v.Op == OpSelect1 {
1338							i = 1
1339						}
1340						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1341					}
1342				}
1343				b.Values = append(b.Values, v)
1344				s.advanceUses(v)
1345				continue
1346			}
1347			if v.Op == OpGetG && s.f.Config.hasGReg {
1348				// use hardware g register
1349				if s.regs[s.GReg].v != nil {
1350					s.freeReg(s.GReg) // kick out the old value
1351				}
1352				s.assignReg(s.GReg, v, v)
1353				b.Values = append(b.Values, v)
1354				s.advanceUses(v)
1355				continue
1356			}
1357			if v.Op == OpArg {
1358				// Args are "pre-spilled" values. We don't allocate
1359				// any register here. We just set up the spill pointer to
1360				// point at itself and any later user will restore it to use it.
1361				s.values[v.ID].spill = v
1362				b.Values = append(b.Values, v)
1363				s.advanceUses(v)
1364				continue
1365			}
1366			if v.Op == OpKeepAlive {
1367				// Make sure the argument to v is still live here.
1368				s.advanceUses(v)
1369				a := v.Args[0]
1370				vi := &s.values[a.ID]
1371				if vi.regs == 0 && !vi.rematerializeable {
1372					// Use the spill location.
1373					// This forces later liveness analysis to make the
1374					// value live at this point.
1375					v.SetArg(0, s.makeSpill(a, b))
1376				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1377					// Rematerializeable value with a gc.Node. This is the address of
1378					// a stack object (e.g. an LEAQ). Keep the object live.
1379					// Change it to VarLive, which is what plive expects for locals.
1380					v.Op = OpVarLive
1381					v.SetArgs1(v.Args[1])
1382					v.Aux = a.Aux
1383				} else {
1384					// In-register and rematerializeable values are already live.
1385					// These are typically rematerializeable constants like nil,
1386					// or values of a variable that were modified since the last call.
1387					v.Op = OpCopy
1388					v.SetArgs1(v.Args[1])
1389				}
1390				b.Values = append(b.Values, v)
1391				continue
1392			}
1393			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1394				// No register allocation required (or none specified yet)
1395				if s.doClobber && v.Op.IsCall() {
1396					s.clobberRegs(regspec.clobbers)
1397				}
1398				s.freeRegs(regspec.clobbers)
1399				b.Values = append(b.Values, v)
1400				s.advanceUses(v)
1401				continue
1402			}
1403
1404			if s.values[v.ID].rematerializeable {
1405				// Value is rematerializeable, don't issue it here.
1406				// It will get issued just before each use (see
				// allocValToReg).
1408				for _, a := range v.Args {
1409					a.Uses--
1410				}
1411				s.advanceUses(v)
1412				continue
1413			}
1414
1415			if s.f.pass.debug > regDebug {
1416				fmt.Printf("value %s\n", v.LongString())
1417				fmt.Printf("  out:")
1418				for _, r := range dinfo[idx].out {
1419					if r != noRegister {
1420						fmt.Printf(" %s", &s.registers[r])
1421					}
1422				}
1423				fmt.Println()
1424				for i := 0; i < len(v.Args) && i < 3; i++ {
1425					fmt.Printf("  in%d:", i)
1426					for _, r := range dinfo[idx].in[i] {
1427						if r != noRegister {
1428							fmt.Printf(" %s", &s.registers[r])
1429						}
1430					}
1431					fmt.Println()
1432				}
1433			}
1434
1435			// Move arguments to registers.
1436			// First, if an arg must be in a specific register and it is already
1437			// in place, keep it.
1438			args = append(args[:0], make([]*Value, len(v.Args))...)
1439			for i, a := range v.Args {
1440				if !s.values[a.ID].needReg {
1441					args[i] = a
1442				}
1443			}
1444			for _, i := range regspec.inputs {
1445				mask := i.regs
1446				if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1447					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1448				}
1449			}
1450			// Then, if an arg must be in a specific register and that
1451			// register is free, allocate that one. Otherwise when processing
1452			// another input we may kick a value into the free register, which
1453			// then will be kicked out again.
1454			// This is a common case for passing-in-register arguments for
1455			// function calls.
1456			for {
1457				freed := false
1458				for _, i := range regspec.inputs {
1459					if args[i.idx] != nil {
1460						continue // already allocated
1461					}
1462					mask := i.regs
1463					if countRegs(mask) == 1 && mask&^s.used != 0 {
1464						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1465						// If the input is in other registers that will be clobbered by v,
1466						// or the input is dead, free the registers. This may make room
1467						// for other inputs.
1468						oldregs := s.values[v.Args[i.idx].ID].regs
1469						if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1470							s.freeRegs(oldregs &^ mask &^ s.nospill)
1471							freed = true
1472						}
1473					}
1474				}
1475				if !freed {
1476					break
1477				}
1478			}
1479			// Last, allocate remaining ones, in an ordering defined
1480			// by the register specification (most constrained first).
1481			for _, i := range regspec.inputs {
1482				if args[i.idx] != nil {
1483					continue // already allocated
1484				}
1485				mask := i.regs
1486				if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1487					// Need a new register for the input.
1488					mask &= s.allocatable
1489					mask &^= s.nospill
					// Use desired register if available.
1491					if i.idx < 3 {
1492						for _, r := range dinfo[idx].in[i.idx] {
1493							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1494								// Desired register is allowed and unused.
1495								mask = regMask(1) << r
1496								break
1497							}
1498						}
1499					}
1500					// Avoid registers we're saving for other values.
1501					if mask&^desired.avoid != 0 {
1502						mask &^= desired.avoid
1503					}
1504				}
1505				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1506			}
1507
1508			// If the output clobbers the input register, make sure we have
1509			// at least two copies of the input register so we don't
1510			// have to reload the value from the spill location.
1511			if opcodeTable[v.Op].resultInArg0 {
1512				var m regMask
1513				if !s.liveAfterCurrentInstruction(v.Args[0]) {
1514					// arg0 is dead.  We can clobber its register.
1515					goto ok
1516				}
1517				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1518					args[0], args[1] = args[1], args[0]
1519					goto ok
1520				}
1521				if s.values[v.Args[0].ID].rematerializeable {
1522					// We can rematerialize the input, don't worry about clobbering it.
1523					goto ok
1524				}
1525				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1526					args[0], args[1] = args[1], args[0]
1527					goto ok
1528				}
1529				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1530					// we have at least 2 copies of arg0.  We can afford to clobber one.
1531					goto ok
1532				}
1533				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1534					args[0], args[1] = args[1], args[0]
1535					goto ok
1536				}
1537
1538				// We can't overwrite arg0 (or arg1, if commutative).  So we
1539				// need to make a copy of an input so we have a register we can modify.
1540
1541				// Possible new registers to copy into.
1542				m = s.compatRegs(v.Args[0].Type) &^ s.used
1543				if m == 0 {
1544					// No free registers.  In this case we'll just clobber
1545					// an input and future uses of that input must use a restore.
1546					// TODO(khr): We should really do this like allocReg does it,
1547					// spilling the value with the most distant next use.
1548					goto ok
1549				}
1550
1551				// Try to move an input to the desired output, if allowed.
1552				for _, r := range dinfo[idx].out {
1553					if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
1554						m = regMask(1) << r
1555						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1556						// Note: we update args[0] so the instruction will
1557						// use the register copy we just made.
1558						goto ok
1559					}
1560				}
1561				// Try to copy input to its desired location & use its old
1562				// location as the result register.
1563				for _, r := range dinfo[idx].in[0] {
1564					if r != noRegister && m>>r&1 != 0 {
1565						m = regMask(1) << r
1566						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1567						s.copies[c] = false
1568						// Note: no update to args[0] so the instruction will
1569						// use the original copy.
1570						goto ok
1571					}
1572				}
1573				if opcodeTable[v.Op].commutative {
1574					for _, r := range dinfo[idx].in[1] {
1575						if r != noRegister && m>>r&1 != 0 {
1576							m = regMask(1) << r
1577							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1578							s.copies[c] = false
1579							args[0], args[1] = args[1], args[0]
1580							goto ok
1581						}
1582					}
1583				}
1584
1585				// Avoid future fixed uses if we can.
1586				if m&^desired.avoid != 0 {
1587					m &^= desired.avoid
1588				}
1589				// Save input 0 to a new register so we can clobber it.
1590				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1591				s.copies[c] = false
1592
1593				// Normally we use the register of the old copy of input 0 as the target.
1594				// However, if input 0 is already in its desired register then we use
1595				// the register of the new copy instead.
1596				if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1597					if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1598						r := register(rp.num)
1599						for _, r2 := range dinfo[idx].in[0] {
1600							if r == r2 {
1601								args[0] = c
1602								break
1603							}
1604						}
1605					}
1606				}
1607			}
1608
1609		ok:
1610			// Pick a temporary register if needed.
1611			// It should be distinct from all the input registers, so we
1612			// allocate it after all the input registers, but before
1613			// the input registers are freed via advanceUses below.
			// (Not all instructions need the temporary to be distinct from the inputs, but it is conservative.)
1615			if opcodeTable[v.Op].needIntTemp {
1616				m := s.allocatable & s.f.Config.gpRegMask
1617				if m&^desired.avoid&^s.nospill != 0 {
1618					m &^= desired.avoid
1619				}
1620				tmpReg = s.allocReg(m, &tmpVal)
1621				s.nospill |= regMask(1) << tmpReg
1622			}
1623
1624			// Now that all args are in regs, we're ready to issue the value itself.
1625			// Before we pick a register for the output value, allow input registers
1626			// to be deallocated. We do this here so that the output can use the
1627			// same register as a dying input.
1628			if !opcodeTable[v.Op].resultNotInArgs {
1629				s.tmpused = s.nospill
1630				s.nospill = 0
1631				s.advanceUses(v) // frees any registers holding args that are no longer live
1632			}
1633
1634			// Dump any registers which will be clobbered
1635			if s.doClobber && v.Op.IsCall() {
			// Clobber registers that the regspec marks as clobbered, but
			// don't clobber inputs.
1638				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1639			}
1640			s.freeRegs(regspec.clobbers)
1641			s.tmpused |= regspec.clobbers
1642
1643			// Pick registers for outputs.
1644			{
1645				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
1646				maxOutIdx := -1
1647				var used regMask
1648				if tmpReg != noRegister {
1649					// Ensure output registers are distinct from the temporary register.
				// (Not all instructions need the outputs to be distinct from
				// the temporary, but requiring it is conservative.)
1651					used |= regMask(1) << tmpReg
1652				}
1653				for _, out := range regspec.outputs {
1654					mask := out.regs & s.allocatable &^ used
1655					if mask == 0 {
1656						continue
1657					}
1658					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1659						if !opcodeTable[v.Op].commutative {
1660							// Output must use the same register as input 0.
1661							r := register(s.f.getHome(args[0].ID).(*Register).num)
1662							if mask>>r&1 == 0 {
1663								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1664							}
1665							mask = regMask(1) << r
1666						} else {
1667							// Output must use the same register as input 0 or 1.
1668							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1669							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1670							// Check r0 and r1 for desired output register.
1671							found := false
1672							for _, r := range dinfo[idx].out {
1673								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1674									mask = regMask(1) << r
1675									found = true
1676									if r == r1 {
1677										args[0], args[1] = args[1], args[0]
1678									}
1679									break
1680								}
1681							}
1682							if !found {
								// Neither is desired; pick r0.
1684								mask = regMask(1) << r0
1685							}
1686						}
1687					}
1688					if out.idx == 0 { // desired registers only apply to the first element of a tuple result
1689						for _, r := range dinfo[idx].out {
1690							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1691								// Desired register is allowed and unused.
1692								mask = regMask(1) << r
1693								break
1694							}
1695						}
1696					}
1697					// Avoid registers we're saving for other values.
1698					if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1699						mask &^= desired.avoid
1700					}
1701					r := s.allocReg(mask, v)
1702					if out.idx > maxOutIdx {
1703						maxOutIdx = out.idx
1704					}
1705					outRegs[out.idx] = r
1706					used |= regMask(1) << r
1707					s.tmpused |= regMask(1) << r
1708				}
1709				// Record register choices
1710				if v.Type.IsTuple() {
1711					var outLocs LocPair
1712					if r := outRegs[0]; r != noRegister {
1713						outLocs[0] = &s.registers[r]
1714					}
1715					if r := outRegs[1]; r != noRegister {
1716						outLocs[1] = &s.registers[r]
1717					}
1718					s.f.setHome(v, outLocs)
1719					// Note that subsequent SelectX instructions will do the assignReg calls.
1720				} else if v.Type.IsResults() {
1721					// preallocate outLocs to the right size, which is maxOutIdx+1
					outLocs := make(LocResults, maxOutIdx+1)
1723					for i := 0; i <= maxOutIdx; i++ {
1724						if r := outRegs[i]; r != noRegister {
1725							outLocs[i] = &s.registers[r]
1726						}
1727					}
1728					s.f.setHome(v, outLocs)
1729				} else {
1730					if r := outRegs[0]; r != noRegister {
1731						s.assignReg(r, v, v)
1732					}
1733				}
1734				if tmpReg != noRegister {
1735					// Remember the temp register allocation, if any.
1736					if s.f.tempRegs == nil {
1737						s.f.tempRegs = map[ID]*Register{}
1738					}
1739					s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1740				}
1741			}
1742
1743			// deallocate dead args, if we have not done so
1744			if opcodeTable[v.Op].resultNotInArgs {
1745				s.nospill = 0
1746				s.advanceUses(v) // frees any registers holding args that are no longer live
1747			}
1748			s.tmpused = 0
1749
1750			// Issue the Value itself.
1751			for i, a := range args {
1752				v.SetArg(i, a) // use register version of arguments
1753			}
1754			b.Values = append(b.Values, v)
1755		}
1756
1757		// Copy the control values - we need this so we can reduce the
1758		// uses property of these values later.
1759		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1760
1761		// Load control values into registers.
1762		for i, v := range b.ControlValues() {
1763			if !s.values[v.ID].needReg {
1764				continue
1765			}
1766			if s.f.pass.debug > regDebug {
1767				fmt.Printf("  processing control %s\n", v.LongString())
1768			}
1769			// We assume that a control input can be passed in any
1770			// type-compatible register. If this turns out not to be true,
1771			// we'll need to introduce a regspec for a block's control value.
1772			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1773		}
1774
1775		// Reduce the uses of the control values once registers have been loaded.
1776		// This loop is equivalent to the advanceUses method.
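		// (We can't call advanceUses directly because the control values
		// are not Args of any Value; pop their use records by hand.)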
1777		for _, v := range controls {
1778			vi := &s.values[v.ID]
1779			if !vi.needReg {
1780				continue
1781			}
1782			// Remove this use from the uses list.
1783			u := vi.uses
1784			vi.uses = u.next
1785			if u.next == nil {
1786				s.freeRegs(vi.regs) // value is dead
1787			}
1788			u.next = s.freeUseRecords
1789			s.freeUseRecords = u
1790		}
1791
1792		// If we are approaching a merge point and we are the primary
1793		// predecessor of it, find live values that we use soon after
1794		// the merge point and promote them to registers now.
1795		if len(b.Succs) == 1 {
1796			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1797				s.freeReg(s.GReg) // Spill value in G register before any merge.
1798			}
			// For this to be worthwhile, the loop must have no unavoidable calls in it.
1800			top := b.Succs[0].b
1801			loop := s.loopnest.b2l[top.ID]
1802			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1803				goto badloop
1804			}
1805
1806			// TODO: sort by distance, pick the closest ones?
1807			for _, live := range s.live[b.ID] {
1808				if live.dist >= unlikelyDistance {
1809					// Don't preload anything live after the loop.
1810					continue
1811				}
1812				vid := live.ID
1813				vi := &s.values[vid]
1814				if vi.regs != 0 {
1815					continue
1816				}
1817				if vi.rematerializeable {
1818					continue
1819				}
1820				v := s.orig[vid]
1821				m := s.compatRegs(v.Type) &^ s.used
				// Use a desired register if available.
1823			outerloop:
1824				for _, e := range desired.entries {
1825					if e.ID != v.ID {
1826						continue
1827					}
1828					for _, r := range e.regs {
1829						if r != noRegister && m>>r&1 != 0 {
1830							m = regMask(1) << r
1831							break outerloop
1832						}
1833					}
1834				}
1835				if m&^desired.avoid != 0 {
1836					m &^= desired.avoid
1837				}
1838				if m != 0 {
1839					s.allocValToReg(v, m, false, b.Pos)
1840				}
1841			}
1842		}
1843	badloop:
1844		;
1845
1846		// Save end-of-block register state.
	// First count how many; this cuts allocations in half.
1848		k := 0
1849		for r := register(0); r < s.numRegs; r++ {
1850			v := s.regs[r].v
1851			if v == nil {
1852				continue
1853			}
1854			k++
1855		}
1856		regList := make([]endReg, 0, k)
1857		for r := register(0); r < s.numRegs; r++ {
1858			v := s.regs[r].v
1859			if v == nil {
1860				continue
1861			}
1862			regList = append(regList, endReg{r, v, s.regs[r].c})
1863		}
1864		s.endRegs[b.ID] = regList
1865
1866		if checkEnabled {
1867			regValLiveSet.clear()
1868			for _, x := range s.live[b.ID] {
1869				regValLiveSet.add(x.ID)
1870			}
1871			for r := register(0); r < s.numRegs; r++ {
1872				v := s.regs[r].v
1873				if v == nil {
1874					continue
1875				}
1876				if !regValLiveSet.contains(v.ID) {
1877					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
1878				}
1879			}
1880		}
1881
1882		// If a value is live at the end of the block and
1883		// isn't in a register, generate a use for the spill location.
1884		// We need to remember this information so that
1885		// the liveness analysis in stackalloc is correct.
1886		for _, e := range s.live[b.ID] {
1887			vi := &s.values[e.ID]
1888			if vi.regs != 0 {
1889				// in a register, we'll use that source for the merge.
1890				continue
1891			}
1892			if vi.rematerializeable {
1893				// we'll rematerialize during the merge.
1894				continue
1895			}
1896			if s.f.pass.debug > regDebug {
1897				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
1898			}
1899			spill := s.makeSpill(s.orig[e.ID], b)
1900			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
1901		}
1902
1903		// Clear any final uses.
1904		// All that is left should be the pseudo-uses added for values which
1905		// are live at the end of b.
1906		for _, e := range s.live[b.ID] {
1907			u := s.values[e.ID].uses
1908			if u == nil {
1909				f.Fatalf("live at end, no uses v%d", e.ID)
1910			}
1911			if u.next != nil {
1912				f.Fatalf("live at end, too many uses v%d", e.ID)
1913			}
1914			s.values[e.ID].uses = nil
1915			u.next = s.freeUseRecords
1916			s.freeUseRecords = u
1917		}
1918
1919		// allocReg may have dropped registers from startRegsMask that
1920		// aren't actually needed in startRegs. Synchronize back to
1921		// startRegs.
1922		//
1923		// This must be done before placing spills, which will look at
1924		// startRegs to decide if a block is a valid block for a spill.
1925		if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
1926			regs := make([]startReg, 0, c)
1927			for _, sr := range s.startRegs[b.ID] {
1928				if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
1929					continue
1930				}
1931				regs = append(regs, sr)
1932			}
1933			s.startRegs[b.ID] = regs
1934		}
1935	}
1936
1937	// Decide where the spills we generated will go.
1938	s.placeSpills()
1939
1940	// Anything that didn't get a register gets a stack location here.
1941	// (StoreReg, stack-based phis, inputs, ...)
1942	stacklive := stackalloc(s.f, s.spillLive)
1943
1944	// Fix up all merge edges.
1945	s.shuffle(stacklive)
1946
1947	// Erase any copies we never used.
1948	// Also, an unused copy might be the only use of another copy,
1949	// so continue erasing until we reach a fixed point.
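	// (Deleting entries from s.copies while ranging over it is safe in Go;
	// copies that become dead as a result are picked up on the next pass.)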
1950	for {
1951		progress := false
1952		for c, used := range s.copies {
1953			if !used && c.Uses == 0 {
1954				if s.f.pass.debug > regDebug {
1955					fmt.Printf("delete copied value %s\n", c.LongString())
1956				}
1957				c.resetArgs()
1958				f.freeValue(c)
1959				delete(s.copies, c)
1960				progress = true
1961			}
1962		}
1963		if !progress {
1964			break
1965		}
1966	}
1967
1968	for _, b := range s.visitOrder {
1969		i := 0
1970		for _, v := range b.Values {
1971			if v.Op == OpInvalid {
1972				continue
1973			}
1974			b.Values[i] = v
1975			i++
1976		}
1977		b.Values = b.Values[:i]
1978	}
1979}
1980
1981func (s *regAllocState) placeSpills() {
1982	mustBeFirst := func(op Op) bool {
1983		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
1984	}
1985
1986	// Start maps block IDs to the list of spills
1987	// that go at the start of the block (but after any phis).
1988	start := map[ID][]*Value{}
1989	// After maps value IDs to the list of spills
1990	// that go immediately after that value ID.
1991	after := map[ID][]*Value{}
1992
1993	for i := range s.values {
1994		vi := s.values[i]
1995		spill := vi.spill
1996		if spill == nil {
1997			continue
1998		}
1999		if spill.Block != nil {
2000			// Some spills are already fully set up,
2001			// like OpArgs and stack-based phis.
2002			continue
2003		}
2004		v := s.orig[i]
2005
2006		// Walk down the dominator tree looking for a good place to
2007		// put the spill of v.  At the start "best" is the best place
2008		// we have found so far.
2009		// TODO: find a way to make this O(1) without arbitrary cutoffs.
2010		if v == nil {
2011			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
2012		}
2013		best := v.Block
2014		bestArg := v
2015		var bestDepth int16
2016		if l := s.loopnest.b2l[best.ID]; l != nil {
2017			bestDepth = l.depth
2018		}
2019		b := best
2020		const maxSpillSearch = 100
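		// Cap the search so that pathological dominator trees can't make
		// spill placement quadratic; if we hit the cap, we simply use the
		// best block found so far.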
2021		for i := 0; i < maxSpillSearch; i++ {
2022			// Find the child of b in the dominator tree which
2023			// dominates all restores.
2024			p := b
2025			b = nil
2026			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
2027				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
2028					// c also dominates all restores.  Walk down into c.
2029					b = c
2030					break
2031				}
2032			}
2033			if b == nil {
2034				// Ran out of blocks which dominate all restores.
2035				break
2036			}
2037
2038			var depth int16
2039			if l := s.loopnest.b2l[b.ID]; l != nil {
2040				depth = l.depth
2041			}
2042			if depth > bestDepth {
2043				// Don't push the spill into a deeper loop.
2044				continue
2045			}
2046
2047			// If v is in a register at the start of b, we can
2048			// place the spill here (after the phis).
2049			if len(b.Preds) == 1 {
2050				for _, e := range s.endRegs[b.Preds[0].b.ID] {
2051					if e.v == v {
2052						// Found a better spot for the spill.
2053						best = b
2054						bestArg = e.c
2055						bestDepth = depth
2056						break
2057					}
2058				}
2059			} else {
2060				for _, e := range s.startRegs[b.ID] {
2061					if e.v == v {
2062						// Found a better spot for the spill.
2063						best = b
2064						bestArg = e.c
2065						bestDepth = depth
2066						break
2067					}
2068				}
2069			}
2070		}
2071
2072		// Put the spill in the best block we found.
2073		spill.Block = best
2074		spill.AddArg(bestArg)
2075		if best == v.Block && !mustBeFirst(v.Op) {
2076			// Place immediately after v.
2077			after[v.ID] = append(after[v.ID], spill)
2078		} else {
2079			// Place at the start of best block.
2080			start[best.ID] = append(start[best.ID], spill)
2081		}
2082	}
2083
2084	// Insert spill instructions into the block schedules.
2085	var oldSched []*Value
2086	for _, b := range s.visitOrder {
2087		nfirst := 0
2088		for _, v := range b.Values {
2089			if !mustBeFirst(v.Op) {
2090				break
2091			}
2092			nfirst++
2093		}
2094		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
2095		b.Values = b.Values[:nfirst]
2096		b.Values = append(b.Values, start[b.ID]...)
2097		for _, v := range oldSched {
2098			b.Values = append(b.Values, v)
2099			b.Values = append(b.Values, after[v.ID]...)
2100		}
2101	}
2102}
2103
2104// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
2105func (s *regAllocState) shuffle(stacklive [][]ID) {
2106	var e edgeState
2107	e.s = s
2108	e.cache = map[ID][]*Value{}
2109	e.contents = map[Location]contentRecord{}
2110	if s.f.pass.debug > regDebug {
2111		fmt.Printf("shuffle %s\n", s.f.Name)
2112		fmt.Println(s.f.String())
2113	}
2114
2115	for _, b := range s.visitOrder {
2116		if len(b.Preds) <= 1 {
2117			continue
2118		}
2119		e.b = b
2120		for i, edge := range b.Preds {
2121			p := edge.b
2122			e.p = p
2123			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2124			e.process()
2125		}
2126	}
2127
2128	if s.f.pass.debug > regDebug {
2129		fmt.Printf("post shuffle %s\n", s.f.Name)
2130		fmt.Println(s.f.String())
2131	}
2132}
2133
2134type edgeState struct {
2135	s    *regAllocState
2136	p, b *Block // edge goes from p->b.
2137
2138	// for each pre-regalloc value, a list of equivalent cached values
2139	cache      map[ID][]*Value
2140	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
2141
2142	// map from location to the value it contains
2143	contents map[Location]contentRecord
2144
2145	// desired destination locations
2146	destinations []dstRecord
2147	extra        []dstRecord
2148
2149	usedRegs              regMask // registers currently holding something
2150	uniqueRegs            regMask // registers holding the only copy of a value
2151	finalRegs             regMask // registers holding final target
2152	rematerializeableRegs regMask // registers that hold rematerializeable values
2153}
2154
2155type contentRecord struct {
2156	vid   ID       // pre-regalloc value
2157	c     *Value   // cached value
2158	final bool     // this is a satisfied destination
2159	pos   src.XPos // source position of use of the value
2160}
2161
2162type dstRecord struct {
2163	loc    Location // register or stack slot
2164	vid    ID       // pre-regalloc value it should contain
2165	splice **Value  // place to store reference to the generating instruction
2166	pos    src.XPos // source position of use of this location
2167}
2168
2169// setup initializes the edge state for shuffling.
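// idx is the index of this edge in the successor block's Preds list; it
// selects which argument of each phi in that block must be routed to the
// phi's location.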
2170func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
2171	if e.s.f.pass.debug > regDebug {
2172		fmt.Printf("edge %s->%s\n", e.p, e.b)
2173	}
2174
2175	// Clear state.
2176	for _, vid := range e.cachedVals {
2177		delete(e.cache, vid)
2178	}
2179	e.cachedVals = e.cachedVals[:0]
2180	for k := range e.contents {
2181		delete(e.contents, k)
2182	}
2183	e.usedRegs = 0
2184	e.uniqueRegs = 0
2185	e.finalRegs = 0
2186	e.rematerializeableRegs = 0
2187
2188	// Live registers can be sources.
2189	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // source position doesn't matter here
2191	}
2192	// So can all of the spill locations.
2193	for _, spillID := range stacklive {
2194		v := e.s.orig[spillID]
2195		spill := e.s.values[v.ID].spill
2196		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// Spills were placed so that they dominate only the uses
			// found during the first regalloc pass. The edge fixup code
2199			// can't use a spill location if the spill doesn't dominate
2200			// the edge.
2201			// We are guaranteed that if the spill doesn't dominate this edge,
2202			// then the value is available in a register (because we called
2203			// makeSpill for every value not in a register at the start
2204			// of an edge).
2205			continue
2206		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // source position doesn't matter here
2208	}
2209
2210	// Figure out all the destinations we need.
2211	dsts := e.destinations[:0]
2212	for _, x := range dstReg {
2213		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
2214	}
2215	// Phis need their args to end up in a specific location.
2216	for _, v := range e.b.Values {
2217		if v.Op != OpPhi {
2218			break
2219		}
2220		loc := e.s.f.getHome(v.ID)
2221		if loc == nil {
2222			continue
2223		}
2224		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
2225	}
2226	e.destinations = dsts
2227
2228	if e.s.f.pass.debug > regDebug {
2229		for _, vid := range e.cachedVals {
2230			a := e.cache[vid]
2231			for _, c := range a {
2232				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
2233			}
2234		}
2235		for _, d := range e.destinations {
2236			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
2237		}
2238	}
2239}
2240
2241// process generates code to move all the values to the right destination locations.
2242func (e *edgeState) process() {
2243	dsts := e.destinations
2244
2245	// Process the destinations until they are all satisfied.
2246	for len(dsts) > 0 {
2247		i := 0
2248		for _, d := range dsts {
2249			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
2250				// Failed - save for next iteration.
2251				dsts[i] = d
2252				i++
2253			}
2254		}
2255		if i < len(dsts) {
2256			// Made some progress. Go around again.
2257			dsts = dsts[:i]
2258
			// Append any extra destinations we generated.
2260			dsts = append(dsts, e.extra...)
2261			e.extra = e.extra[:0]
2262			continue
2263		}
2264
2265		// We made no progress. That means that any
2266		// remaining unsatisfied moves are in simple cycles.
2267		// For example, A -> B -> C -> D -> A.
2268		//   A ----> B
2269		//   ^       |
2270		//   |       |
2271		//   |       v
2272		//   D <---- C
2273
2274		// To break the cycle, we pick an unused register, say R,
2275		// and put a copy of B there.
2276		//   A ----> B
2277		//   ^       |
2278		//   |       |
2279		//   |       v
2280		//   D <---- C <---- R=copyofB
2281		// When we resume the outer loop, the A->B move can now proceed,
2282		// and eventually the whole cycle completes.
2283
2284		// Copy any cycle location to a temp register. This duplicates
2285		// one of the cycle entries, allowing the just duplicated value
2286		// to be overwritten and the cycle to proceed.
2287		d := dsts[0]
2288		loc := d.loc
2289		vid := e.contents[loc].vid
2290		c := e.contents[loc].c
2291		r := e.findRegFor(c.Type)
2292		if e.s.f.pass.debug > regDebug {
2293			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
2294		}
2295		e.erase(r)
2296		pos := d.pos.WithNotStmt()
2297		if _, isReg := loc.(*Register); isReg {
2298			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
2299		} else {
2300			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2301		}
2302		e.set(r, vid, c, false, pos)
2303		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
2304			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
2305		}
2306	}
2307}
2308
2309// processDest generates code to put value vid into location loc. Returns true
2310// if progress was made.
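// It returns false only when the destination currently holds the last
// remaining copy of some other (non-rematerializeable) value; the caller
// retries that destination after other moves have run.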
2311func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
2312	pos = pos.WithNotStmt()
2313	occupant := e.contents[loc]
2314	if occupant.vid == vid {
2315		// Value is already in the correct place.
2316		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
2317		if splice != nil {
2318			(*splice).Uses--
2319			*splice = occupant.c
2320			occupant.c.Uses++
2321		}
		// Note: if splice==nil then occupant.c will appear dead. This code
		// is no longer in SSA form, so be careful not to run deadcode
		// elimination after this pass.
2325		if _, ok := e.s.copies[occupant.c]; ok {
2326			// The copy at occupant.c was used to avoid spill.
2327			e.s.copies[occupant.c] = true
2328		}
2329		return true
2330	}
2331
2332	// Check if we're allowed to clobber the destination location.
2333	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
2334		// We can't overwrite the last copy
2335		// of a value that needs to survive.
2336		return false
2337	}
2338
2339	// Copy from a source of v, register preferred.
2340	v := e.s.orig[vid]
2341	var c *Value
2342	var src Location
2343	if e.s.f.pass.debug > regDebug {
2344		fmt.Printf("moving v%d to %s\n", vid, loc)
2345		fmt.Printf("sources of v%d:", vid)
2346	}
2347	for _, w := range e.cache[vid] {
2348		h := e.s.f.getHome(w.ID)
2349		if e.s.f.pass.debug > regDebug {
2350			fmt.Printf(" %s:%s", h, w)
2351		}
2352		_, isreg := h.(*Register)
2353		if src == nil || isreg {
2354			c = w
2355			src = h
2356		}
2357	}
2358	if e.s.f.pass.debug > regDebug {
2359		if src != nil {
2360			fmt.Printf(" [use %s]\n", src)
2361		} else {
2362			fmt.Printf(" [no source]\n")
2363		}
2364	}
2365	_, dstReg := loc.(*Register)
2366
2367	// Pre-clobber destination. This avoids the
2368	// following situation:
2369	//   - v is currently held in R0 and stacktmp0.
2370	//   - We want to copy stacktmp1 to stacktmp0.
2371	//   - We choose R0 as the temporary register.
2372	// During the copy, both R0 and stacktmp0 are
2373	// clobbered, losing both copies of v. Oops!
2374	// Erasing the destination early means R0 will not
2375	// be chosen as the temp register, as it will then
2376	// be the last copy of v.
2377	e.erase(loc)
2378	var x *Value
2379	if c == nil || e.s.values[vid].rematerializeable {
2380		if !e.s.values[vid].rematerializeable {
2381			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
2382		}
2383		if dstReg {
2384			x = v.copyInto(e.p)
2385		} else {
2386			// Rematerialize into stack slot. Need a free
2387			// register to accomplish this.
2388			r := e.findRegFor(v.Type)
2389			e.erase(r)
2390			x = v.copyIntoWithXPos(e.p, pos)
2391			e.set(r, vid, x, false, pos)
2392			// Make sure we spill with the size of the slot, not the
2393			// size of x (which might be wider due to our dropping
2394			// of narrowing conversions).
2395			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
2396		}
2397	} else {
2398		// Emit move from src to dst.
2399		_, srcReg := src.(*Register)
2400		if srcReg {
2401			if dstReg {
2402				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2403			} else {
2404				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
2405			}
2406		} else {
2407			if dstReg {
2408				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2409			} else {
2410				// mem->mem. Use temp register.
2411				r := e.findRegFor(c.Type)
2412				e.erase(r)
2413				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2414				e.set(r, vid, t, false, pos)
2415				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
2416			}
2417		}
2418	}
2419	e.set(loc, vid, x, true, pos)
2420	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
2421		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
2422	}
2423	if splice != nil {
2424		(*splice).Uses--
2425		*splice = x
2426		x.Uses++
2427	}
2428	return true
2429}
2430
2431// set changes the contents of location loc to hold the given value and its cached representative.
2432func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
2433	e.s.f.setHome(c, loc)
2434	e.contents[loc] = contentRecord{vid, c, final, pos}
2435	a := e.cache[vid]
2436	if len(a) == 0 {
2437		e.cachedVals = append(e.cachedVals, vid)
2438	}
2439	a = append(a, c)
2440	e.cache[vid] = a
2441	if r, ok := loc.(*Register); ok {
2442		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
2443			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
2444		}
2445		e.usedRegs |= regMask(1) << uint(r.num)
2446		if final {
2447			e.finalRegs |= regMask(1) << uint(r.num)
2448		}
2449		if len(a) == 1 {
2450			e.uniqueRegs |= regMask(1) << uint(r.num)
2451		}
2452		if len(a) == 2 {
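			// The value now has a second cached copy, so the register
			// holding the first copy no longer holds a unique copy.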
2453			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
2454				e.uniqueRegs &^= regMask(1) << uint(t.num)
2455			}
2456		}
2457		if e.s.values[vid].rematerializeable {
2458			e.rematerializeableRegs |= regMask(1) << uint(r.num)
2459		}
2460	}
2461	if e.s.f.pass.debug > regDebug {
2462		fmt.Printf("%s\n", c.LongString())
2463		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
2464	}
2465}
2466
// erase removes whatever value is currently cached in location loc.
2468func (e *edgeState) erase(loc Location) {
2469	cr := e.contents[loc]
2470	if cr.c == nil {
2471		return
2472	}
2473	vid := cr.vid
2474
2475	if cr.final {
2476		// Add a destination to move this value back into place.
2477		// Make sure it gets added to the tail of the destination queue
2478		// so we make progress on other moves first.
2479		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
2480	}
2481
2482	// Remove c from the list of cached values.
2483	a := e.cache[vid]
2484	for i, c := range a {
2485		if e.s.f.getHome(c.ID) == loc {
2486			if e.s.f.pass.debug > regDebug {
2487				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
2488			}
2489			a[i], a = a[len(a)-1], a[:len(a)-1]
2490			break
2491		}
2492	}
2493	e.cache[vid] = a
2494
2495	// Update register masks.
2496	if r, ok := loc.(*Register); ok {
2497		e.usedRegs &^= regMask(1) << uint(r.num)
2498		if cr.final {
2499			e.finalRegs &^= regMask(1) << uint(r.num)
2500		}
2501		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
2502	}
2503	if len(a) == 1 {
2504		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
2505			e.uniqueRegs |= regMask(1) << uint(r.num)
2506		}
2507	}
2508}
2509
2510// findRegFor finds a register we can use to make a temp copy of type typ.
2511func (e *edgeState) findRegFor(typ *types.Type) Location {
2512	// Which registers are possibilities.
2513	types := &e.s.f.Config.Types
2514	m := e.s.compatRegs(typ)
2515
2516	// Pick a register. In priority order:
2517	// 1) an unused register
2518	// 2) a non-unique register not holding a final value
2519	// 3) a non-unique register
2520	// 4) a register holding a rematerializeable value
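	// Non-unique registers are preferred because another copy of their value
	// exists elsewhere, so clobbering them loses nothing. A register holding
	// a rematerializeable value is acceptable because that value can simply
	// be recomputed later.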
2521	x := m &^ e.usedRegs
2522	if x != 0 {
2523		return &e.s.registers[pickReg(x)]
2524	}
2525	x = m &^ e.uniqueRegs &^ e.finalRegs
2526	if x != 0 {
2527		return &e.s.registers[pickReg(x)]
2528	}
2529	x = m &^ e.uniqueRegs
2530	if x != 0 {
2531		return &e.s.registers[pickReg(x)]
2532	}
2533	x = m & e.rematerializeableRegs
2534	if x != 0 {
2535		return &e.s.registers[pickReg(x)]
2536	}
2537
2538	// No register is available.
2539	// Pick a register to spill.
2540	for _, vid := range e.cachedVals {
2541		a := e.cache[vid]
2542		for _, c := range a {
2543			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
2544				if !c.rematerializeable() {
2545					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
2546					// Allocate a temp location to spill a register to.
2547					// The type of the slot is immaterial - it will not be live across
2548					// any safepoint. Just use a type big enough to hold any register.
2549					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
2550					// TODO: reuse these slots. They'll need to be erased first.
2551					e.set(t, vid, x, false, c.Pos)
2552					if e.s.f.pass.debug > regDebug {
2553						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
2554					}
2555				}
2556				// r will now be overwritten by the caller. At some point
2557				// later, the newly saved value will be moved back to its
2558				// final destination in processDest.
2559				return r
2560			}
2561		}
2562	}
2563
	fmt.Printf("m:%d unique:%d final:%d rematerializeable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
2565	for _, vid := range e.cachedVals {
2566		a := e.cache[vid]
2567		for _, c := range a {
2568			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
2569		}
2570	}
2571	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
2572	return nil
2573}
2574
2575// rematerializeable reports whether the register allocator should recompute
2576// a value instead of spilling/restoring it.
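// Such values are ops marked rematerializeable in the opcode table whose
// arguments, if any, are only SP or SB - typically constants and simple
// address computations that are cheap to recompute.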
2577func (v *Value) rematerializeable() bool {
2578	if !opcodeTable[v.Op].rematerializeable {
2579		return false
2580	}
2581	for _, a := range v.Args {
2582		// SP and SB (generated by OpSP and OpSB) are always available.
2583		if a.Op != OpSP && a.Op != OpSB {
2584			return false
2585		}
2586	}
2587	return true
2588}
2589
2590type liveInfo struct {
2591	ID   ID       // ID of value
2592	dist int32    // # of instructions before next use
2593	pos  src.XPos // source position of next use
2594}
2595
2596// computeLive computes a map from block ID to a list of value IDs live at the end
// of that block. Each entry also records how many instructions remain until
// the next use of that value. The resulting map is stored in s.live.
2599// computeLive also computes the desired register information at the end of each block.
2600// This desired register information is stored in s.desired.
2601// TODO: this could be quadratic if lots of variables are live across lots of
2602// basic blocks. Figure out a way to make this function (or, more precisely, the user
2603// of this function) require only linear size & time.
2604func (s *regAllocState) computeLive() {
2605	f := s.f
2606	s.live = make([][]liveInfo, f.NumBlocks())
2607	s.desired = make([]desiredState, f.NumBlocks())
2608	var phis []*Value
2609
2610	live := f.newSparseMapPos(f.NumValues())
2611	defer f.retSparseMapPos(live)
2612	t := f.newSparseMapPos(f.NumValues())
2613	defer f.retSparseMapPos(t)
2614
2615	// Keep track of which value we want in each register.
2616	var desired desiredState
2617
2618	// Instead of iterating over f.Blocks, iterate over their postordering.
2619	// Liveness information flows backward, so starting at the end
2620	// increases the probability that we will stabilize quickly.
2621	// TODO: Do a better job yet. Here's one possibility:
2622	// Calculate the dominator tree and locate all strongly connected components.
2623	// If a value is live in one block of an SCC, it is live in all.
2624	// Walk the dominator tree from end to beginning, just once, treating SCC
// components as single blocks, duplicating the calculated liveness information
2626	// out to all of them.
2627	po := f.postorder()
2628	s.loopnest = f.loopnest()
2629	s.loopnest.calculateDepths()
2630	for {
2631		changed := false
2632
2633		for _, b := range po {
2634			// Start with known live values at the end of the block.
2635			// Add len(b.Values) to adjust from end-of-block distance
2636			// to beginning-of-block distance.
2637			live.clear()
2638			for _, e := range s.live[b.ID] {
2639				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
2640			}
2641
2642			// Mark control values as live
2643			for _, c := range b.ControlValues() {
2644				if s.values[c.ID].needReg {
2645					live.set(c.ID, int32(len(b.Values)), b.Pos)
2646				}
2647			}
2648
2649			// Propagate backwards to the start of the block
2650			// Assumes Values have been scheduled.
2651			phis = phis[:0]
2652			for i := len(b.Values) - 1; i >= 0; i-- {
2653				v := b.Values[i]
2654				live.remove(v.ID)
2655				if v.Op == OpPhi {
2656					// save phi ops for later
2657					phis = append(phis, v)
2658					continue
2659				}
2660				if opcodeTable[v.Op].call {
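					// Penalize everything live across the call: treat its
					// next use as far away so the allocator prefers to keep
					// values not live across the call in registers instead.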
2661					c := live.contents()
2662					for i := range c {
2663						c[i].val += unlikelyDistance
2664					}
2665				}
2666				for _, a := range v.Args {
2667					if s.values[a.ID].needReg {
2668						live.set(a.ID, int32(i), v.Pos)
2669					}
2670				}
2671			}
2672			// Propagate desired registers backwards.
2673			desired.copy(&s.desired[b.ID])
2674			for i := len(b.Values) - 1; i >= 0; i-- {
2675				v := b.Values[i]
2676				prefs := desired.remove(v.ID)
2677				if v.Op == OpPhi {
2678					// TODO: if v is a phi, save desired register for phi inputs.
2679					// For now, we just drop it and don't propagate
					// desired registers back through phi nodes.
2681					continue
2682				}
2683				regspec := s.regspec(v)
2684				// Cancel desired registers if they get clobbered.
2685				desired.clobber(regspec.clobbers)
2686				// Update desired registers if there are any fixed register inputs.
2687				for _, j := range regspec.inputs {
2688					if countRegs(j.regs) != 1 {
2689						continue
2690					}
2691					desired.clobber(j.regs)
2692					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
2693				}
2694				// Set desired register of input 0 if this is a 2-operand instruction.
2695				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
2696					// ADDQconst is added here because we want to treat it as resultInArg0 for
2697					// the purposes of desired registers, even though it is not an absolute requirement.
2698					// This is because we'd rather implement it as ADDQ instead of LEAQ.
					// The same applies to ADDLconst.
2700					// Select0 is added here to propagate the desired register to the tuple-generating instruction.
2701					if opcodeTable[v.Op].commutative {
2702						desired.addList(v.Args[1].ID, prefs)
2703					}
2704					desired.addList(v.Args[0].ID, prefs)
2705				}
2706			}
2707
2708			// For each predecessor of b, expand its list of live-at-end values.
2709			// invariant: live contains the values live at the start of b (excluding phi inputs)
2710			for i, e := range b.Preds {
2711				p := e.b
2712				// Compute additional distance for the edge.
2713				// Note: delta must be at least 1 to distinguish the control
2714				// value use from the first user in a successor block.
2715				delta := int32(normalDistance)
2716				if len(p.Succs) == 2 {
2717					if p.Succs[0].b == b && p.Likely == BranchLikely ||
2718						p.Succs[1].b == b && p.Likely == BranchUnlikely {
2719						delta = likelyDistance
2720					}
2721					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
2722						p.Succs[1].b == b && p.Likely == BranchLikely {
2723						delta = unlikelyDistance
2724					}
2725				}
2726
2727				// Update any desired registers at the end of p.
2728				s.desired[p.ID].merge(&desired)
2729
2730				// Start t off with the previously known live values at the end of p.
2731				t.clear()
2732				for _, e := range s.live[p.ID] {
2733					t.set(e.ID, e.dist, e.pos)
2734				}
2735				update := false
2736
2737				// Add new live values from scanning this block.
2738				for _, e := range live.contents() {
2739					d := e.val + delta
2740					if !t.contains(e.key) || d < t.get(e.key) {
2741						update = true
2742						t.set(e.key, d, e.pos)
2743					}
2744				}
2745				// Also add the correct arg from the saved phi values.
2746				// All phis are at distance delta (we consider them
2747				// simultaneously happening at the start of the block).
2748				for _, v := range phis {
2749					id := v.Args[i].ID
2750					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
2751						update = true
2752						t.set(id, delta, v.Pos)
2753					}
2754				}
2755
2756				if !update {
2757					continue
2758				}
2759				// The live set has changed, update it.
2760				l := s.live[p.ID][:0]
2761				if cap(l) < t.size() {
2762					l = make([]liveInfo, 0, t.size())
2763				}
2764				for _, e := range t.contents() {
2765					l = append(l, liveInfo{e.key, e.val, e.pos})
2766				}
2767				s.live[p.ID] = l
2768				changed = true
2769			}
2770		}
2771
2772		if !changed {
2773			break
2774		}
2775	}
2776	if f.pass.debug > regDebug {
2777		fmt.Println("live values at end of each block")
2778		for _, b := range f.Blocks {
2779			fmt.Printf("  %s:", b)
2780			for _, x := range s.live[b.ID] {
2781				fmt.Printf(" v%d(%d)", x.ID, x.dist)
2782				for _, e := range s.desired[b.ID].entries {
2783					if e.ID != x.ID {
2784						continue
2785					}
2786					fmt.Printf("[")
2787					first := true
2788					for _, r := range e.regs {
2789						if r == noRegister {
2790							continue
2791						}
2792						if !first {
2793							fmt.Printf(",")
2794						}
2795						fmt.Print(&s.registers[r])
2796						first = false
2797					}
2798					fmt.Printf("]")
2799				}
2800			}
2801			if avoid := s.desired[b.ID].avoid; avoid != 0 {
2802				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
2803			}
2804			fmt.Println()
2805		}
2806	}
2807}
2808
2809// A desiredState represents desired register assignments.
2810type desiredState struct {
2811	// Desired assignments will be small, so we just use a list
2812	// of valueID+registers entries.
2813	entries []desiredStateEntry
2814	// Registers that other values want to be in.  This value will
2815	// contain at least the union of the regs fields of entries, but
	// may also have bits set for registers wanted by values that were once in
	// this data structure but have since been removed.
2818	avoid regMask
2819}
2820type desiredStateEntry struct {
2821	// (pre-regalloc) value
2822	ID ID
2823	// Registers it would like to be in, in priority order.
2824	// Unused slots are filled with noRegister.
2825	// For opcodes that return tuples, we track desired registers only
2826	// for the first element of the tuple.
2827	regs [4]register
2828}
2829
2830func (d *desiredState) clear() {
2831	d.entries = d.entries[:0]
2832	d.avoid = 0
2833}
2834
2835// get returns a list of desired registers for value vid.
2836func (d *desiredState) get(vid ID) [4]register {
2837	for _, e := range d.entries {
2838		if e.ID == vid {
2839			return e.regs
2840		}
2841	}
2842	return [4]register{noRegister, noRegister, noRegister, noRegister}
2843}
2844
2845// add records that we'd like value vid to be in register r.
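// r becomes the highest-priority desired register for vid; existing entries
// shift down one slot and the lowest-priority one falls off the end.
// For example, regs [R1, R2, R3, noRegister] becomes [R3, R1, R2, noRegister]
// after add(vid, R3), and then [R4, R3, R1, R2] after add(vid, R4).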
2846func (d *desiredState) add(vid ID, r register) {
2847	d.avoid |= regMask(1) << r
2848	for i := range d.entries {
2849		e := &d.entries[i]
2850		if e.ID != vid {
2851			continue
2852		}
2853		if e.regs[0] == r {
2854			// Already known and highest priority
2855			return
2856		}
2857		for j := 1; j < len(e.regs); j++ {
2858			if e.regs[j] == r {
2859				// Move from lower priority to top priority
2860				copy(e.regs[1:], e.regs[:j])
2861				e.regs[0] = r
2862				return
2863			}
2864		}
2865		copy(e.regs[1:], e.regs[:])
2866		e.regs[0] = r
2867		return
2868	}
2869	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
2870}
2871
2872func (d *desiredState) addList(vid ID, regs [4]register) {
2873	// regs is in priority order, so iterate in reverse order.
2874	for i := len(regs) - 1; i >= 0; i-- {
2875		r := regs[i]
2876		if r != noRegister {
2877			d.add(vid, r)
2878		}
2879	}
2880}
2881
2882// clobber erases any desired registers in the set m.
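// Surviving registers keep their relative priority order, and entries left
// with no registers are removed entirely. For example, an entry with regs
// [R1, R2, noRegister, noRegister] becomes [R2, noRegister, noRegister,
// noRegister] when m contains R1 but not R2.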
2883func (d *desiredState) clobber(m regMask) {
2884	for i := 0; i < len(d.entries); {
2885		e := &d.entries[i]
2886		j := 0
2887		for _, r := range e.regs {
2888			if r != noRegister && m>>r&1 == 0 {
2889				e.regs[j] = r
2890				j++
2891			}
2892		}
2893		if j == 0 {
2894			// No more desired registers for this value.
2895			d.entries[i] = d.entries[len(d.entries)-1]
2896			d.entries = d.entries[:len(d.entries)-1]
2897			continue
2898		}
2899		for ; j < len(e.regs); j++ {
2900			e.regs[j] = noRegister
2901		}
2902		i++
2903	}
2904	d.avoid &^= m
2905}
2906
2907// copy copies a desired state from another desiredState x.
2908func (d *desiredState) copy(x *desiredState) {
2909	d.entries = append(d.entries[:0], x.entries...)
2910	d.avoid = x.avoid
2911}
2912
2913// remove removes the desired registers for vid and returns them.
2914func (d *desiredState) remove(vid ID) [4]register {
2915	for i := range d.entries {
2916		if d.entries[i].ID == vid {
2917			regs := d.entries[i].regs
2918			d.entries[i] = d.entries[len(d.entries)-1]
2919			d.entries = d.entries[:len(d.entries)-1]
2920			return regs
2921		}
2922	}
2923	return [4]register{noRegister, noRegister, noRegister, noRegister}
2924}
2925
2926// merge merges another desired state x into d.
2927func (d *desiredState) merge(x *desiredState) {
2928	d.avoid |= x.avoid
2929	// There should only be a few desired registers, so
2930	// linear insert is ok.
2931	for _, e := range x.entries {
2932		d.addList(e.ID, e.regs)
2933	}
2934}
2935
2936func min32(x, y int32) int32 {
2937	if x < y {
2938		return x
2939	}
2940	return y
2941}
2942func max32(x, y int32) int32 {
2943	if x > y {
2944		return x
2945	}
2946	return y
2947}
2948