// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"internal/buildcfg"
	"math"

	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/objw"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/ssagen"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = ssa.AuxMark
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() {
		switch t.Size() {
		case 1:
			return x86.AMOVBLZX
		case 2:
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS
		}
	}
	panic(fmt.Sprintf("bad store type %v", t))
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//
//	dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// memIdx fills out a as an indexed memory reference for v.
// It assumes that the base register and the index register
// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
// The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
func memIdx(a *obj.Addr, v *ssa.Value) {
	r, i := v.Args[0].Reg(), v.Args[1].Reg()
	a.Type = obj.TYPE_MEM
	a.Scale = v.Op.Scale()
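	// SP cannot be encoded as an index register in an x86 memory operand.
	// When the scale is 1, the base and index are interchangeable,
	// so swap them if SP ended up as the index.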
	if a.Scale == 1 && i == x86.REG_SP {
		r, i = i, r
	}
	a.Reg = r
	a.Index = i
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ.
// See runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

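// getgFromTLS loads the g pointer from thread-local storage into register r.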
func getgFromTLS(s *ssagen.State, r int16) {
	// See the comments in cmd/internal/obj/x86/obj6.go
	// near CanUse1InsnTLS for a detailed explanation of these instructions.
	if x86.CanUse1InsnTLS(base.Ctxt) {
		// MOVQ (TLS), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	} else {
		// MOVQ TLS, r
		// MOVQ (r)(TLS*1), r
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_TLS
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		q := s.Prog(x86.AMOVQ)
		q.From.Type = obj.TYPE_MEM
		q.From.Reg = r
		q.From.Index = x86.REG_TLS
		q.From.Scale = 1
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r
	}
}

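// ssaGenValue emits the machine instructions for a single SSA value v.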
func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64VFMADD231SD:
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.AddRestSourceReg(v.Args[1].Reg())
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
		ssa.OpAMD64POR, ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

	case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
		p := s.Prog(v.Op.Asm())
		lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_REG
		p.From.Reg = bits
		p.To.Type = obj.TYPE_REG
		p.To.Reg = lo
		p.AddRestSourceReg(hi)

	case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
		ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
		ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
			p.To.Reg = v.Reg0()
		default:
			p.To.Reg = v.Reg()
		}

	case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.AddRestSourceReg(v.Args[1].Reg())

	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
		ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
		ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
		ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
		ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
		ssagen.AddAux(&m, v)
		p.AddRestSource(m)

	case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
		ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
		ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
		ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
		ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
		ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
		m := obj.Addr{Type: obj.TYPE_MEM}
		memIdx(&m, v)
		ssagen.AddAux(&m, v)
		p.AddRestSource(m)

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		var opCMP, opNEG, opSXD obj.As
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO
		case ssa.OpAMD64DIVL:
			opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ
		case ssa.OpAMD64DIVW:
			opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD
		}

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var j1, j2 *obj.Prog
		if ssa.DivisionNeedsFixUp(v) {
			c := s.Prog(opCMP)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = r
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1

			// Divisor is not -1, proceed with normal division.
			j1 = s.Prog(x86.AJNE)
			j1.To.Type = obj.TYPE_BRANCH

			// Divisor is -1, manually compute quotient and remainder via fixup code.
			// n / -1 = -n
			n1 := s.Prog(opNEG)
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX

			// n % -1 == 0
			opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, no point in zeroing the remainder.

			// Skip over normal division.
			j2 = s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH
		}

		// Sign extend dividend and perform division.
		p := s.Prog(opSXD)
		if j1 != nil {
			j1.To.SetTarget(p)
		}
		p = s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		if j2 != nil {
			j2.To.SetTarget(s.Pc())
		}

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// The frontend rewrites constant division by 8/16/32-bit integers into
		// HMUL by a constant.
		// SSA rewrites generate the 64-bit versions.

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
		// results q in AX, r in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
		r := v.Reg0()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		switch r {
		case r0:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			v.Fatalf("output not in same register as an input %s", v.LongString())
		}

	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			switch v.AuxInt {
			case 1:
				var asm obj.As
				// The software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case -1:
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case 0x80:
				// 'SUBQ $-0x80, r' is shorter to encode than
				// and functionally equivalent to 'ADDQ $0x80, r'.
				asm := x86.ASUBL
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ASUBQ
				}
				p := s.Prog(asm)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = -0x80
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return

			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		// Flag condition: ^ZERO || PARITY
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
		} else {
			q = s.Prog(x86.ACMOVWPS)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
		q.To.Reg = v.Reg()

	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,TMP
		//   CMOV*NE  DST,TMP
		//   CMOV*PC  TMP,DST
		//
		// TODO(rasky): we could generate:
		//   CMOV*NE  DST,SRC
		//   CMOV*PC  SRC,DST
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		t := v.RegTmp()
		opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = t
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
		} else {
			q = s.Prog(x86.ACMOVWPC)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = t
		q.To.Type = obj.TYPE_REG
		q.To.Reg = v.Reg()

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.OpAMD64ANDQconst:
		asm := v.Op.Asm()
		// If the constant is positive and fits into 32 bits, use ANDL.
		// This saves a few bytes of encoding.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			asm = x86.AANDL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		o := v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = o
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of creating a slow 3-operand LEA.
			switch v.Op {
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			}
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = o
			p.To.Type = obj.TYPE_REG
			p.To.Reg = o
		}
		ssagen.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP;
		// we must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRQconst:
		op := v.Op
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version because it's shorter
			op = ssa.OpAMD64BTLconst
		}
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[2].Reg()
	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			opregreg(s, x86.AXORL, x, x)
			break
		}

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
		ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
		ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
		ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		memIdx(&p.To, v)
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val()
		if val == 1 || val == -1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmodify {
				if val == 1 {
					asm = x86.AINCQ
				} else {
					asm = x86.ADECQ
				}
			} else {
				if val == 1 {
					asm = x86.AINCL
				} else {
					asm = x86.ADECL
				}
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
		ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val64()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)

	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVOstoreconst:
		sc := v.AuxValAndOff()
		if sc.Val() != 0 {
			v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString())
		}

		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_X15
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())

	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		switch {
		case p.As == x86.AADDQ && p.From.Offset == 1:
			p.As = x86.AINCQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDQ && p.From.Offset == -1:
			p.As = x86.ADECQ
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == 1:
			p.As = x86.AINCL
			p.From.Type = obj.TYPE_NONE
		case p.As == x86.AADDL && p.From.Offset == -1:
			p.As = x86.ADECL
			p.From.Type = obj.TYPE_NONE
		}
		memIdx(&p.To, v)
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		var p *obj.Prog
		switch v.Op {
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		}
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
		p := s.Prog(v.Op.Asm())

		r, i := v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_MEM
		p.From.Scale = v.Op.Scale()
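		// As in memIdx, SP cannot be encoded as an index register; with
		// scale 1 the base and index are interchangeable, so swap them.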
		if p.From.Scale == 1 && i == x86.REG_SP {
			r, i = i, r
		}
		p.From.Reg = r
		p.From.Index = i

		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64DUFFZERO:
		if s.ABI != obj.ABIInternal {
			// zero X15 manually
			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
		}
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffcopy
		if v.AuxInt%16 != 0 {
			v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
		}
		p.To.Offset = 14 * (64 - v.AuxInt/16)
		// 14 and 64 are magic constants.  14 is the number of bytes to encode:
		//	MOVUPS	(SI), X0
		//	ADDQ	$16, SI
		//	MOVUPS	X0, (DI)
		//	ADDQ	$16, DI
		// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.

	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredHasCPUFeature:
		p := s.Prog(x86.AMOVBLZX)
		p.From.Type = obj.TYPE_MEM
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill.
		// The loop only runs once.
		for _, ap := range v.Block.Func.RegArgs {
			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
			s.FuncInfo().AddSpill(
				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)})
		}
		v.Block.Func.RegArgs = nil
		ssagen.CheckArgReg(v)
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		if s.ABI == obj.ABIInternal {
			v.Fatalf("LoweredGetG should not appear in ABIInternal")
		}
		r := v.Reg()
		getgFromTLS(s, r)
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
		if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
			// zeroing X15 when entering ABIInternal from ABI0
			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
			}
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		}
		if v.Op == ssa.OpAMD64CALLtail {
			s.TailCall(v)
			break
		}
		s.Call(v)
		if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
			// zeroing X15 when entering ABIInternal from ABI0
			if buildcfg.GOOS != "plan9" { // do not use SSE on Plan 9
				opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
			}
			// set G register from TLS
			getgFromTLS(s, x86.REG_R14)
		}
	case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)

	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if types.PtrSize == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]

	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64NEGLflags:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
			p.To.Reg = v.Reg0()
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
			p.To.Reg = v.Reg()
		}
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
		if val < 0 || val > 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.AddRestSourceReg(v.Args[0].Reg())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
		ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
		ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel CPUs.
			// The TZCNT/LZCNT problem affects pre-Skylake models. See discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7.
			// Xor register with itself to break the dependency.
			opregreg(s, x86.AXORL, v.Reg(), v.Reg())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		ssa.OpAMD64SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)

	case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1,
		ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1,
		ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1,
		ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1,
		ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.To, v)
		ssagen.AddAux(&p.To, v)

	case ssa.OpAMD64SETNEF:
		t := v.RegTmp()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = t
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(s, x86.AORL, v.Reg(), t)

	case ssa.OpAMD64SETEQF:
		t := v.RegTmp()
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = t
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(s, x86.AANDL, v.Reg(), t)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Reg0()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
		p.To.Offset += 4
	case ssa.OpClobberReg:
		x := uint64(0xdeaddeaddeaddead)
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(x)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

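// blockJump maps each conditional block kind to the branch instruction taken
// to reach Succs[0] (asm) and the inverted branch used to reach Succs[1] (invasm).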
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

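// eqfJumps and nefJumps describe the two-branch sequences used for the
// floating-point EQF and NEF blocks (see s.CombJump), with the outer index
// selecting which successor is the fallthrough.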
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

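// ssaGenBlock emits the control-flow instructions that end block b.
// next is the block that will be laid out immediately after b, so a
// jump to next can be omitted.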
func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.BlockAMD64EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	case ssa.BlockAMD64JUMPTABLE:
		// JMP      *(TABLE)(INDEX*8)
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = b.Controls[1].Reg()
		p.To.Index = b.Controls[0].Reg()
		p.To.Scale = 8
		// Save jump tables for later resolution of the target blocks.
		s.JumpTables = append(s.JumpTables, b)

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}

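// loadRegResult generates a load of the result value n (of type t) from its
// stack slot at offset off into register reg.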
func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p := s.Prog(loadByType(t))
	p.From.Type = obj.TYPE_MEM
	p.From.Name = obj.NAME_AUTO
	p.From.Sym = n.Linksym()
	p.From.Offset = n.FrameOffset() + off
	p.To.Type = obj.TYPE_REG
	p.To.Reg = reg
	return p
}

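// spillArgReg generates a store of register reg, which holds argument n of
// type t, to the argument's stack slot, appending it after p.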
func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
	p.To.Name = obj.NAME_PARAM
	p.To.Sym = n.Linksym()
	p.Pos = p.Pos.WithNotStmt()
	return p
}
