1// Copyright © 2015 The Go Authors.  All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21package riscv
22
23import (
24	"cmd/internal/obj"
25	"cmd/internal/objabi"
26	"cmd/internal/sys"
27	"fmt"
28	"internal/abi"
29	"internal/buildcfg"
30	"log"
31	"math/bits"
32	"strings"
33)
34
35func buildop(ctxt *obj.Link) {}
36
37func jalToSym(ctxt *obj.Link, p *obj.Prog, lr int16) {
38	switch p.As {
39	case obj.ACALL, obj.AJMP, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY:
40	default:
41		ctxt.Diag("unexpected Prog in jalToSym: %v", p)
42		return
43	}
44
45	p.As = AJAL
46	p.Mark |= NEED_JAL_RELOC
47	p.From.Type = obj.TYPE_REG
48	p.From.Reg = lr
49	p.Reg = obj.REG_NONE
50}
51
52// progedit is called individually for each *obj.Prog. It normalizes instruction
53// formats and eliminates as many pseudo-instructions as possible.
54func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
55
56	// Expand binary instructions to ternary ones.
57	if p.Reg == obj.REG_NONE {
58		switch p.As {
59		case AADDI, ASLTI, ASLTIU, AANDI, AORI, AXORI, ASLLI, ASRLI, ASRAI,
60			AADDIW, ASLLIW, ASRLIW, ASRAIW, AADDW, ASUBW, ASLLW, ASRLW, ASRAW,
61			AADD, AAND, AOR, AXOR, ASLL, ASRL, ASUB, ASRA,
62			AMUL, AMULH, AMULHU, AMULHSU, AMULW, ADIV, ADIVU, ADIVW, ADIVUW,
63			AREM, AREMU, AREMW, AREMUW,
64			AADDUW, ASH1ADD, ASH1ADDUW, ASH2ADD, ASH2ADDUW, ASH3ADD, ASH3ADDUW, ASLLIUW,
65			AANDN, AORN, AXNOR, AMAX, AMAXU, AMIN, AMINU, AROL, AROLW, AROR, ARORW, ARORI, ARORIW,
66			ABCLR, ABCLRI, ABEXT, ABEXTI, ABINV, ABINVI, ABSET, ABSETI:
67			p.Reg = p.To.Reg
68		}
69	}
70
71	// Rewrite instructions with constant operands to refer to the immediate
72	// form of the instruction.
73	if p.From.Type == obj.TYPE_CONST {
74		switch p.As {
75		case AADD:
76			p.As = AADDI
77		case ASUB:
78			p.As, p.From.Offset = AADDI, -p.From.Offset
79		case ASLT:
80			p.As = ASLTI
81		case ASLTU:
82			p.As = ASLTIU
83		case AAND:
84			p.As = AANDI
85		case AOR:
86			p.As = AORI
87		case AXOR:
88			p.As = AXORI
89		case ASLL:
90			p.As = ASLLI
91		case ASRL:
92			p.As = ASRLI
93		case ASRA:
94			p.As = ASRAI
95		case AADDW:
96			p.As = AADDIW
97		case ASUBW:
98			p.As, p.From.Offset = AADDIW, -p.From.Offset
99		case ASLLW:
100			p.As = ASLLIW
101		case ASRLW:
102			p.As = ASRLIW
103		case ASRAW:
104			p.As = ASRAIW
105		case AROR:
106			p.As = ARORI
107		case ARORW:
108			p.As = ARORIW
109		case ABCLR:
110			p.As = ABCLRI
111		case ABEXT:
112			p.As = ABEXTI
113		case ABINV:
114			p.As = ABINVI
115		case ABSET:
116			p.As = ABSETI
117		}
118	}
119
120	switch p.As {
121	case obj.AJMP:
122		// Turn JMP into JAL ZERO or JALR ZERO.
123		p.From.Type = obj.TYPE_REG
124		p.From.Reg = REG_ZERO
125
126		switch p.To.Type {
127		case obj.TYPE_BRANCH:
128			p.As = AJAL
129		case obj.TYPE_MEM:
130			switch p.To.Name {
131			case obj.NAME_NONE:
132				p.As = AJALR
133			case obj.NAME_EXTERN, obj.NAME_STATIC:
134				// Handled in preprocess.
135			default:
136				ctxt.Diag("unsupported name %d for %v", p.To.Name, p)
137			}
138		default:
139			panic(fmt.Sprintf("unhandled type %+v", p.To.Type))
140		}
141
142	case obj.ACALL:
143		switch p.To.Type {
144		case obj.TYPE_MEM:
145			// Handled in preprocess.
146		case obj.TYPE_REG:
147			p.As = AJALR
148			p.From.Type = obj.TYPE_REG
149			p.From.Reg = REG_LR
150		default:
151			ctxt.Diag("unknown destination type %+v in CALL: %v", p.To.Type, p)
152		}
153
154	case obj.AUNDEF:
155		p.As = AEBREAK
156
157	case ASCALL:
158		// SCALL is the old name for ECALL.
159		p.As = AECALL
160
161	case ASBREAK:
162		// SBREAK is the old name for EBREAK.
163		p.As = AEBREAK
164
165	case AMOV:
166		if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == obj.REG_NONE && int64(int32(p.From.Offset)) != p.From.Offset {
167			ctz := bits.TrailingZeros64(uint64(p.From.Offset))
168			val := p.From.Offset >> ctz
169			if int64(int32(val)) == val {
170				// It's ok. We can handle constants with many trailing zeros.
171				break
172			}
173			// Put >32-bit constants in memory and load them.
174			p.From.Type = obj.TYPE_MEM
175			p.From.Sym = ctxt.Int64Sym(p.From.Offset)
176			p.From.Name = obj.NAME_EXTERN
177			p.From.Offset = 0
178		}
179	}
180}
181
182// addrToReg extracts the register from an Addr, handling special Addr.Names.
183func addrToReg(a obj.Addr) int16 {
184	switch a.Name {
185	case obj.NAME_PARAM, obj.NAME_AUTO:
186		return REG_SP
187	}
188	return a.Reg
189}
190
191// movToLoad converts a MOV mnemonic into the corresponding load instruction.
192func movToLoad(mnemonic obj.As) obj.As {
193	switch mnemonic {
194	case AMOV:
195		return ALD
196	case AMOVB:
197		return ALB
198	case AMOVH:
199		return ALH
200	case AMOVW:
201		return ALW
202	case AMOVBU:
203		return ALBU
204	case AMOVHU:
205		return ALHU
206	case AMOVWU:
207		return ALWU
208	case AMOVF:
209		return AFLW
210	case AMOVD:
211		return AFLD
212	default:
213		panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
214	}
215}
216
217// movToStore converts a MOV mnemonic into the corresponding store instruction.
218func movToStore(mnemonic obj.As) obj.As {
219	switch mnemonic {
220	case AMOV:
221		return ASD
222	case AMOVB:
223		return ASB
224	case AMOVH:
225		return ASH
226	case AMOVW:
227		return ASW
228	case AMOVF:
229		return AFSW
230	case AMOVD:
231		return AFSD
232	default:
233		panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
234	}
235}
236
237// markRelocs marks an obj.Prog that specifies a MOV pseudo-instruction and
238// requires relocation.
239func markRelocs(p *obj.Prog) {
240	switch p.As {
241	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
242		switch {
243		case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG:
244			switch p.From.Name {
245			case obj.NAME_EXTERN, obj.NAME_STATIC:
246				p.Mark |= NEED_PCREL_ITYPE_RELOC
247			}
248		case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG:
249			switch p.From.Name {
250			case obj.NAME_EXTERN, obj.NAME_STATIC:
251				p.Mark |= NEED_PCREL_ITYPE_RELOC
252			}
253		case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM:
254			switch p.To.Name {
255			case obj.NAME_EXTERN, obj.NAME_STATIC:
256				p.Mark |= NEED_PCREL_STYPE_RELOC
257			}
258		}
259	}
260}
261
262// InvertBranch inverts the condition of a conditional branch.
263func InvertBranch(as obj.As) obj.As {
264	switch as {
265	case ABEQ:
266		return ABNE
267	case ABEQZ:
268		return ABNEZ
269	case ABGE:
270		return ABLT
271	case ABGEU:
272		return ABLTU
273	case ABGEZ:
274		return ABLTZ
275	case ABGT:
276		return ABLE
277	case ABGTU:
278		return ABLEU
279	case ABGTZ:
280		return ABLEZ
281	case ABLE:
282		return ABGT
283	case ABLEU:
284		return ABGTU
285	case ABLEZ:
286		return ABGTZ
287	case ABLT:
288		return ABGE
289	case ABLTU:
290		return ABGEU
291	case ABLTZ:
292		return ABGEZ
293	case ABNE:
294		return ABEQ
295	case ABNEZ:
296		return ABEQZ
297	default:
298		panic("InvertBranch: not a branch")
299	}
300}
301
302// containsCall reports whether the symbol contains a CALL (or equivalent)
303// instruction. Must be called after progedit.
304func containsCall(sym *obj.LSym) bool {
305	// CALLs are CALL or JAL(R) with link register LR.
306	for p := sym.Func().Text; p != nil; p = p.Link {
307		switch p.As {
308		case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
309			return true
310		case AJAL, AJALR:
311			if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR {
312				return true
313			}
314		}
315	}
316
317	return false
318}
319
320// setPCs sets the Pc field in all instructions reachable from p.
321// It uses pc as the initial value and returns the next available pc.
322func setPCs(p *obj.Prog, pc int64) int64 {
323	for ; p != nil; p = p.Link {
324		p.Pc = pc
325		for _, ins := range instructionsForProg(p) {
326			pc += int64(ins.length())
327		}
328
329		if p.As == obj.APCALIGN {
330			alignedValue := p.From.Offset
331			v := pcAlignPadLength(pc, alignedValue)
332			pc += int64(v)
333		}
334	}
335	return pc
336}
337
338// stackOffset updates Addr offsets based on the current stack size.
339//
340// The stack looks like:
341// -------------------
342// |                 |
343// |      PARAMs     |
344// |                 |
345// |                 |
346// -------------------
347// |    Parent RA    |   SP on function entry
348// -------------------
349// |                 |
350// |                 |
351// |       AUTOs     |
352// |                 |
353// |                 |
354// -------------------
355// |        RA       |   SP during function execution
356// -------------------
357//
358// FixedFrameSize makes other packages aware of the space allocated for RA.
359//
360// A nicer version of this diagram can be found on slide 21 of the presentation
361// attached to https://golang.org/issue/16922#issuecomment-243748180.
362func stackOffset(a *obj.Addr, stacksize int64) {
363	switch a.Name {
364	case obj.NAME_AUTO:
365		// Adjust to the top of AUTOs.
366		a.Offset += stacksize
367	case obj.NAME_PARAM:
368		// Adjust to the bottom of PARAMs.
369		a.Offset += stacksize + 8
370	}
371}
372
373// preprocess generates prologue and epilogue code, computes PC-relative branch
374// and jump offsets, and resolves pseudo-registers.
375//
376// preprocess is called once per linker symbol.
377//
378// When preprocess finishes, all instructions in the symbol are either
379// concrete, real RISC-V instructions or directive pseudo-ops like TEXT,
380// PCDATA, and FUNCDATA.
381func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
382	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
383		return
384	}
385
386	// Generate the prologue.
387	text := cursym.Func().Text
388	if text.As != obj.ATEXT {
389		ctxt.Diag("preprocess: found symbol that does not start with TEXT directive")
390		return
391	}
392
393	stacksize := text.To.Offset
394	if stacksize == -8 {
395		// Historical way to mark NOFRAME.
396		text.From.Sym.Set(obj.AttrNoFrame, true)
397		stacksize = 0
398	}
399	if stacksize < 0 {
400		ctxt.Diag("negative frame size %d - did you mean NOFRAME?", stacksize)
401	}
402	if text.From.Sym.NoFrame() {
403		if stacksize != 0 {
404			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", stacksize)
405		}
406	}
407
408	if !containsCall(cursym) {
409		text.From.Sym.Set(obj.AttrLeaf, true)
410		if stacksize == 0 {
411			// A leaf function with no locals has no frame.
412			text.From.Sym.Set(obj.AttrNoFrame, true)
413		}
414	}
415
416	// Save LR unless there is no frame.
417	if !text.From.Sym.NoFrame() {
418		stacksize += ctxt.Arch.FixedFrameSize
419	}
420
421	cursym.Func().Args = text.To.Val.(int32)
422	cursym.Func().Locals = int32(stacksize)
423
424	prologue := text
425
426	if !cursym.Func().Text.From.Sym.NoSplit() {
427		prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check
428	}
429
430	if stacksize != 0 {
431		prologue = ctxt.StartUnsafePoint(prologue, newprog)
432
433		// Actually save LR.
434		prologue = obj.Appendp(prologue, newprog)
435		prologue.As = AMOV
436		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
437		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -stacksize}
438
439		// Insert stack adjustment.
440		prologue = obj.Appendp(prologue, newprog)
441		prologue.As = AADDI
442		prologue.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -stacksize}
443		prologue.Reg = REG_SP
444		prologue.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
445		prologue.Spadj = int32(stacksize)
446
447		prologue = ctxt.EndUnsafePoint(prologue, newprog, -1)
448
449		// On Linux, in a cgo binary we may get a SIGSETXID signal early on
450		// before the signal stack is set, as glibc doesn't allow us to block
451		// SIGSETXID. So a signal may land on the current stack and clobber
452		// the content below the SP. We store the LR again after the SP is
453		// decremented.
454		prologue = obj.Appendp(prologue, newprog)
455		prologue.As = AMOV
456		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
457		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
458	}
459
460	if cursym.Func().Text.From.Sym.Wrapper() {
461		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
462		//
463		//   MOV g_panic(g), X5
464		//   BNE X5, ZERO, adjust
465		// end:
466		//   NOP
467		// ...rest of function..
468		// adjust:
469		//   MOV panic_argp(X5), X6
470		//   ADD $(autosize+FIXED_FRAME), SP, X7
471		//   BNE X6, X7, end
472		//   ADD $FIXED_FRAME, SP, X6
473		//   MOV X6, panic_argp(X5)
474		//   JMP end
475		//
476		// The NOP is needed to give the jumps somewhere to land.
477
478		ldpanic := obj.Appendp(prologue, newprog)
479
480		ldpanic.As = AMOV
481		ldpanic.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGG, Offset: 4 * int64(ctxt.Arch.PtrSize)} // G.panic
482		ldpanic.Reg = obj.REG_NONE
483		ldpanic.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5}
484
485		bneadj := obj.Appendp(ldpanic, newprog)
486		bneadj.As = ABNE
487		bneadj.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5}
488		bneadj.Reg = REG_ZERO
489		bneadj.To.Type = obj.TYPE_BRANCH
490
491		endadj := obj.Appendp(bneadj, newprog)
492		endadj.As = obj.ANOP
493
494		last := endadj
495		for last.Link != nil {
496			last = last.Link
497		}
498
499		getargp := obj.Appendp(last, newprog)
500		getargp.As = AMOV
501		getargp.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp
502		getargp.Reg = obj.REG_NONE
503		getargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
504
505		bneadj.To.SetTarget(getargp)
506
507		calcargp := obj.Appendp(getargp, newprog)
508		calcargp.As = AADDI
509		calcargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize + ctxt.Arch.FixedFrameSize}
510		calcargp.Reg = REG_SP
511		calcargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X7}
512
513		testargp := obj.Appendp(calcargp, newprog)
514		testargp.As = ABNE
515		testargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
516		testargp.Reg = REG_X7
517		testargp.To.Type = obj.TYPE_BRANCH
518		testargp.To.SetTarget(endadj)
519
520		adjargp := obj.Appendp(testargp, newprog)
521		adjargp.As = AADDI
522		adjargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(ctxt.Arch.PtrSize)}
523		adjargp.Reg = REG_SP
524		adjargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
525
526		setargp := obj.Appendp(adjargp, newprog)
527		setargp.As = AMOV
528		setargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
529		setargp.Reg = obj.REG_NONE
530		setargp.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp
531
532		godone := obj.Appendp(setargp, newprog)
533		godone.As = AJAL
534		godone.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
535		godone.To.Type = obj.TYPE_BRANCH
536		godone.To.SetTarget(endadj)
537	}
538
539	// Update stack-based offsets.
540	for p := cursym.Func().Text; p != nil; p = p.Link {
541		stackOffset(&p.From, stacksize)
542		stackOffset(&p.To, stacksize)
543	}
544
545	// Additional instruction rewriting.
546	for p := cursym.Func().Text; p != nil; p = p.Link {
547		switch p.As {
548		case obj.AGETCALLERPC:
549			if cursym.Leaf() {
550				// MOV LR, Rd
551				p.As = AMOV
552				p.From.Type = obj.TYPE_REG
553				p.From.Reg = REG_LR
554			} else {
555				// MOV (RSP), Rd
556				p.As = AMOV
557				p.From.Type = obj.TYPE_MEM
558				p.From.Reg = REG_SP
559			}
560
561		case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
562			switch p.To.Type {
563			case obj.TYPE_MEM:
564				jalToSym(ctxt, p, REG_LR)
565			}
566
567		case obj.AJMP:
568			switch p.To.Type {
569			case obj.TYPE_MEM:
570				switch p.To.Name {
571				case obj.NAME_EXTERN, obj.NAME_STATIC:
572					jalToSym(ctxt, p, REG_ZERO)
573				}
574			}
575
576		case obj.ARET:
577			// Replace RET with epilogue.
578			retJMP := p.To.Sym
579
580			if stacksize != 0 {
581				// Restore LR.
582				p.As = AMOV
583				p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
584				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
585				p = obj.Appendp(p, newprog)
586
587				p.As = AADDI
588				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize}
589				p.Reg = REG_SP
590				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
591				p.Spadj = int32(-stacksize)
592				p = obj.Appendp(p, newprog)
593			}
594
595			if retJMP != nil {
596				p.As = obj.ARET
597				p.To.Sym = retJMP
598				jalToSym(ctxt, p, REG_ZERO)
599			} else {
600				p.As = AJALR
601				p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
602				p.Reg = obj.REG_NONE
603				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
604			}
605
606			// "Add back" the stack removed in the previous instruction.
607			//
608			// This is to avoid confusing pctospadj, which sums
609			// Spadj from function entry to each PC, and shouldn't
610			// count adjustments from earlier epilogues, since they
611			// won't affect later PCs.
612			p.Spadj = int32(stacksize)
613
614		case AADDI:
615			// Refine Spadjs account for adjustment via ADDI instruction.
616			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.From.Type == obj.TYPE_CONST {
617				p.Spadj = int32(-p.From.Offset)
618			}
619		}
620
621		if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
622			f := cursym.Func()
623			if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
624				f.FuncFlag |= abi.FuncFlagSPWrite
625				if ctxt.Debugvlog || !ctxt.IsAsm {
626					ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
627					if !ctxt.IsAsm {
628						ctxt.Diag("invalid auto-SPWRITE in non-assembly")
629						ctxt.DiagFlush()
630						log.Fatalf("bad SPWRITE")
631					}
632				}
633			}
634		}
635	}
636
637	var callCount int
638	for p := cursym.Func().Text; p != nil; p = p.Link {
639		markRelocs(p)
640		if p.Mark&NEED_JAL_RELOC == NEED_JAL_RELOC {
641			callCount++
642		}
643	}
644	const callTrampSize = 8 // 2 machine instructions.
645	maxTrampSize := int64(callCount * callTrampSize)
646
647	// Compute instruction addresses.  Once we do that, we need to check for
648	// overextended jumps and branches.  Within each iteration, Pc differences
649	// are always lower bounds (since the program gets monotonically longer,
650	// a fixed point will be reached).  No attempt to handle functions > 2GiB.
651	for {
652		big, rescan := false, false
653		maxPC := setPCs(cursym.Func().Text, 0)
654		if maxPC+maxTrampSize > (1 << 20) {
655			big = true
656		}
657
658		for p := cursym.Func().Text; p != nil; p = p.Link {
659			switch p.As {
660			case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
661				if p.To.Type != obj.TYPE_BRANCH {
662					panic("assemble: instruction with branch-like opcode lacks destination")
663				}
664				offset := p.To.Target().Pc - p.Pc
665				if offset < -4096 || 4096 <= offset {
666					// Branch is long.  Replace it with a jump.
667					jmp := obj.Appendp(p, newprog)
668					jmp.As = AJAL
669					jmp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
670					jmp.To = obj.Addr{Type: obj.TYPE_BRANCH}
671					jmp.To.SetTarget(p.To.Target())
672
673					p.As = InvertBranch(p.As)
674					p.To.SetTarget(jmp.Link)
675
676					// We may have made previous branches too long,
677					// so recheck them.
678					rescan = true
679				}
680			case AJAL:
681				// Linker will handle the intersymbol case and trampolines.
682				if p.To.Target() == nil {
683					if !big {
684						break
685					}
686					// This function is going to be too large for JALs
687					// to reach trampolines. Replace with AUIPC+JALR.
688					jmp := obj.Appendp(p, newprog)
689					jmp.As = AJALR
690					jmp.From = p.From
691					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
692
693					p.As = AAUIPC
694					p.Mark = (p.Mark &^ NEED_JAL_RELOC) | NEED_CALL_RELOC
695					p.AddRestSource(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym})
696					p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
697					p.Reg = obj.REG_NONE
698					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
699
700					rescan = true
701					break
702				}
703				offset := p.To.Target().Pc - p.Pc
704				if offset < -(1<<20) || (1<<20) <= offset {
705					// Replace with 2-instruction sequence. This assumes
706					// that TMP is not live across J instructions, since
707					// it is reserved by SSA.
708					jmp := obj.Appendp(p, newprog)
709					jmp.As = AJALR
710					jmp.From = p.From
711					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
712
713					// p.From is not generally valid, however will be
714					// fixed up in the next loop.
715					p.As = AAUIPC
716					p.From = obj.Addr{Type: obj.TYPE_BRANCH, Sym: p.From.Sym}
717					p.From.SetTarget(p.To.Target())
718					p.Reg = obj.REG_NONE
719					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
720
721					rescan = true
722				}
723			}
724		}
725
726		if !rescan {
727			break
728		}
729	}
730
731	// Now that there are no long branches, resolve branch and jump targets.
732	// At this point, instruction rewriting which changes the number of
733	// instructions will break everything--don't do it!
734	for p := cursym.Func().Text; p != nil; p = p.Link {
735		switch p.As {
736		case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
737			switch p.To.Type {
738			case obj.TYPE_BRANCH:
739				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
740			case obj.TYPE_MEM:
741				panic("unhandled type")
742			}
743
744		case AJAL:
745			// Linker will handle the intersymbol case and trampolines.
746			if p.To.Target() != nil {
747				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
748			}
749
750		case AAUIPC:
751			if p.From.Type == obj.TYPE_BRANCH {
752				low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc)
753				if err != nil {
754					ctxt.Diag("%v: jump displacement %d too large", p, p.To.Target().Pc-p.Pc)
755				}
756				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high, Sym: cursym}
757				p.Link.To.Offset = low
758			}
759
760		case obj.APCALIGN:
761			alignedValue := p.From.Offset
762			if (alignedValue&(alignedValue-1) != 0) || 4 > alignedValue || alignedValue > 2048 {
763				ctxt.Diag("alignment value of an instruction must be a power of two and in the range [4, 2048], got %d\n", alignedValue)
764			}
765			// Update the current text symbol alignment value.
766			if int32(alignedValue) > cursym.Func().Align {
767				cursym.Func().Align = int32(alignedValue)
768			}
769		}
770	}
771
772	// Validate all instructions - this provides nice error messages.
773	for p := cursym.Func().Text; p != nil; p = p.Link {
774		for _, ins := range instructionsForProg(p) {
775			ins.validate(ctxt)
776		}
777	}
778}
779
// pcAlignPadLength returns -pc masked with alignedValue-1. When alignedValue
// is a power of two this is the number of bytes to add to pc to reach the
// next multiple of alignedValue.
func pcAlignPadLength(pc int64, alignedValue int64) int {
	mask := alignedValue - 1
	pad := -pc & mask
	return int(pad)
}
783
// stacksplit appends the stack split check after p and returns the last Prog
// it emitted. When framesize is 0 nothing is emitted and p is returned
// unchanged.
//
// The emitted sequence is, in order:
//   - if ctxt.Flag_maymorestack is set, a call to that function wrapped in a
//     temporary 16-byte frame that preserves LR and REGCTXT;
//   - a load of the stack guard from g into X6;
//   - a comparison of SP (adjusted for large frames) against the guard that
//     branches to the end of the sequence when the stack is large enough;
//   - a call to the appropriate runtime.morestack variant, followed by a jump
//     back to the stack guard load;
//   - a NOP that serves as the "enough stack" branch target.
func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgAlloc, framesize int64) *obj.Prog {
	// Leaf function with no frame is effectively NOSPLIT.
	if framesize == 0 {
		return p
	}

	if ctxt.Flag_maymorestack != "" {
		// Save LR and REGCTXT
		const frameSize = 16
		p = ctxt.StartUnsafePoint(p, newprog)

		// Spill Arguments. This has to happen before we open
		// any more frame space.
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		// MOV LR, -16(SP)
		p = obj.Appendp(p, newprog)
		p.As = AMOV
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
		p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -frameSize}
		// ADDI $-16, SP
		p = obj.Appendp(p, newprog)
		p.As = AADDI
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -frameSize}
		p.Reg = REG_SP
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
		p.Spadj = frameSize
		// MOV REGCTXT, 8(SP)
		p = obj.Appendp(p, newprog)
		p.As = AMOV
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT}
		p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8}

		// CALL maymorestack
		// The call uses X5 rather than LR as its link register.
		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		// See ../x86/obj6.go
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())
		jalToSym(ctxt, p, REG_X5)

		// Restore LR and REGCTXT

		// MOV 8(SP), REGCTXT
		p = obj.Appendp(p, newprog)
		p.As = AMOV
		p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT}
		// MOV (SP), LR
		p = obj.Appendp(p, newprog)
		p.As = AMOV
		p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
		// ADDI $16, SP
		p = obj.Appendp(p, newprog)
		p.As = AADDI
		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: frameSize}
		p.Reg = REG_SP
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
		p.Spadj = -frameSize

		// Unspill arguments
		p = cursym.Func().UnspillRegisterArgs(p, newprog)
		p = ctxt.EndUnsafePoint(p, newprog, -1)
	}

	// Jump back to here after morestack returns.
	// startPred is the Prog before the stack guard load; its Link is the
	// target of the final "JMP start".
	startPred := p

	// MOV	g_stackguard(g), X6
	p = obj.Appendp(p, newprog)
	p.As = AMOV
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REGG
	p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
	if cursym.CFunc() {
		p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
	}
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_X6

	// Mark the stack bound check and morestack call async nonpreemptible.
	// If we get preempted here, when resumed the preemption request is
	// cleared, but we'll still call morestack, which will double the stack
	// unnecessarily. See issue #35470.
	p = ctxt.StartUnsafePoint(p, newprog)

	// to_done is the branch taken when there is enough stack; to_more, when
	// set, is the underflow-guard branch that goes straight to the morestack
	// call. Both targets are filled in further down.
	var to_done, to_more *obj.Prog

	if framesize <= abi.StackSmall {
		// small stack
		//	// if SP > stackguard { goto done }
		//	BLTU	stackguard, SP, done
		p = obj.Appendp(p, newprog)
		p.As = ABLTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_X6
		p.Reg = REG_SP
		p.To.Type = obj.TYPE_BRANCH
		to_done = p
	} else {
		// large stack: SP-framesize < stackguard-StackSmall
		offset := int64(framesize) - abi.StackSmall
		if framesize > abi.StackBig {
			// Such a large stack we need to protect against underflow.
			// The runtime guarantees SP > abi.StackBig, but
			// framesize is large enough that SP-framesize may
			// underflow, causing a direct comparison with the
			// stack guard to incorrectly succeed. We explicitly
			// guard against underflow.
			//
			//	MOV	$(framesize-StackSmall), X7
			//	BLTU	SP, X7, label-of-call-to-morestack

			p = obj.Appendp(p, newprog)
			p.As = AMOV
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = offset
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_X7

			p = obj.Appendp(p, newprog)
			p.As = ABLTU
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_SP
			p.Reg = REG_X7
			p.To.Type = obj.TYPE_BRANCH
			to_more = p
		}

		// Check against the stack guard. We've ensured this won't underflow.
		//	ADD	$-(framesize-StackSmall), SP, X7
		//	// if X7 > stackguard { goto done }
		//	BLTU	stackguard, X7, done
		p = obj.Appendp(p, newprog)
		p.As = AADDI
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = -offset
		p.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_X7

		p = obj.Appendp(p, newprog)
		p.As = ABLTU
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_X6
		p.Reg = REG_X7
		p.To.Type = obj.TYPE_BRANCH
		to_done = p
	}

	// Spill the register args that could be clobbered by the
	// morestack code
	p = ctxt.EmitEntryStackMap(cursym, p, newprog)
	p = cursym.Func().SpillRegisterArgs(p, newprog)

	// CALL runtime.morestack(SB)
	p = obj.Appendp(p, newprog)
	p.As = obj.ACALL
	p.To.Type = obj.TYPE_BRANCH

	// Pick the morestack variant: morestackc for C functions,
	// morestack_noctxt when the function does not need a context register,
	// and plain morestack otherwise.
	if cursym.CFunc() {
		p.To.Sym = ctxt.Lookup("runtime.morestackc")
	} else if !cursym.Func().Text.From.Sym.NeedCtxt() {
		p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt")
	} else {
		p.To.Sym = ctxt.Lookup("runtime.morestack")
	}
	if to_more != nil {
		to_more.To.SetTarget(p)
	}
	jalToSym(ctxt, p, REG_X5)

	// The instructions which unspill regs should be preemptible.
	p = ctxt.EndUnsafePoint(p, newprog, -1)
	p = cursym.Func().UnspillRegisterArgs(p, newprog)

	// JMP start
	p = obj.Appendp(p, newprog)
	p.As = AJAL
	p.To = obj.Addr{Type: obj.TYPE_BRANCH}
	p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
	p.To.SetTarget(startPred.Link)

	// placeholder for to_done's jump target
	p = obj.Appendp(p, newprog)
	p.As = obj.ANOP // zero-width place holder
	to_done.To.SetTarget(p)

	return p
}
975
// signExtend treats the low bit bits of val as a signed quantity and returns
// it sign-extended to 64 bits.
func signExtend(val int64, bit uint) int64 {
	shift := 64 - bit
	// Shifting left discards the upper bits; the arithmetic right shift
	// then replicates the new sign bit.
	return (val << shift) >> shift
}
980
981// Split32BitImmediate splits a signed 32-bit immediate into a signed 20-bit
982// upper immediate and a signed 12-bit lower immediate to be added to the upper
983// result. For example, high may be used in LUI and low in a following ADDI to
984// generate a full 32-bit constant.
985func Split32BitImmediate(imm int64) (low, high int64, err error) {
986	if err := immIFits(imm, 32); err != nil {
987		return 0, 0, err
988	}
989
990	// Nothing special needs to be done if the immediate fits in 12 bits.
991	if err := immIFits(imm, 12); err == nil {
992		return imm, 0, nil
993	}
994
995	high = imm >> 12
996
997	// The bottom 12 bits will be treated as signed.
998	//
999	// If that will result in a negative 12 bit number, add 1 to
1000	// our upper bits to adjust for the borrow.
1001	//
1002	// It is not possible for this increment to overflow. To
1003	// overflow, the 20 top bits would be 1, and the sign bit for
1004	// the low 12 bits would be set, in which case the entire 32
1005	// bit pattern fits in a 12 bit signed value.
1006	if imm&(1<<11) != 0 {
1007		high++
1008	}
1009
1010	low = signExtend(imm, 12)
1011	high = signExtend(high, 20)
1012
1013	return low, high, nil
1014}
1015
// regVal returns the zero-based index of register r within the inclusive
// range [min, max]. It panics if r lies outside that range.
func regVal(r, min, max uint32) uint32 {
	if min <= r && r <= max {
		return r - min
	}
	panic(fmt.Sprintf("register out of range, want %d <= %d <= %d", min, r, max))
}
1022
1023// regI returns an integer register.
1024func regI(r uint32) uint32 {
1025	return regVal(r, REG_X0, REG_X31)
1026}
1027
1028// regF returns a float register.
1029func regF(r uint32) uint32 {
1030	return regVal(r, REG_F0, REG_F31)
1031}
1032
1033// regAddr extracts a register from an Addr.
1034func regAddr(a obj.Addr, min, max uint32) uint32 {
1035	if a.Type != obj.TYPE_REG {
1036		panic(fmt.Sprintf("ill typed: %+v", a))
1037	}
1038	return regVal(uint32(a.Reg), min, max)
1039}
1040
// regIAddr extracts the integer register from an Addr, returning its index
// relative to REG_X0. It panics (via regAddr) if a is not a register operand
// or the register is not in the range REG_X0 through REG_X31.
func regIAddr(a obj.Addr) uint32 {
	return regAddr(a, REG_X0, REG_X31)
}
1045
// regFAddr extracts the float register from an Addr, returning its index
// relative to REG_F0. It panics (via regAddr) if a is not a register operand
// or the register is not in the range REG_F0 through REG_F31.
func regFAddr(a obj.Addr) uint32 {
	return regAddr(a, REG_F0, REG_F31)
}
1050
// immEven reports whether x is a multiple of two: it returns nil when the
// least significant bit of x is clear and a descriptive error otherwise.
func immEven(x int64) error {
	if x&1 == 0 {
		return nil
	}
	return fmt.Errorf("immediate %#x is not a multiple of two", x)
}
1059
// immIFits checks whether the immediate value x fits in nbits bits
// as a signed integer, i.e. whether it lies in [-2^(nbits-1), 2^(nbits-1)-1].
// It returns nil if x fits and an error describing the permitted range
// otherwise. The error reports the full field width nbits.
func immIFits(x int64, nbits uint) error {
	// One of the nbits bits is the sign, leaving nbits-1 magnitude bits.
	// Keep nbits itself untouched so that the message reports the real
	// field width rather than the width minus one.
	shift := nbits - 1
	min := int64(-1) << shift
	max := int64(1)<<shift - 1
	if x >= min && x <= max {
		return nil
	}
	// Small ranges read better in decimal, large ones in hexadecimal.
	if shift <= 16 {
		return fmt.Errorf("signed immediate %d must be in range [%d, %d] (%d bits)", x, min, max, nbits)
	}
	return fmt.Errorf("signed immediate %#x must be in range [%#x, %#x] (%d bits)", x, min, max, nbits)
}
1074
1075// immI extracts the signed integer of the specified size from an immediate.
1076func immI(as obj.As, imm int64, nbits uint) uint32 {
1077	if err := immIFits(imm, nbits); err != nil {
1078		panic(fmt.Sprintf("%v: %v", as, err))
1079	}
1080	return uint32(imm)
1081}
1082
1083func wantImmI(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) {
1084	if err := immIFits(imm, nbits); err != nil {
1085		ctxt.Diag("%v: %v", ins, err)
1086	}
1087}
1088
1089func wantReg(ctxt *obj.Link, ins *instruction, pos string, descr string, r, min, max uint32) {
1090	if r < min || r > max {
1091		var suffix string
1092		if r != obj.REG_NONE {
1093			suffix = fmt.Sprintf(" but got non-%s register %s", descr, RegName(int(r)))
1094		}
1095		ctxt.Diag("%v: expected %s register in %s position%s", ins, descr, pos, suffix)
1096	}
1097}
1098
1099func wantNoneReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
1100	if r != obj.REG_NONE {
1101		ctxt.Diag("%v: expected no register in %s but got register %s", ins, pos, RegName(int(r)))
1102	}
1103}
1104
// wantIntReg checks that r is an integer register (REG_X0 through REG_X31),
// reporting a diagnostic for ins otherwise. pos names the operand position
// used in the message.
func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
	wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31)
}
1109
// wantFloatReg checks that r is a floating-point register (REG_F0 through
// REG_F31), reporting a diagnostic for ins otherwise. pos names the operand
// position used in the message.
func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
	wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31)
}
1114
1115// wantEvenOffset checks that the offset is a multiple of two.
1116func wantEvenOffset(ctxt *obj.Link, ins *instruction, offset int64) {
1117	if err := immEven(offset); err != nil {
1118		ctxt.Diag("%v: %v", ins, err)
1119	}
1120}
1121
// validateRII reports diagnostics unless rd and rs1 are integer registers
// and rs2 and rs3 are unset.
func validateRII(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1128
// validateRIII reports diagnostics unless rd, rs1 and rs2 are integer
// registers and rs3 is unset.
func validateRIII(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1135
// validateRFFF reports diagnostics unless rd, rs1 and rs2 are float
// registers and rs3 is unset.
func validateRFFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1142
// validateRFFFF reports diagnostics unless rd, rs1, rs2 and rs3 are all
// float registers.
func validateRFFFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantFloatReg(ctxt, ins, "rs3", ins.rs3)
}
1149
// validateRFFI reports diagnostics unless rd is an integer register, rs1 and
// rs2 are float registers and rs3 is unset.
func validateRFFI(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1156
// validateRFI reports diagnostics unless rd is an integer register, rs2 is a
// float register and rs1 and rs3 are unset.
func validateRFI(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1163
// validateRIF reports diagnostics unless rd is a float register, rs2 is an
// integer register and rs1 and rs3 are unset.
func validateRIF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1170
// validateRFF reports diagnostics unless rd and rs2 are float registers and
// rs1 and rs3 are unset.
func validateRFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1177
// validateII reports diagnostics unless the immediate fits in 12 signed
// bits, rd and rs1 are integer registers and rs2 and rs3 are unset.
func validateII(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1185
// validateIF reports diagnostics unless the immediate fits in 12 signed
// bits, rd is a float register, rs1 is an integer register and rs2 and rs3
// are unset.
func validateIF(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1193
// validateSI reports diagnostics unless the immediate fits in 12 signed
// bits, rd and rs1 are integer registers and rs2 and rs3 are unset.
func validateSI(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1201
// validateSF reports diagnostics unless the immediate fits in 12 signed
// bits, rd is an integer register, rs1 is a float register and rs2 and rs3
// are unset.
func validateSF(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1209
// validateB reports diagnostics unless the immediate is even and fits in 13
// signed bits, rs1 and rs2 are integer registers and rd and rs3 are unset.
func validateB(ctxt *obj.Link, ins *instruction) {
	// Offsets are multiples of two, so accept 13 bit immediates for the
	// 12 bit slot. We implicitly drop the least significant bit in encodeB.
	wantEvenOffset(ctxt, ins, ins.imm)
	wantImmI(ctxt, ins, ins.imm, 13)
	wantNoneReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1220
// validateU reports diagnostics unless the immediate fits in 20 signed
// bits, rd is an integer register and rs1, rs2 and rs3 are unset.
func validateU(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 20)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1228
// validateJ reports diagnostics unless the immediate is even and fits in 21
// signed bits, rd is an integer register and rs1, rs2 and rs3 are unset.
func validateJ(ctxt *obj.Link, ins *instruction) {
	// Offsets are multiples of two, so accept 21 bit immediates for the
	// 20 bit slot. We implicitly drop the least significant bit in encodeJ.
	wantEvenOffset(ctxt, ins, ins.imm)
	wantImmI(ctxt, ins, ins.imm, 21)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
1239
1240func validateRaw(ctxt *obj.Link, ins *instruction) {
1241	// Treat the raw value specially as a 32-bit unsigned integer.
1242	// Nobody wants to enter negative machine code.
1243	if ins.imm < 0 || 1<<32 <= ins.imm {
1244		ctxt.Diag("%v: immediate %d in raw position cannot be larger than 32 bits", ins.as, ins.imm)
1245	}
1246}
1247
// extractBitAndShift isolates bit number bit of imm and returns it moved to
// bit position pos; all other bits of the result are zero.
func extractBitAndShift(imm uint32, bit, pos int) uint32 {
	selected := (imm >> bit) & 1
	return selected << pos
}
1253
1254// encodeR encodes an R-type RISC-V instruction.
1255func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 {
1256	enc := encode(as)
1257	if enc == nil {
1258		panic("encodeR: could not encode instruction")
1259	}
1260	if enc.rs2 != 0 && rs2 != 0 {
1261		panic("encodeR: instruction uses rs2, but rs2 was nonzero")
1262	}
1263	return funct7<<25 | enc.funct7<<25 | enc.rs2<<20 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode
1264}
1265
1266// encodeR4 encodes an R4-type RISC-V instruction.
1267func encodeR4(as obj.As, rs1, rs2, rs3, rd, funct3, funct2 uint32) uint32 {
1268	enc := encode(as)
1269	if enc == nil {
1270		panic("encodeR4: could not encode instruction")
1271	}
1272	if enc.rs2 != 0 {
1273		panic("encodeR4: instruction uses rs2")
1274	}
1275	funct2 |= enc.funct7
1276	if funct2&^3 != 0 {
1277		panic("encodeR4: funct2 requires more than 2 bits")
1278	}
1279	return rs3<<27 | funct2<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode
1280}
1281
1282func encodeRII(ins *instruction) uint32 {
1283	return encodeR(ins.as, regI(ins.rs1), 0, regI(ins.rd), ins.funct3, ins.funct7)
1284}
1285
1286func encodeRIII(ins *instruction) uint32 {
1287	return encodeR(ins.as, regI(ins.rs1), regI(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
1288}
1289
1290func encodeRFFF(ins *instruction) uint32 {
1291	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rd), ins.funct3, ins.funct7)
1292}
1293
1294func encodeRFFFF(ins *instruction) uint32 {
1295	return encodeR4(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rs3), regF(ins.rd), ins.funct3, ins.funct7)
1296}
1297
1298func encodeRFFI(ins *instruction) uint32 {
1299	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
1300}
1301
1302func encodeRFI(ins *instruction) uint32 {
1303	return encodeR(ins.as, regF(ins.rs2), 0, regI(ins.rd), ins.funct3, ins.funct7)
1304}
1305
1306func encodeRIF(ins *instruction) uint32 {
1307	return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
1308}
1309
1310func encodeRFF(ins *instruction) uint32 {
1311	return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
1312}
1313
1314// encodeI encodes an I-type RISC-V instruction.
1315func encodeI(as obj.As, rs1, rd, imm uint32) uint32 {
1316	enc := encode(as)
1317	if enc == nil {
1318		panic("encodeI: could not encode instruction")
1319	}
1320	imm |= uint32(enc.csr)
1321	return imm<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode
1322}
1323
1324func encodeII(ins *instruction) uint32 {
1325	return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm))
1326}
1327
1328func encodeIF(ins *instruction) uint32 {
1329	return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm))
1330}
1331
1332// encodeS encodes an S-type RISC-V instruction.
1333func encodeS(as obj.As, rs1, rs2, imm uint32) uint32 {
1334	enc := encode(as)
1335	if enc == nil {
1336		panic("encodeS: could not encode instruction")
1337	}
1338	return (imm>>5)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | (imm&0x1f)<<7 | enc.opcode
1339}
1340
1341func encodeSI(ins *instruction) uint32 {
1342	return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm))
1343}
1344
1345func encodeSF(ins *instruction) uint32 {
1346	return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm))
1347}
1348
// encodeBImmediate scatters a branch offset into the immediate bit positions
// of a B-type RISC-V instruction word: imm[12] goes to bit 31, imm[10:5] to
// bits 30-25, imm[4:1] to bits 11-8 and imm[11] to bit 7. Bit 0 of imm is
// dropped.
func encodeBImmediate(imm uint32) uint32 {
	sign := (imm >> 12) << 31 // only imm[12] survives the shift to bit 31
	upper := ((imm >> 5) & 0x3f) << 25
	lower := ((imm >> 1) & 0xf) << 8
	bit11 := ((imm >> 11) & 0x1) << 7
	return sign | upper | lower | bit11
}
1353
1354// encodeB encodes a B-type RISC-V instruction.
1355func encodeB(ins *instruction) uint32 {
1356	imm := immI(ins.as, ins.imm, 13)
1357	rs2 := regI(ins.rs1)
1358	rs1 := regI(ins.rs2)
1359	enc := encode(ins.as)
1360	if enc == nil {
1361		panic("encodeB: could not encode instruction")
1362	}
1363	return encodeBImmediate(imm) | rs2<<20 | rs1<<15 | enc.funct3<<12 | enc.opcode
1364}
1365
1366// encodeU encodes a U-type RISC-V instruction.
1367func encodeU(ins *instruction) uint32 {
1368	// The immediates for encodeU are the upper 20 bits of a 32 bit value.
1369	// Rather than have the user/compiler generate a 32 bit constant, the
1370	// bottommost bits of which must all be zero, instead accept just the
1371	// top bits.
1372	imm := immI(ins.as, ins.imm, 20)
1373	rd := regI(ins.rd)
1374	enc := encode(ins.as)
1375	if enc == nil {
1376		panic("encodeU: could not encode instruction")
1377	}
1378	return imm<<12 | rd<<7 | enc.opcode
1379}
1380
// encodeJImmediate scatters a jump offset into the immediate bit positions
// of a J-type RISC-V instruction word: imm[20] goes to bit 31, imm[10:1] to
// bits 30-21, imm[11] to bit 20 and imm[19:12] to bits 19-12. Bit 0 of imm
// is dropped.
func encodeJImmediate(imm uint32) uint32 {
	sign := (imm >> 20) << 31 // only imm[20] survives the shift to bit 31
	low10 := ((imm >> 1) & 0x3ff) << 21
	bit11 := ((imm >> 11) & 0x1) << 20
	mid8 := ((imm >> 12) & 0xff) << 12
	return sign | low10 | bit11 | mid8
}
1385
1386// encodeJ encodes a J-type RISC-V instruction.
1387func encodeJ(ins *instruction) uint32 {
1388	imm := immI(ins.as, ins.imm, 21)
1389	rd := regI(ins.rd)
1390	enc := encode(ins.as)
1391	if enc == nil {
1392		panic("encodeJ: could not encode instruction")
1393	}
1394	return encodeJImmediate(imm) | rd<<7 | enc.opcode
1395}
1396
1397// encodeCBImmediate encodes an immediate for a CB-type RISC-V instruction.
1398func encodeCBImmediate(imm uint32) uint32 {
1399	// Bit order - [8|4:3|7:6|2:1|5]
1400	bits := extractBitAndShift(imm, 8, 7)
1401	bits |= extractBitAndShift(imm, 4, 6)
1402	bits |= extractBitAndShift(imm, 3, 5)
1403	bits |= extractBitAndShift(imm, 7, 4)
1404	bits |= extractBitAndShift(imm, 6, 3)
1405	bits |= extractBitAndShift(imm, 2, 2)
1406	bits |= extractBitAndShift(imm, 1, 1)
1407	bits |= extractBitAndShift(imm, 5, 0)
1408	return (bits>>5)<<10 | (bits&0x1f)<<2
1409}
1410
1411// encodeCJImmediate encodes an immediate for a CJ-type RISC-V instruction.
1412func encodeCJImmediate(imm uint32) uint32 {
1413	// Bit order - [11|4|9:8|10|6|7|3:1|5]
1414	bits := extractBitAndShift(imm, 11, 10)
1415	bits |= extractBitAndShift(imm, 4, 9)
1416	bits |= extractBitAndShift(imm, 9, 8)
1417	bits |= extractBitAndShift(imm, 8, 7)
1418	bits |= extractBitAndShift(imm, 10, 6)
1419	bits |= extractBitAndShift(imm, 6, 5)
1420	bits |= extractBitAndShift(imm, 7, 4)
1421	bits |= extractBitAndShift(imm, 3, 3)
1422	bits |= extractBitAndShift(imm, 2, 2)
1423	bits |= extractBitAndShift(imm, 1, 1)
1424	bits |= extractBitAndShift(imm, 5, 0)
1425	return bits << 2
1426}
1427
1428func encodeRawIns(ins *instruction) uint32 {
1429	// Treat the raw value specially as a 32-bit unsigned integer.
1430	// Nobody wants to enter negative machine code.
1431	if ins.imm < 0 || 1<<32 <= ins.imm {
1432		panic(fmt.Sprintf("immediate %d cannot fit in 32 bits", ins.imm))
1433	}
1434	return uint32(ins.imm)
1435}
1436
1437func EncodeBImmediate(imm int64) (int64, error) {
1438	if err := immIFits(imm, 13); err != nil {
1439		return 0, err
1440	}
1441	if err := immEven(imm); err != nil {
1442		return 0, err
1443	}
1444	return int64(encodeBImmediate(uint32(imm))), nil
1445}
1446
1447func EncodeCBImmediate(imm int64) (int64, error) {
1448	if err := immIFits(imm, 9); err != nil {
1449		return 0, err
1450	}
1451	if err := immEven(imm); err != nil {
1452		return 0, err
1453	}
1454	return int64(encodeCBImmediate(uint32(imm))), nil
1455}
1456
1457func EncodeCJImmediate(imm int64) (int64, error) {
1458	if err := immIFits(imm, 12); err != nil {
1459		return 0, err
1460	}
1461	if err := immEven(imm); err != nil {
1462		return 0, err
1463	}
1464	return int64(encodeCJImmediate(uint32(imm))), nil
1465}
1466
1467func EncodeIImmediate(imm int64) (int64, error) {
1468	if err := immIFits(imm, 12); err != nil {
1469		return 0, err
1470	}
1471	return imm << 20, nil
1472}
1473
1474func EncodeJImmediate(imm int64) (int64, error) {
1475	if err := immIFits(imm, 21); err != nil {
1476		return 0, err
1477	}
1478	if err := immEven(imm); err != nil {
1479		return 0, err
1480	}
1481	return int64(encodeJImmediate(uint32(imm))), nil
1482}
1483
1484func EncodeSImmediate(imm int64) (int64, error) {
1485	if err := immIFits(imm, 12); err != nil {
1486		return 0, err
1487	}
1488	return ((imm >> 5) << 25) | ((imm & 0x1f) << 7), nil
1489}
1490
1491func EncodeUImmediate(imm int64) (int64, error) {
1492	if err := immIFits(imm, 20); err != nil {
1493		return 0, err
1494	}
1495	return imm << 12, nil
1496}
1497
// encoding bundles the functions used to validate and encode one class of
// instruction, together with the length of the resulting machine code.
type encoding struct {
	encode   func(*instruction) uint32     // encode returns the machine code for an instruction
	validate func(*obj.Link, *instruction) // validate validates an instruction
	length   int                           // length of encoded instruction; 0 for pseudo-ops, 4 otherwise
}
1503
var (
	// Encodings have the following naming convention:
	//
	//  1. the instruction encoding (R/I/S/B/U/J), in lowercase
	//  2. zero or more register operand identifiers (I = integer
	//     register, F = float register), in uppercase
	//  3. the word "Encoding"
	//
	// For example, rIIIEncoding indicates an R-type instruction with two
	// integer register inputs and an integer register output; sFEncoding
	// indicates an S-type instruction with rs2 being a float register.

	// R-type (and R4-type) encodings.
	rIIIEncoding  = encoding{encode: encodeRIII, validate: validateRIII, length: 4}
	rIIEncoding   = encoding{encode: encodeRII, validate: validateRII, length: 4}
	rFFFEncoding  = encoding{encode: encodeRFFF, validate: validateRFFF, length: 4}
	rFFFFEncoding = encoding{encode: encodeRFFFF, validate: validateRFFFF, length: 4}
	rFFIEncoding  = encoding{encode: encodeRFFI, validate: validateRFFI, length: 4}
	rFIEncoding   = encoding{encode: encodeRFI, validate: validateRFI, length: 4}
	rIFEncoding   = encoding{encode: encodeRIF, validate: validateRIF, length: 4}
	rFFEncoding   = encoding{encode: encodeRFF, validate: validateRFF, length: 4}

	// I-type encodings.
	iIEncoding = encoding{encode: encodeII, validate: validateII, length: 4}
	iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4}

	// S-type encodings.
	sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4}
	sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4}

	// B-, U- and J-type encodings.
	bEncoding = encoding{encode: encodeB, validate: validateB, length: 4}
	uEncoding = encoding{encode: encodeU, validate: validateU, length: 4}
	jEncoding = encoding{encode: encodeJ, validate: validateJ, length: 4}

	// rawEncoding encodes a raw instruction byte sequence.
	rawEncoding = encoding{encode: encodeRawIns, validate: validateRaw, length: 4}

	// pseudoOpEncoding panics if encoding is attempted (its encode function
	// is nil), but does no validation. Its length is 0.
	pseudoOpEncoding = encoding{encode: nil, validate: func(*obj.Link, *instruction) {}, length: 0}

	// badEncoding is used when an invalid op is encountered.
	// An error has already been generated, so let anything else through:
	// it encodes to 0, validates nothing and has length 0.
	badEncoding = encoding{encode: func(*instruction) uint32 { return 0 }, validate: func(*obj.Link, *instruction) {}, length: 0}
)
1545
// encodings contains the encodings for RISC-V instructions.
// Instructions are masked with obj.AMask to keep indices small.
// Opcodes that are not listed keep the zero encoding value, whose validate
// function is nil.
var encodings = [ALAST & obj.AMask]encoding{

	// Unprivileged ISA

	// 2.4: Integer Computational Instructions
	AADDI & obj.AMask:  iIEncoding,
	ASLTI & obj.AMask:  iIEncoding,
	ASLTIU & obj.AMask: iIEncoding,
	AANDI & obj.AMask:  iIEncoding,
	AORI & obj.AMask:   iIEncoding,
	AXORI & obj.AMask:  iIEncoding,
	ASLLI & obj.AMask:  iIEncoding,
	ASRLI & obj.AMask:  iIEncoding,
	ASRAI & obj.AMask:  iIEncoding,
	ALUI & obj.AMask:   uEncoding,
	AAUIPC & obj.AMask: uEncoding,
	AADD & obj.AMask:   rIIIEncoding,
	ASLT & obj.AMask:   rIIIEncoding,
	ASLTU & obj.AMask:  rIIIEncoding,
	AAND & obj.AMask:   rIIIEncoding,
	AOR & obj.AMask:    rIIIEncoding,
	AXOR & obj.AMask:   rIIIEncoding,
	ASLL & obj.AMask:   rIIIEncoding,
	ASRL & obj.AMask:   rIIIEncoding,
	ASUB & obj.AMask:   rIIIEncoding,
	ASRA & obj.AMask:   rIIIEncoding,

	// 2.5: Control Transfer Instructions
	AJAL & obj.AMask:  jEncoding,
	AJALR & obj.AMask: iIEncoding,
	ABEQ & obj.AMask:  bEncoding,
	ABNE & obj.AMask:  bEncoding,
	ABLT & obj.AMask:  bEncoding,
	ABLTU & obj.AMask: bEncoding,
	ABGE & obj.AMask:  bEncoding,
	ABGEU & obj.AMask: bEncoding,

	// 2.6: Load and Store Instructions
	ALW & obj.AMask:  iIEncoding,
	ALWU & obj.AMask: iIEncoding,
	ALH & obj.AMask:  iIEncoding,
	ALHU & obj.AMask: iIEncoding,
	ALB & obj.AMask:  iIEncoding,
	ALBU & obj.AMask: iIEncoding,
	ASW & obj.AMask:  sIEncoding,
	ASH & obj.AMask:  sIEncoding,
	ASB & obj.AMask:  sIEncoding,

	// 2.7: Memory Ordering
	AFENCE & obj.AMask: iIEncoding,

	// 5.2: Integer Computational Instructions (RV64I)
	AADDIW & obj.AMask: iIEncoding,
	ASLLIW & obj.AMask: iIEncoding,
	ASRLIW & obj.AMask: iIEncoding,
	ASRAIW & obj.AMask: iIEncoding,
	AADDW & obj.AMask:  rIIIEncoding,
	ASLLW & obj.AMask:  rIIIEncoding,
	ASRLW & obj.AMask:  rIIIEncoding,
	ASUBW & obj.AMask:  rIIIEncoding,
	ASRAW & obj.AMask:  rIIIEncoding,

	// 5.3: Load and Store Instructions (RV64I)
	ALD & obj.AMask: iIEncoding,
	ASD & obj.AMask: sIEncoding,

	// 7.1: Multiplication Operations
	AMUL & obj.AMask:    rIIIEncoding,
	AMULH & obj.AMask:   rIIIEncoding,
	AMULHU & obj.AMask:  rIIIEncoding,
	AMULHSU & obj.AMask: rIIIEncoding,
	AMULW & obj.AMask:   rIIIEncoding,
	ADIV & obj.AMask:    rIIIEncoding,
	ADIVU & obj.AMask:   rIIIEncoding,
	AREM & obj.AMask:    rIIIEncoding,
	AREMU & obj.AMask:   rIIIEncoding,
	ADIVW & obj.AMask:   rIIIEncoding,
	ADIVUW & obj.AMask:  rIIIEncoding,
	AREMW & obj.AMask:   rIIIEncoding,
	AREMUW & obj.AMask:  rIIIEncoding,

	// 8.2: Load-Reserved/Store-Conditional
	ALRW & obj.AMask: rIIIEncoding,
	ALRD & obj.AMask: rIIIEncoding,
	ASCW & obj.AMask: rIIIEncoding,
	ASCD & obj.AMask: rIIIEncoding,

	// 8.3: Atomic Memory Operations
	AAMOSWAPW & obj.AMask: rIIIEncoding,
	AAMOSWAPD & obj.AMask: rIIIEncoding,
	AAMOADDW & obj.AMask:  rIIIEncoding,
	AAMOADDD & obj.AMask:  rIIIEncoding,
	AAMOANDW & obj.AMask:  rIIIEncoding,
	AAMOANDD & obj.AMask:  rIIIEncoding,
	AAMOORW & obj.AMask:   rIIIEncoding,
	AAMOORD & obj.AMask:   rIIIEncoding,
	AAMOXORW & obj.AMask:  rIIIEncoding,
	AAMOXORD & obj.AMask:  rIIIEncoding,
	AAMOMAXW & obj.AMask:  rIIIEncoding,
	AAMOMAXD & obj.AMask:  rIIIEncoding,
	AAMOMAXUW & obj.AMask: rIIIEncoding,
	AAMOMAXUD & obj.AMask: rIIIEncoding,
	AAMOMINW & obj.AMask:  rIIIEncoding,
	AAMOMIND & obj.AMask:  rIIIEncoding,
	AAMOMINUW & obj.AMask: rIIIEncoding,
	AAMOMINUD & obj.AMask: rIIIEncoding,

	// 10.1: Base Counters and Timers
	ARDCYCLE & obj.AMask:   iIEncoding,
	ARDTIME & obj.AMask:    iIEncoding,
	ARDINSTRET & obj.AMask: iIEncoding,

	// 11.5: Single-Precision Load and Store Instructions
	AFLW & obj.AMask: iFEncoding,
	AFSW & obj.AMask: sFEncoding,

	// 11.6: Single-Precision Floating-Point Computational Instructions
	AFADDS & obj.AMask:   rFFFEncoding,
	AFSUBS & obj.AMask:   rFFFEncoding,
	AFMULS & obj.AMask:   rFFFEncoding,
	AFDIVS & obj.AMask:   rFFFEncoding,
	AFMINS & obj.AMask:   rFFFEncoding,
	AFMAXS & obj.AMask:   rFFFEncoding,
	AFSQRTS & obj.AMask:  rFFFEncoding,
	AFMADDS & obj.AMask:  rFFFFEncoding,
	AFMSUBS & obj.AMask:  rFFFFEncoding,
	AFNMSUBS & obj.AMask: rFFFFEncoding,
	AFNMADDS & obj.AMask: rFFFFEncoding,

	// 11.7: Single-Precision Floating-Point Conversion and Move Instructions
	AFCVTWS & obj.AMask:  rFIEncoding,
	AFCVTLS & obj.AMask:  rFIEncoding,
	AFCVTSW & obj.AMask:  rIFEncoding,
	AFCVTSL & obj.AMask:  rIFEncoding,
	AFCVTWUS & obj.AMask: rFIEncoding,
	AFCVTLUS & obj.AMask: rFIEncoding,
	AFCVTSWU & obj.AMask: rIFEncoding,
	AFCVTSLU & obj.AMask: rIFEncoding,
	AFSGNJS & obj.AMask:  rFFFEncoding,
	AFSGNJNS & obj.AMask: rFFFEncoding,
	AFSGNJXS & obj.AMask: rFFFEncoding,
	AFMVXS & obj.AMask:   rFIEncoding,
	AFMVSX & obj.AMask:   rIFEncoding,
	AFMVXW & obj.AMask:   rFIEncoding,
	AFMVWX & obj.AMask:   rIFEncoding,

	// 11.8: Single-Precision Floating-Point Compare Instructions
	AFEQS & obj.AMask: rFFIEncoding,
	AFLTS & obj.AMask: rFFIEncoding,
	AFLES & obj.AMask: rFFIEncoding,

	// 11.9: Single-Precision Floating-Point Classify Instruction
	AFCLASSS & obj.AMask: rFIEncoding,

	// 12.3: Double-Precision Load and Store Instructions
	AFLD & obj.AMask: iFEncoding,
	AFSD & obj.AMask: sFEncoding,

	// 12.4: Double-Precision Floating-Point Computational Instructions
	AFADDD & obj.AMask:   rFFFEncoding,
	AFSUBD & obj.AMask:   rFFFEncoding,
	AFMULD & obj.AMask:   rFFFEncoding,
	AFDIVD & obj.AMask:   rFFFEncoding,
	AFMIND & obj.AMask:   rFFFEncoding,
	AFMAXD & obj.AMask:   rFFFEncoding,
	AFSQRTD & obj.AMask:  rFFFEncoding,
	AFMADDD & obj.AMask:  rFFFFEncoding,
	AFMSUBD & obj.AMask:  rFFFFEncoding,
	AFNMSUBD & obj.AMask: rFFFFEncoding,
	AFNMADDD & obj.AMask: rFFFFEncoding,

	// 12.5: Double-Precision Floating-Point Conversion and Move Instructions
	AFCVTWD & obj.AMask:  rFIEncoding,
	AFCVTLD & obj.AMask:  rFIEncoding,
	AFCVTDW & obj.AMask:  rIFEncoding,
	AFCVTDL & obj.AMask:  rIFEncoding,
	AFCVTWUD & obj.AMask: rFIEncoding,
	AFCVTLUD & obj.AMask: rFIEncoding,
	AFCVTDWU & obj.AMask: rIFEncoding,
	AFCVTDLU & obj.AMask: rIFEncoding,
	AFCVTSD & obj.AMask:  rFFEncoding,
	AFCVTDS & obj.AMask:  rFFEncoding,
	AFSGNJD & obj.AMask:  rFFFEncoding,
	AFSGNJND & obj.AMask: rFFFEncoding,
	AFSGNJXD & obj.AMask: rFFFEncoding,
	AFMVXD & obj.AMask:   rFIEncoding,
	AFMVDX & obj.AMask:   rIFEncoding,

	// 12.6: Double-Precision Floating-Point Compare Instructions
	AFEQD & obj.AMask: rFFIEncoding,
	AFLTD & obj.AMask: rFFIEncoding,
	AFLED & obj.AMask: rFFIEncoding,

	// 12.7: Double-Precision Floating-Point Classify Instruction
	AFCLASSD & obj.AMask: rFIEncoding,

	// Privileged ISA

	// 3.2.1: Environment Call and Breakpoint
	AECALL & obj.AMask:  iIEncoding,
	AEBREAK & obj.AMask: iIEncoding,

	//
	// RISC-V Bit-Manipulation ISA-extensions (1.0)
	//

	// 1.1: Address Generation Instructions (Zba)
	AADDUW & obj.AMask:    rIIIEncoding,
	ASH1ADD & obj.AMask:   rIIIEncoding,
	ASH1ADDUW & obj.AMask: rIIIEncoding,
	ASH2ADD & obj.AMask:   rIIIEncoding,
	ASH2ADDUW & obj.AMask: rIIIEncoding,
	ASH3ADD & obj.AMask:   rIIIEncoding,
	ASH3ADDUW & obj.AMask: rIIIEncoding,
	ASLLIUW & obj.AMask:   iIEncoding,

	// 1.2: Basic Bit Manipulation (Zbb)
	AANDN & obj.AMask:  rIIIEncoding,
	ACLZ & obj.AMask:   rIIEncoding,
	ACLZW & obj.AMask:  rIIEncoding,
	ACPOP & obj.AMask:  rIIEncoding,
	ACPOPW & obj.AMask: rIIEncoding,
	ACTZ & obj.AMask:   rIIEncoding,
	ACTZW & obj.AMask:  rIIEncoding,
	AMAX & obj.AMask:   rIIIEncoding,
	AMAXU & obj.AMask:  rIIIEncoding,
	AMIN & obj.AMask:   rIIIEncoding,
	AMINU & obj.AMask:  rIIIEncoding,
	AORN & obj.AMask:   rIIIEncoding,
	ASEXTB & obj.AMask: rIIEncoding,
	ASEXTH & obj.AMask: rIIEncoding,
	AXNOR & obj.AMask:  rIIIEncoding,
	AZEXTH & obj.AMask: rIIEncoding,

	// 1.3: Bitwise Rotation (Zbb)
	AROL & obj.AMask:   rIIIEncoding,
	AROLW & obj.AMask:  rIIIEncoding,
	AROR & obj.AMask:   rIIIEncoding,
	ARORI & obj.AMask:  iIEncoding,
	ARORIW & obj.AMask: iIEncoding,
	ARORW & obj.AMask:  rIIIEncoding,
	AORCB & obj.AMask:  iIEncoding,
	AREV8 & obj.AMask:  iIEncoding,

	// 1.5: Single-bit Instructions (Zbs)
	ABCLR & obj.AMask:  rIIIEncoding,
	ABCLRI & obj.AMask: iIEncoding,
	ABEXT & obj.AMask:  rIIIEncoding,
	ABEXTI & obj.AMask: iIEncoding,
	ABINV & obj.AMask:  rIIIEncoding,
	ABINVI & obj.AMask: iIEncoding,
	ABSET & obj.AMask:  rIIIEncoding,
	ABSETI & obj.AMask: iIEncoding,

	// Escape hatch
	AWORD & obj.AMask: rawEncoding,

	// Pseudo-operations
	obj.AFUNCDATA: pseudoOpEncoding,
	obj.APCDATA:   pseudoOpEncoding,
	obj.ATEXT:     pseudoOpEncoding,
	obj.ANOP:      pseudoOpEncoding,
	obj.ADUFFZERO: pseudoOpEncoding,
	obj.ADUFFCOPY: pseudoOpEncoding,
	obj.APCALIGN:  pseudoOpEncoding,
}
1814
1815// encodingForAs returns the encoding for an obj.As.
1816func encodingForAs(as obj.As) (encoding, error) {
1817	if base := as &^ obj.AMask; base != obj.ABaseRISCV && base != 0 {
1818		return badEncoding, fmt.Errorf("encodingForAs: not a RISC-V instruction %s", as)
1819	}
1820	asi := as & obj.AMask
1821	if int(asi) >= len(encodings) {
1822		return badEncoding, fmt.Errorf("encodingForAs: bad RISC-V instruction %s", as)
1823	}
1824	enc := encodings[asi]
1825	if enc.validate == nil {
1826		return badEncoding, fmt.Errorf("encodingForAs: no encoding for instruction %s", as)
1827	}
1828	return enc, nil
1829}
1830
// instruction holds the operands of a single instruction in the form used by
// the validate and encode functions, along with the Prog it was derived from.
type instruction struct {
	p      *obj.Prog // Prog that instruction is for
	as     obj.As    // Assembler opcode
	rd     uint32    // Destination register
	rs1    uint32    // Source register 1
	rs2    uint32    // Source register 2
	rs3    uint32    // Source register 3
	imm    int64     // Immediate
	funct3 uint32    // Function 3
	funct7 uint32    // Function 7 (or Function 2)
}
1842
1843func (ins *instruction) String() string {
1844	if ins.p == nil {
1845		return ins.as.String()
1846	}
1847	var suffix string
1848	if ins.p.As != ins.as {
1849		suffix = fmt.Sprintf(" (%v)", ins.as)
1850	}
1851	return fmt.Sprintf("%v%v", ins.p, suffix)
1852}
1853
1854func (ins *instruction) encode() (uint32, error) {
1855	enc, err := encodingForAs(ins.as)
1856	if err != nil {
1857		return 0, err
1858	}
1859	if enc.length <= 0 {
1860		return 0, fmt.Errorf("%v: encoding called for a pseudo instruction", ins.as)
1861	}
1862	return enc.encode(ins), nil
1863}
1864
1865func (ins *instruction) length() int {
1866	enc, err := encodingForAs(ins.as)
1867	if err != nil {
1868		return 0
1869	}
1870	return enc.length
1871}
1872
1873func (ins *instruction) validate(ctxt *obj.Link) {
1874	enc, err := encodingForAs(ins.as)
1875	if err != nil {
1876		ctxt.Diag(err.Error())
1877		return
1878	}
1879	enc.validate(ctxt, ins)
1880}
1881
1882func (ins *instruction) usesRegTmp() bool {
1883	return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP
1884}
1885
1886// instructionForProg returns the default *obj.Prog to instruction mapping.
1887func instructionForProg(p *obj.Prog) *instruction {
1888	ins := &instruction{
1889		as:  p.As,
1890		rd:  uint32(p.To.Reg),
1891		rs1: uint32(p.Reg),
1892		rs2: uint32(p.From.Reg),
1893		imm: p.From.Offset,
1894	}
1895	if len(p.RestArgs) == 1 {
1896		ins.rs3 = uint32(p.RestArgs[0].Reg)
1897	}
1898	return ins
1899}
1900
1901// instructionsForOpImmediate returns the machine instructions for an immediate
1902// operand. The instruction is specified by as and the source register is
1903// specified by rs, instead of the obj.Prog.
1904func instructionsForOpImmediate(p *obj.Prog, as obj.As, rs int16) []*instruction {
1905	// <opi> $imm, REG, TO
1906	ins := instructionForProg(p)
1907	ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE
1908
1909	low, high, err := Split32BitImmediate(ins.imm)
1910	if err != nil {
1911		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm, err)
1912		return nil
1913	}
1914	if high == 0 {
1915		return []*instruction{ins}
1916	}
1917
1918	// Split into two additions, if possible.
1919	// Do not split SP-writing instructions, as otherwise the recorded SP delta may be wrong.
1920	if p.Spadj == 0 && ins.as == AADDI && ins.imm >= -(1<<12) && ins.imm < 1<<12-1 {
1921		imm0 := ins.imm / 2
1922		imm1 := ins.imm - imm0
1923
1924		// ADDI $(imm/2), REG, TO
1925		// ADDI $(imm-imm/2), TO, TO
1926		ins.imm = imm0
1927		insADDI := &instruction{as: AADDI, rd: ins.rd, rs1: ins.rd, imm: imm1}
1928		return []*instruction{ins, insADDI}
1929	}
1930
1931	// LUI $high, TMP
1932	// ADDIW $low, TMP, TMP
1933	// <op> TMP, REG, TO
1934	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
1935	insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP, imm: low}
1936	switch ins.as {
1937	case AADDI:
1938		ins.as = AADD
1939	case AANDI:
1940		ins.as = AAND
1941	case AORI:
1942		ins.as = AOR
1943	case AXORI:
1944		ins.as = AXOR
1945	default:
1946		p.Ctxt.Diag("unsupported immediate instruction %v for splitting", p)
1947		return nil
1948	}
1949	ins.rs2 = REG_TMP
1950	if low == 0 {
1951		return []*instruction{insLUI, ins}
1952	}
1953	return []*instruction{insLUI, insADDIW, ins}
1954}
1955
1956// instructionsForLoad returns the machine instructions for a load. The load
1957// instruction is specified by as and the base/source register is specified
1958// by rs, instead of the obj.Prog.
1959func instructionsForLoad(p *obj.Prog, as obj.As, rs int16) []*instruction {
1960	if p.From.Type != obj.TYPE_MEM {
1961		p.Ctxt.Diag("%v requires memory for source", p)
1962		return nil
1963	}
1964
1965	switch as {
1966	case ALD, ALB, ALH, ALW, ALBU, ALHU, ALWU, AFLW, AFLD:
1967	default:
1968		p.Ctxt.Diag("%v: unknown load instruction %v", p, as)
1969		return nil
1970	}
1971
1972	// <load> $imm, REG, TO (load $imm+(REG), TO)
1973	ins := instructionForProg(p)
1974	ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE
1975	ins.imm = p.From.Offset
1976
1977	low, high, err := Split32BitImmediate(ins.imm)
1978	if err != nil {
1979		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm)
1980		return nil
1981	}
1982	if high == 0 {
1983		return []*instruction{ins}
1984	}
1985
1986	// LUI $high, TMP
1987	// ADD TMP, REG, TMP
1988	// <load> $low, TMP, TO
1989	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
1990	insADD := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: ins.rs1}
1991	ins.rs1, ins.imm = REG_TMP, low
1992
1993	return []*instruction{insLUI, insADD, ins}
1994}
1995
1996// instructionsForStore returns the machine instructions for a store. The store
1997// instruction is specified by as and the target/source register is specified
1998// by rd, instead of the obj.Prog.
1999func instructionsForStore(p *obj.Prog, as obj.As, rd int16) []*instruction {
2000	if p.To.Type != obj.TYPE_MEM {
2001		p.Ctxt.Diag("%v requires memory for destination", p)
2002		return nil
2003	}
2004
2005	switch as {
2006	case ASW, ASH, ASB, ASD, AFSW, AFSD:
2007	default:
2008		p.Ctxt.Diag("%v: unknown store instruction %v", p, as)
2009		return nil
2010	}
2011
2012	// <store> $imm, REG, TO (store $imm+(TO), REG)
2013	ins := instructionForProg(p)
2014	ins.as, ins.rd, ins.rs1, ins.rs2 = as, uint32(rd), uint32(p.From.Reg), obj.REG_NONE
2015	ins.imm = p.To.Offset
2016
2017	low, high, err := Split32BitImmediate(ins.imm)
2018	if err != nil {
2019		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm)
2020		return nil
2021	}
2022	if high == 0 {
2023		return []*instruction{ins}
2024	}
2025
2026	// LUI $high, TMP
2027	// ADD TMP, TO, TMP
2028	// <store> $low, REG, TMP
2029	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
2030	insADD := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: ins.rd}
2031	ins.rd, ins.imm = REG_TMP, low
2032
2033	return []*instruction{insLUI, insADD, ins}
2034}
2035
2036func instructionsForTLS(p *obj.Prog, ins *instruction) []*instruction {
2037	insAddTP := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: REG_TP}
2038
2039	var inss []*instruction
2040	if p.Ctxt.Flag_shared {
2041		// TLS initial-exec mode - load TLS offset from GOT, add the thread pointer
2042		// register, then load from or store to the resulting memory location.
2043		insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
2044		insLoadTLSOffset := &instruction{as: ALD, rd: REG_TMP, rs1: REG_TMP}
2045		inss = []*instruction{insAUIPC, insLoadTLSOffset, insAddTP, ins}
2046	} else {
2047		// TLS local-exec mode - load upper TLS offset, add the lower TLS offset,
2048		// add the thread pointer register, then load from or store to the resulting
2049		// memory location. Note that this differs from the suggested three
2050		// instruction sequence, as the Go linker does not currently have an
2051		// easy way to handle relocation across 12 bytes of machine code.
2052		insLUI := &instruction{as: ALUI, rd: REG_TMP}
2053		insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP}
2054		inss = []*instruction{insLUI, insADDIW, insAddTP, ins}
2055	}
2056	return inss
2057}
2058
2059func instructionsForTLSLoad(p *obj.Prog) []*instruction {
2060	if p.From.Sym.Type != objabi.STLSBSS {
2061		p.Ctxt.Diag("%v: %v is not a TLS symbol", p, p.From.Sym)
2062		return nil
2063	}
2064
2065	ins := instructionForProg(p)
2066	ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), REG_TMP, obj.REG_NONE, 0
2067
2068	return instructionsForTLS(p, ins)
2069}
2070
2071func instructionsForTLSStore(p *obj.Prog) []*instruction {
2072	if p.To.Sym.Type != objabi.STLSBSS {
2073		p.Ctxt.Diag("%v: %v is not a TLS symbol", p, p.To.Sym)
2074		return nil
2075	}
2076
2077	ins := instructionForProg(p)
2078	ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0
2079
2080	return instructionsForTLS(p, ins)
2081}
2082
2083// instructionsForMOV returns the machine instructions for an *obj.Prog that
2084// uses a MOV pseudo-instruction.
2085func instructionsForMOV(p *obj.Prog) []*instruction {
2086	ins := instructionForProg(p)
2087	inss := []*instruction{ins}
2088
2089	if p.Reg != 0 {
2090		p.Ctxt.Diag("%v: illegal MOV instruction", p)
2091		return nil
2092	}
2093
2094	switch {
2095	case p.From.Type == obj.TYPE_CONST && p.To.Type == obj.TYPE_REG:
2096		// Handle constant to register moves.
2097		if p.As != AMOV {
2098			p.Ctxt.Diag("%v: unsupported constant load", p)
2099			return nil
2100		}
2101
2102		// For constants larger than 32 bits in size that have trailing zeros,
2103		// use the value with the trailing zeros removed and then use a SLLI
2104		// instruction to restore the original constant.
2105		// For example:
2106		// 	MOV $0x8000000000000000, X10
2107		// becomes
2108		// 	MOV $1, X10
2109		// 	SLLI $63, X10, X10
2110		var insSLLI *instruction
2111		if err := immIFits(ins.imm, 32); err != nil {
2112			ctz := bits.TrailingZeros64(uint64(ins.imm))
2113			if err := immIFits(ins.imm>>ctz, 32); err == nil {
2114				ins.imm = ins.imm >> ctz
2115				insSLLI = &instruction{as: ASLLI, rd: ins.rd, rs1: ins.rd, imm: int64(ctz)}
2116			}
2117		}
2118
2119		low, high, err := Split32BitImmediate(ins.imm)
2120		if err != nil {
2121			p.Ctxt.Diag("%v: constant %d too large: %v", p, ins.imm, err)
2122			return nil
2123		}
2124
2125		// MOV $c, R -> ADD $c, ZERO, R
2126		ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, REG_ZERO, obj.REG_NONE, low
2127
2128		// LUI is only necessary if the constant does not fit in 12 bits.
2129		if high != 0 {
2130			// LUI top20bits(c), R
2131			// ADD bottom12bits(c), R, R
2132			insLUI := &instruction{as: ALUI, rd: ins.rd, imm: high}
2133			inss = []*instruction{insLUI}
2134			if low != 0 {
2135				ins.as, ins.rs1 = AADDIW, ins.rd
2136				inss = append(inss, ins)
2137			}
2138		}
2139		if insSLLI != nil {
2140			inss = append(inss, insSLLI)
2141		}
2142
2143	case p.From.Type == obj.TYPE_CONST && p.To.Type != obj.TYPE_REG:
2144		p.Ctxt.Diag("%v: constant load must target register", p)
2145		return nil
2146
2147	case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG:
2148		// Handle register to register moves.
2149		switch p.As {
2150		case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb
2151			ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, uint32(p.From.Reg), obj.REG_NONE, 0
2152		case AMOVW: // MOVW Ra, Rb -> ADDIW $0, Ra, Rb
2153			ins.as, ins.rs1, ins.rs2, ins.imm = AADDIW, uint32(p.From.Reg), obj.REG_NONE, 0
2154		case AMOVBU: // MOVBU Ra, Rb -> ANDI $255, Ra, Rb
2155			ins.as, ins.rs1, ins.rs2, ins.imm = AANDI, uint32(p.From.Reg), obj.REG_NONE, 255
2156		case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb
2157			ins.as, ins.rs1 = AFSGNJS, uint32(p.From.Reg)
2158		case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb
2159			ins.as, ins.rs1 = AFSGNJD, uint32(p.From.Reg)
2160		case AMOVB, AMOVH:
2161			if buildcfg.GORISCV64 >= 22 {
2162				// Use SEXTB or SEXTH to extend.
2163				ins.as, ins.rs1, ins.rs2 = ASEXTB, uint32(p.From.Reg), obj.REG_NONE
2164				if p.As == AMOVH {
2165					ins.as = ASEXTH
2166				}
2167			} else {
2168				// Use SLLI/SRAI sequence to extend.
2169				ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE
2170				if p.As == AMOVB {
2171					ins.imm = 56
2172				} else if p.As == AMOVH {
2173					ins.imm = 48
2174				}
2175				ins2 := &instruction{as: ASRAI, rd: ins.rd, rs1: ins.rd, imm: ins.imm}
2176				inss = append(inss, ins2)
2177			}
2178		case AMOVHU, AMOVWU:
2179			if buildcfg.GORISCV64 >= 22 {
2180				// Use ZEXTH or ADDUW to extend.
2181				ins.as, ins.rs1, ins.rs2, ins.imm = AZEXTH, uint32(p.From.Reg), obj.REG_NONE, 0
2182				if p.As == AMOVWU {
2183					ins.as, ins.rs2 = AADDUW, REG_ZERO
2184				}
2185			} else {
2186				// Use SLLI/SRLI sequence to extend.
2187				ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE
2188				if p.As == AMOVHU {
2189					ins.imm = 48
2190				} else if p.As == AMOVWU {
2191					ins.imm = 32
2192				}
2193				ins2 := &instruction{as: ASRLI, rd: ins.rd, rs1: ins.rd, imm: ins.imm}
2194				inss = append(inss, ins2)
2195			}
2196		}
2197
2198	case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG:
2199		// Memory to register loads.
2200		switch p.From.Name {
2201		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
2202			// MOV c(Rs), Rd -> L $c, Rs, Rd
2203			inss = instructionsForLoad(p, movToLoad(p.As), addrToReg(p.From))
2204
2205		case obj.NAME_EXTERN, obj.NAME_STATIC:
2206			if p.From.Sym.Type == objabi.STLSBSS {
2207				return instructionsForTLSLoad(p)
2208			}
2209
2210			// Note that the values for $off_hi and $off_lo are currently
2211			// zero and will be assigned during relocation.
2212			//
2213			// AUIPC $off_hi, Rd
2214			// L $off_lo, Rd, Rd
2215			insAUIPC := &instruction{as: AAUIPC, rd: ins.rd}
2216			ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), ins.rd, obj.REG_NONE, 0
2217			inss = []*instruction{insAUIPC, ins}
2218
2219		default:
2220			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
2221			return nil
2222		}
2223
2224	case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM:
2225		// Register to memory stores.
2226		switch p.As {
2227		case AMOVBU, AMOVHU, AMOVWU:
2228			p.Ctxt.Diag("%v: unsupported unsigned store", p)
2229			return nil
2230		}
2231		switch p.To.Name {
2232		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
2233			// MOV Rs, c(Rd) -> S $c, Rs, Rd
2234			inss = instructionsForStore(p, movToStore(p.As), addrToReg(p.To))
2235
2236		case obj.NAME_EXTERN, obj.NAME_STATIC:
2237			if p.To.Sym.Type == objabi.STLSBSS {
2238				return instructionsForTLSStore(p)
2239			}
2240
2241			// Note that the values for $off_hi and $off_lo are currently
2242			// zero and will be assigned during relocation.
2243			//
2244			// AUIPC $off_hi, Rtmp
2245			// S $off_lo, Rtmp, Rd
2246			insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
2247			ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0
2248			inss = []*instruction{insAUIPC, ins}
2249
2250		default:
2251			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
2252			return nil
2253		}
2254
2255	case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG:
2256		// MOV $sym+off(SP/SB), R
2257		if p.As != AMOV {
2258			p.Ctxt.Diag("%v: unsupported address load", p)
2259			return nil
2260		}
2261		switch p.From.Name {
2262		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
2263			inss = instructionsForOpImmediate(p, AADDI, addrToReg(p.From))
2264
2265		case obj.NAME_EXTERN, obj.NAME_STATIC:
2266			// Note that the values for $off_hi and $off_lo are currently
2267			// zero and will be assigned during relocation.
2268			//
2269			// AUIPC $off_hi, R
2270			// ADDI $off_lo, R
2271			insAUIPC := &instruction{as: AAUIPC, rd: ins.rd}
2272			ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, ins.rd, obj.REG_NONE, 0
2273			inss = []*instruction{insAUIPC, ins}
2274
2275		default:
2276			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
2277			return nil
2278		}
2279
2280	case p.From.Type == obj.TYPE_ADDR && p.To.Type != obj.TYPE_REG:
2281		p.Ctxt.Diag("%v: address load must target register", p)
2282		return nil
2283
2284	default:
2285		p.Ctxt.Diag("%v: unsupported MOV", p)
2286		return nil
2287	}
2288
2289	return inss
2290}
2291
2292// instructionsForRotate returns the machine instructions for a bitwise rotation.
2293func instructionsForRotate(p *obj.Prog, ins *instruction) []*instruction {
2294	if buildcfg.GORISCV64 >= 22 {
2295		// Rotation instructions are supported natively.
2296		return []*instruction{ins}
2297	}
2298
2299	switch ins.as {
2300	case AROL, AROLW, AROR, ARORW:
2301		// ROL -> OR (SLL x y) (SRL x (NEG y))
2302		// ROR -> OR (SRL x y) (SLL x (NEG y))
2303		sllOp, srlOp := ASLL, ASRL
2304		if ins.as == AROLW || ins.as == ARORW {
2305			sllOp, srlOp = ASLLW, ASRLW
2306		}
2307		shift1, shift2 := sllOp, srlOp
2308		if ins.as == AROR || ins.as == ARORW {
2309			shift1, shift2 = shift2, shift1
2310		}
2311		return []*instruction{
2312			&instruction{as: ASUB, rs1: REG_ZERO, rs2: ins.rs2, rd: REG_TMP},
2313			&instruction{as: shift2, rs1: ins.rs1, rs2: REG_TMP, rd: REG_TMP},
2314			&instruction{as: shift1, rs1: ins.rs1, rs2: ins.rs2, rd: ins.rd},
2315			&instruction{as: AOR, rs1: REG_TMP, rs2: ins.rd, rd: ins.rd},
2316		}
2317
2318	case ARORI, ARORIW:
2319		// ROR -> OR (SLLI -x y) (SRLI x y)
2320		sllOp, srlOp := ASLLI, ASRLI
2321		sllImm := int64(int8(-ins.imm) & 63)
2322		if ins.as == ARORIW {
2323			sllOp, srlOp = ASLLIW, ASRLIW
2324			sllImm = int64(int8(-ins.imm) & 31)
2325		}
2326		return []*instruction{
2327			&instruction{as: srlOp, rs1: ins.rs1, rd: REG_TMP, imm: ins.imm},
2328			&instruction{as: sllOp, rs1: ins.rs1, rd: ins.rd, imm: sllImm},
2329			&instruction{as: AOR, rs1: REG_TMP, rs2: ins.rd, rd: ins.rd},
2330		}
2331
2332	default:
2333		p.Ctxt.Diag("%v: unknown rotation", p)
2334		return nil
2335	}
2336}
2337
// instructionsForProg returns the machine instructions for an *obj.Prog.
//
// It starts from the default mapping produced by instructionForProg and then
// adjusts the operands per opcode: pseudo-instructions are rewritten to the
// instructions that implement them, and MOVs, loads, stores, immediate
// operations and rotations are delegated to their dedicated helpers, which
// may return several instructions. Opcodes without a case use the default
// mapping unchanged.
//
// Every returned instruction has its p field set to p. nil is returned when
// p has more than one additional argument, when FNES/FNED lacks a register
// destination, or when a helper fails; in each case a diagnostic has been
// reported.
func instructionsForProg(p *obj.Prog) []*instruction {
	ins := instructionForProg(p)
	inss := []*instruction{ins}

	if len(p.RestArgs) > 1 {
		p.Ctxt.Diag("too many source registers")
		return nil
	}

	switch ins.as {
	case AJAL, AJALR:
		// rd comes from the From register, rs1 from the To register and
		// the immediate from the To offset.
		ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.To.Reg), obj.REG_NONE
		ins.imm = p.To.Offset

	case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
		// Rewrite the pseudo branches in terms of BEQ, BNE, BLT, BLTU, BGE
		// and BGEU, setting rs1/rs2 from p.From.Reg, p.Reg or the zero
		// register as required. The remaining branches keep the default
		// operand mapping.
		switch ins.as {
		case ABEQZ:
			ins.as, ins.rs1, ins.rs2 = ABEQ, REG_ZERO, uint32(p.From.Reg)
		case ABGEZ:
			ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg)
		case ABGT:
			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), uint32(p.Reg)
		case ABGTU:
			ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.From.Reg), uint32(p.Reg)
		case ABGTZ:
			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO
		case ABLE:
			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), uint32(p.Reg)
		case ABLEU:
			ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.From.Reg), uint32(p.Reg)
		case ABLEZ:
			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO
		case ABLTZ:
			ins.as, ins.rs1, ins.rs2 = ABLT, REG_ZERO, uint32(p.From.Reg)
		case ABNEZ:
			ins.as, ins.rs1, ins.rs2 = ABNE, REG_ZERO, uint32(p.From.Reg)
		}
		// The branch immediate is the To offset.
		ins.imm = p.To.Offset

	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
		inss = instructionsForMOV(p)

	case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD:
		inss = instructionsForLoad(p, ins.as, p.From.Reg)

	case ASW, ASH, ASB, ASD, AFSW, AFSD:
		inss = instructionsForStore(p, ins.as, p.To.Reg)

	case ALRW, ALRD:
		// Set aq to use acquire access ordering
		ins.funct7 = 2
		ins.rs1, ins.rs2 = uint32(p.From.Reg), REG_ZERO

	case AADDI, AANDI, AORI, AXORI:
		inss = instructionsForOpImmediate(p, ins.as, p.Reg)

	case ASCW, ASCD:
		// Set release access ordering
		ins.funct7 = 1
		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)

	case AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD,
		AAMOXORW, AAMOXORD, AAMOMINW, AAMOMIND, AAMOMINUW, AAMOMINUD, AAMOMAXW, AAMOMAXD, AAMOMAXUW, AAMOMAXUD:
		// Set aqrl to use acquire & release access ordering
		ins.funct7 = 3
		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)

	case AECALL, AEBREAK, ARDCYCLE, ARDTIME, ARDINSTRET:
		// The immediate is the csr value recorded in the opcode's encoding;
		// rd falls back to the zero register when p has no destination.
		insEnc := encode(p.As)
		if p.To.Type == obj.TYPE_NONE {
			ins.rd = REG_ZERO
		}
		ins.rs1 = REG_ZERO
		ins.imm = insEnc.csr

	case AFENCE:
		ins.rd, ins.rs1, ins.rs2 = REG_ZERO, REG_ZERO, obj.REG_NONE
		ins.imm = 0x0ff

	case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD:
		// Set the default rounding mode in funct3 to round to zero.
		// If rmSuffixBit is set in p.Scond, the remaining bits of p.Scond
		// are used as the rounding mode instead.
		if p.Scond&rmSuffixBit == 0 {
			ins.funct3 = uint32(RM_RTZ)
		} else {
			ins.funct3 = uint32(p.Scond &^ rmSuffixBit)
		}

	case AFNES, AFNED:
		// Replace FNE[SD] with FEQ[SD] and NOT.
		if p.To.Type != obj.TYPE_REG {
			p.Ctxt.Diag("%v needs an integer register output", p)
			return nil
		}
		if ins.as == AFNES {
			ins.as = AFEQS
		} else {
			ins.as = AFEQD
		}
		ins2 := &instruction{
			as:  AXORI, // [bit] xor 1 = not [bit]
			rd:  ins.rd,
			rs1: ins.rd,
			imm: 1,
		}
		inss = append(inss, ins2)

	case AFSQRTS, AFSQRTD:
		// These instructions expect a zero (i.e. float register 0)
		// to be the second input operand.
		ins.rs1 = uint32(p.From.Reg)
		ins.rs2 = REG_F0

	case AFMADDS, AFMSUBS, AFNMADDS, AFNMSUBS,
		AFMADDD, AFMSUBD, AFNMADDD, AFNMSUBD:
		// Swap the first two operands so that the operands are in the same
		// order as they are in the specification: RS1, RS2, RS3, RD.
		ins.rs1, ins.rs2 = ins.rs2, ins.rs1

	case ANEG, ANEGW:
		// NEG rs, rd -> SUB rs, X0, rd
		ins.as = ASUB
		if p.As == ANEGW {
			ins.as = ASUBW
		}
		ins.rs1 = REG_ZERO
		// With no destination given, the source register is also the
		// destination.
		if ins.rd == obj.REG_NONE {
			ins.rd = ins.rs2
		}

	case ANOT:
		// NOT rs, rd -> XORI $-1, rs, rd
		ins.as = AXORI
		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
		// With no destination given, the source register is also the
		// destination.
		if ins.rd == obj.REG_NONE {
			ins.rd = ins.rs1
		}
		ins.imm = -1

	case ASEQZ:
		// SEQZ rs, rd -> SLTIU $1, rs, rd
		ins.as = ASLTIU
		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
		ins.imm = 1

	case ASNEZ:
		// SNEZ rs, rd -> SLTU rs, x0, rd
		ins.as = ASLTU
		ins.rs1 = REG_ZERO

	case AFABSS:
		// FABSS rs, rd -> FSGNJXS rs, rs, rd
		ins.as = AFSGNJXS
		ins.rs1 = uint32(p.From.Reg)

	case AFABSD:
		// FABSD rs, rd -> FSGNJXD rs, rs, rd
		ins.as = AFSGNJXD
		ins.rs1 = uint32(p.From.Reg)

	case AFNEGS:
		// FNEGS rs, rd -> FSGNJNS rs, rs, rd
		ins.as = AFSGNJNS
		ins.rs1 = uint32(p.From.Reg)

	case AFNEGD:
		// FNEGD rs, rd -> FSGNJND rs, rs, rd
		ins.as = AFSGNJND
		ins.rs1 = uint32(p.From.Reg)

	case AROL, AROLW, AROR, ARORW:
		inss = instructionsForRotate(p, ins)

	case ARORI:
		// An out-of-range immediate is diagnosed, but the rotation is
		// still expanded.
		if ins.imm < 0 || ins.imm > 63 {
			p.Ctxt.Diag("%v: immediate out of range 0 to 63", p)
		}
		inss = instructionsForRotate(p, ins)

	case ARORIW:
		// An out-of-range immediate is diagnosed, but the rotation is
		// still expanded.
		if ins.imm < 0 || ins.imm > 31 {
			p.Ctxt.Diag("%v: immediate out of range 0 to 31", p)
		}
		inss = instructionsForRotate(p, ins)

	case ASLLI, ASRLI, ASRAI:
		if ins.imm < 0 || ins.imm > 63 {
			p.Ctxt.Diag("%v: immediate out of range 0 to 63", p)
		}

	case ASLLIW, ASRLIW, ASRAIW:
		if ins.imm < 0 || ins.imm > 31 {
			p.Ctxt.Diag("%v: immediate out of range 0 to 31", p)
		}

	case ACLZ, ACLZW, ACTZ, ACTZW, ACPOP, ACPOPW, ASEXTB, ASEXTH, AZEXTH:
		// Unary operations: the single source register goes in rs1.
		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE

	case AORCB, AREV8:
		ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), obj.REG_NONE
	}

	// Record the originating Prog on every instruction produced.
	for _, ins := range inss {
		ins.p = p
	}

	return inss
}
2546
// assemble emits machine code.
// It is called at the very end of the assembly process.
//
// For every Prog of cursym it first records any relocation the Prog needs,
// then expands the Prog with instructionsForProg and writes each encoded
// instruction into cursym starting at p.Pc. Progs whose expansion uses
// REG_TMP are marked USES_REG_TMP, and finally obj.MarkUnsafePoints is run
// over the function with isUnsafePoint as the predicate. Nothing is emitted
// if ctxt has already recorded errors.
func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline {
		ctxt.Diag("-spectre=ret not supported on riscv")
		ctxt.Retpoline = false // don't keep printing
	}

	// If errors were encountered during preprocess/validation, proceeding
	// and attempting to encode said instructions will only lead to panics.
	if ctxt.Errors > 0 {
		return
	}

	for p := cursym.Func().Text; p != nil; p = p.Link {
		switch p.As {
		case AJAL:
			// A JAL marked as needing a relocation gets a 4-byte
			// R_RISCV_JAL relocation against its target symbol.
			if p.Mark&NEED_JAL_RELOC == NEED_JAL_RELOC {
				rel := obj.Addrel(cursym)
				rel.Off = int32(p.Pc)
				rel.Siz = 4
				rel.Sym = p.To.Sym
				rel.Add = p.To.Offset
				rel.Type = objabi.R_RISCV_JAL
			}
		case AJALR:
			if p.To.Sym != nil {
				ctxt.Diag("%v: unexpected AJALR with to symbol", p)
			}

		case AAUIPC, AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
			// Pick the relocation type, and the operand that names the
			// symbol, from the mark bits. Each break below leaves only the
			// switch: no relocation is added, but the Prog is still
			// encoded by the loop that follows the switch.
			var addr *obj.Addr
			var rt objabi.RelocType
			if p.Mark&NEED_CALL_RELOC == NEED_CALL_RELOC {
				rt = objabi.R_RISCV_CALL
				addr = &p.From
			} else if p.Mark&NEED_PCREL_ITYPE_RELOC == NEED_PCREL_ITYPE_RELOC {
				rt = objabi.R_RISCV_PCREL_ITYPE
				addr = &p.From
			} else if p.Mark&NEED_PCREL_STYPE_RELOC == NEED_PCREL_STYPE_RELOC {
				rt = objabi.R_RISCV_PCREL_STYPE
				addr = &p.To
			} else {
				break
			}
			if p.As == AAUIPC {
				if p.Link == nil {
					ctxt.Diag("AUIPC needing PC-relative reloc missing following instruction")
					break
				}
				// For AUIPC the symbol is carried in the first additional
				// argument.
				// NOTE(review): this indexes p.RestArgs[0] without a length
				// check; presumably a marked AUIPC always has one - confirm.
				addr = &p.RestArgs[0].Addr
			}
			if addr.Sym == nil {
				ctxt.Diag("PC-relative relocation missing symbol")
				break
			}
			// TLS symbols override the relocation type chosen above.
			if addr.Sym.Type == objabi.STLSBSS {
				if ctxt.Flag_shared {
					rt = objabi.R_RISCV_TLS_IE
				} else {
					rt = objabi.R_RISCV_TLS_LE
				}
			}

			// The relocation is 8 bytes wide; presumably it spans this
			// instruction and the 4-byte instruction after it.
			rel := obj.Addrel(cursym)
			rel.Off = int32(p.Pc)
			rel.Siz = 8
			rel.Sym = addr.Sym
			rel.Add = addr.Offset
			rel.Type = rt

		case obj.APCALIGN:
			// Fill the alignment padding with 4-byte NOPs. The continue
			// skips the encoding loop below, as the padding has already
			// been written.
			alignedValue := p.From.Offset
			v := pcAlignPadLength(p.Pc, alignedValue)
			offset := p.Pc
			for ; v >= 4; v -= 4 {
				// NOP
				cursym.WriteBytes(ctxt, offset, []byte{0x13, 0, 0, 0})
				offset += 4
			}
			continue
		}

		// Encode and write each machine instruction for p. An instruction
		// that fails to encode is skipped and does not advance the offset.
		offset := p.Pc
		for _, ins := range instructionsForProg(p) {
			if ic, err := ins.encode(); err == nil {
				cursym.WriteInt(ctxt, offset, ins.length(), int64(ic))
				offset += int64(ins.length())
			}
			if ins.usesRegTmp() {
				p.Mark |= USES_REG_TMP
			}
		}
	}

	obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil)
}
2644
2645func isUnsafePoint(p *obj.Prog) bool {
2646	return p.Mark&USES_REG_TMP == USES_REG_TMP || p.From.Reg == REG_TMP || p.To.Reg == REG_TMP || p.Reg == REG_TMP
2647}
2648
2649func ParseSuffix(prog *obj.Prog, cond string) (err error) {
2650	switch prog.As {
2651	case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD:
2652		prog.Scond, err = rmSuffixEncode(strings.TrimPrefix(cond, "."))
2653	}
2654	return
2655}
2656
// LinkRISCV64 is the obj.LinkArch for sys.ArchRISCV64. It registers this
// package's buildop, preprocess, assemble and progedit functions together
// with its unaryDst table and DWARF register mapping.
var LinkRISCV64 = obj.LinkArch{
	Arch:           sys.ArchRISCV64,
	Init:           buildop,
	Preprocess:     preprocess,
	Assemble:       assemble,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: RISCV64DWARFRegisters,
}
2666