1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package wasm
6
7import (
8	"bytes"
9	"cmd/internal/obj"
10	"cmd/internal/objabi"
11	"cmd/internal/sys"
12	"encoding/binary"
13	"fmt"
14	"internal/abi"
15	"io"
16	"math"
17)
18
// Register maps each register name to its REG_* register number.
// init inverts this table to build registerNames, which rconv uses to
// turn a register number back into its name.
var Register = map[string]int16{
	// Special-purpose registers.
	"SP":    REG_SP,
	"CTXT":  REG_CTXT,
	"g":     REG_g,
	"RET0":  REG_RET0,
	"RET1":  REG_RET1,
	"RET2":  REG_RET2,
	"RET3":  REG_RET3,
	"PAUSE": REG_PAUSE,

	// R registers.
	"R0":  REG_R0,
	"R1":  REG_R1,
	"R2":  REG_R2,
	"R3":  REG_R3,
	"R4":  REG_R4,
	"R5":  REG_R5,
	"R6":  REG_R6,
	"R7":  REG_R7,
	"R8":  REG_R8,
	"R9":  REG_R9,
	"R10": REG_R10,
	"R11": REG_R11,
	"R12": REG_R12,
	"R13": REG_R13,
	"R14": REG_R14,
	"R15": REG_R15,

	// F registers, first group.
	"F0":  REG_F0,
	"F1":  REG_F1,
	"F2":  REG_F2,
	"F3":  REG_F3,
	"F4":  REG_F4,
	"F5":  REG_F5,
	"F6":  REG_F6,
	"F7":  REG_F7,
	"F8":  REG_F8,
	"F9":  REG_F9,
	"F10": REG_F10,
	"F11": REG_F11,
	"F12": REG_F12,
	"F13": REG_F13,
	"F14": REG_F14,
	"F15": REG_F15,

	// F registers, second group.
	"F16": REG_F16,
	"F17": REG_F17,
	"F18": REG_F18,
	"F19": REG_F19,
	"F20": REG_F20,
	"F21": REG_F21,
	"F22": REG_F22,
	"F23": REG_F23,
	"F24": REG_F24,
	"F25": REG_F25,
	"F26": REG_F26,
	"F27": REG_F27,
	"F28": REG_F28,
	"F29": REG_F29,
	"F30": REG_F30,
	"F31": REG_F31,

	// PC_B is the register preprocess writes the next basic block number to.
	"PC_B": REG_PC_B,
}
82
// registerNames holds the name of each register, indexed by the register
// number minus MINREG. It is filled in by init from Register and read by
// rconv; slots for numbers that Register does not name stay empty.
var registerNames []string
84
85func init() {
86	obj.RegisterRegister(MINREG, MAXREG, rconv)
87	obj.RegisterOpcode(obj.ABaseWasm, Anames)
88
89	registerNames = make([]string, MAXREG-MINREG)
90	for name, reg := range Register {
91		registerNames[reg-MINREG] = name
92	}
93}
94
95func rconv(r int) string {
96	return registerNames[r-MINREG]
97}
98
// unaryDst is the set of instructions whose single operand is a
// destination: when such an instruction is built with one argument, the
// argument is stored in Prog.To rather than Prog.From (see appendp in
// preprocess). It is also exported to package obj through Linkwasm.UnaryDst.
var unaryDst = map[obj.As]bool{
	ASet:          true,
	ATee:          true,
	ACall:         true,
	ACallIndirect: true,
	ABr:           true,
	ABrIf:         true,
	ABrTable:      true,
	AI32Store:     true,
	AI64Store:     true,
	AF32Store:     true,
	AF64Store:     true,
	AI32Store8:    true,
	AI32Store16:   true,
	AI64Store8:    true,
	AI64Store16:   true,
	AI64Store32:   true,
	ACALLNORESUME: true,
}
118
// Linkwasm is the obj.LinkArch description for sys.ArchWasm. It wires the
// hooks defined in this package (instinit, preprocess, assemble) and the
// unaryDst table into package obj.
var Linkwasm = obj.LinkArch{
	Arch:       sys.ArchWasm,
	Init:       instinit,
	Preprocess: preprocess,
	Assemble:   assemble,
	UnaryDst:   unaryDst,
}
126
// Runtime symbols looked up once by instinit and consulted by preprocess.
var (
	morestack       *obj.LSym // runtime.morestack, called when the function needs its context
	morestackNoCtxt *obj.LSym // runtime.morestack_noctxt, called otherwise
	sigpanic        *obj.LSym // runtime.sigpanic (ABIInternal); calls to it get special PC handling
)
132
const (
	/* mark flags */

	// WasmImport is set in Prog.Mark by preprocess on the Call instruction
	// of a wasmimport wrapper, identifying it as a call to an imported
	// function.
	WasmImport = 1 << 0
)
137
const (
	// GojsModule is the special wasm module name that, when used as the
	// module name in //go:wasmimport, causes the generated code to pass the
	// stack pointer directly to the imported function. In other words, any
	// function that uses the gojs module understands the internal Go WASM
	// ABI directly.
	GojsModule = "gojs"
)
145
146func instinit(ctxt *obj.Link) {
147	morestack = ctxt.Lookup("runtime.morestack")
148	morestackNoCtxt = ctxt.Lookup("runtime.morestack_noctxt")
149	sigpanic = ctxt.LookupABI("runtime.sigpanic", obj.ABIInternal)
150}
151
// preprocess rewrites the Prog list of function s in place, lowering the
// pseudo-instructions (CALL, CALLNORESUME, JMP, RET, RETUNWIND, MOVx,
// RESUMEPOINT) and symbolic operands into the WebAssembly-level instruction
// sequence. New Progs are allocated through newprog. It is installed as
// Linkwasm.Preprocess.
//
// The passes run in this order:
//   - record the argument and frame sizes from the TEXT Prog;
//   - generate the body of a wasmimport wrapper, or the panic argp fix-up
//     of a wrapper function;
//   - generate the frame allocation and, if enabled, the maymorestack call;
//   - assign Pc values and turn RESUMEPOINTs into End instructions while
//     building the BrTable index list;
//   - generate the stack-growth check;
//   - expand JMP, CALL/CALLNORESUME and RET/RETUNWIND;
//   - resolve NAME_AUTO/NAME_PARAM offsets and expand Get of an address,
//     loads from memory operands and MOVx;
//   - wrap the body in the unwindExit block, the entryPointLoop and the
//     resume-point blocks;
//   - convert branch targets into relative block depths.
//
// It panics on malformed input (negative frame size, unbalanced blocks,
// unsupported operand kinds, ...).
func preprocess(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	// appendp places instruction as with the given operands after p and
	// returns the Prog that now holds it. If p is a NOP it is overwritten in
	// place instead of allocating a new Prog; otherwise the new Prog inherits
	// p's Pc. A single operand goes to To for unaryDst instructions and to
	// From for all others.
	appendp := func(p *obj.Prog, as obj.As, args ...obj.Addr) *obj.Prog {
		if p.As != obj.ANOP {
			p2 := obj.Appendp(p, newprog)
			p2.Pc = p.Pc
			p = p2
		}
		p.As = as
		switch len(args) {
		case 0:
			p.From = obj.Addr{}
			p.To = obj.Addr{}
		case 1:
			if unaryDst[as] {
				p.From = obj.Addr{}
				p.To = args[0]
			} else {
				p.From = args[0]
				p.To = obj.Addr{}
			}
		case 2:
			p.From = args[0]
			p.To = args[1]
		default:
			panic("bad args")
		}
		return p
	}

	// The TEXT Prog carries the frame size in To.Offset and the argument
	// size in To.Val.
	framesize := s.Func().Text.To.Offset
	if framesize < 0 {
		panic("bad framesize")
	}
	s.Func().Args = s.Func().Text.To.Val.(int32)
	s.Func().Locals = int32(framesize)

	// If the function exists just to call out to a wasmimport, then
	// generate the code to translate from our internal Go-stack
	// based call convention to the native webassembly call convention.
	if wi := s.Func().WasmImport; wi != nil {
		s.Func().WasmImportSym = wi.CreateSym(ctxt)
		p := s.Func().Text
		if p.Link != nil {
			panic("wrapper functions for WASM imports should not have a body")
		}
		to := obj.Addr{
			Type: obj.TYPE_MEM,
			Name: obj.NAME_EXTERN,
			Sym:  s,
		}

		// If the module that the import is for is our magic "gojs" module, then this
		// indicates that the called function understands the Go stack-based call convention
		// so we just pass the stack pointer to it, knowing it will read the params directly
		// off the stack and push the results into memory based on the stack pointer.
		if wi.Module == GojsModule {
			// The called function has a signature of 'func(sp int)'. It has access to the memory
			// value somewhere to be able to address the memory based on the "sp" value.

			p = appendp(p, AGet, regAddr(REG_SP))
			p = appendp(p, ACall, to)

			p.Mark = WasmImport
		} else {
			if len(wi.Results) > 1 {
				// TODO(evanphx) implement support for the multi-value proposal:
				// https://github.com/WebAssembly/multi-value/blob/master/proposals/multi-value/Overview.md
				panic("invalid results type") // impossible until multi-value proposal has landed
			}
			if len(wi.Results) == 1 {
				// If we have a result (rather than returning nothing at all), then
				// we'll write the result to the Go stack relative to the current stack pointer.
				// We cache the current stack pointer value on the wasm stack here and then use
				// it after the Call instruction to store the result.
				p = appendp(p, AGet, regAddr(REG_SP))
			}
			for _, f := range wi.Params {
				// Each load instruction will consume the value of sp on the stack, so
				// we need to read sp for each param. WASM appears to not have a stack dup instruction
				// (a strange omission for a stack-based VM), if it did, we'd be using the dup here.
				p = appendp(p, AGet, regAddr(REG_SP))

				// Offset is the location of the param on the Go stack (ie relative to sp).
				// Because of our call convention, the parameters are located an additional 8 bytes
				// from sp because we store the return address as an int64 at the bottom of the stack.
				// Ie the stack looks like [return_addr, param3, param2, param1, etc]

				// Ergo, we add 8 to the true byte offset of the param to skip the return address.
				loadOffset := f.Offset + 8

				// We're reading the value from the Go stack onto the WASM stack and leaving it there
				// for CALL to pick them up.
				switch f.Type {
				case obj.WasmI32:
					p = appendp(p, AI32Load, constAddr(loadOffset))
				case obj.WasmI64:
					p = appendp(p, AI64Load, constAddr(loadOffset))
				case obj.WasmF32:
					p = appendp(p, AF32Load, constAddr(loadOffset))
				case obj.WasmF64:
					p = appendp(p, AF64Load, constAddr(loadOffset))
				case obj.WasmPtr:
					// Pointers are loaded as 64-bit values and wrapped to i32.
					p = appendp(p, AI64Load, constAddr(loadOffset))
					p = appendp(p, AI32WrapI64)
				default:
					panic("bad param type")
				}
			}

			// The call instruction is marked as being for a wasm import so that a later phase
			// will generate relocation information that allows us to patch this with the
			// offset of the imported function in the wasm imports.
			p = appendp(p, ACall, to)
			p.Mark = WasmImport

			if len(wi.Results) == 1 {
				f := wi.Results[0]

				// Much like with the params, we need to adjust the offset we store the result value
				// to by 8 bytes to account for the return address on the Go stack.
				storeOffset := f.Offset + 8

				// This code is paired with the code above that reads the stack pointer onto the wasm
				// stack. We've done this so we have a consistent view of the sp value as it might
				// be manipulated by the call and we want to ignore that manipulation here.
				switch f.Type {
				case obj.WasmI32:
					p = appendp(p, AI32Store, constAddr(storeOffset))
				case obj.WasmI64:
					p = appendp(p, AI64Store, constAddr(storeOffset))
				case obj.WasmF32:
					p = appendp(p, AF32Store, constAddr(storeOffset))
				case obj.WasmF64:
					p = appendp(p, AF64Store, constAddr(storeOffset))
				case obj.WasmPtr:
					// Pointer results are zero-extended to 64 bits before being stored.
					p = appendp(p, AI64ExtendI32U)
					p = appendp(p, AI64Store, constAddr(storeOffset))
				default:
					panic("bad result type")
				}
			}
		}

		p = appendp(p, obj.ARET)

		// It should be 0 already, but we'll set it to 0 anyway just to be sure
		// that the code below which adds frame expansion code to the function body
		// isn't run. We don't want the frame expansion code because our function
		// body is just the code to translate and call the imported function.
		framesize = 0
	} else if s.Func().Text.From.Sym.Wrapper() {
		// if g._panic != nil && g._panic.argp == FP {
		//   g._panic.argp = bottom-of-frame
		// }
		//
		// MOVD g_panic(g), R0
		// Get R0
		// I64Eqz
		// Not
		// If
		//   Get SP
		//   I64ExtendI32U
		//   I64Const $framesize+8
		//   I64Add
		//   I64Load panic_argp(R0)
		//   I64Eq
		//   If
		//     MOVD SP, panic_argp(R0)
		//   End
		// End

		gpanic := obj.Addr{
			Type:   obj.TYPE_MEM,
			Reg:    REGG,
			Offset: 4 * 8, // g_panic
		}

		panicargp := obj.Addr{
			Type:   obj.TYPE_MEM,
			Reg:    REG_R0,
			Offset: 0, // panic.argp
		}

		p := s.Func().Text
		p = appendp(p, AMOVD, gpanic, regAddr(REG_R0))

		p = appendp(p, AGet, regAddr(REG_R0))
		p = appendp(p, AI64Eqz)
		p = appendp(p, ANot)
		p = appendp(p, AIf)

		p = appendp(p, AGet, regAddr(REG_SP))
		p = appendp(p, AI64ExtendI32U)
		p = appendp(p, AI64Const, constAddr(framesize+8))
		p = appendp(p, AI64Add)
		p = appendp(p, AI64Load, panicargp)

		p = appendp(p, AI64Eq)
		p = appendp(p, AIf)
		p = appendp(p, AMOVD, regAddr(REG_SP), panicargp)
		p = appendp(p, AEnd)

		p = appendp(p, AEnd)
	}

	// Allocate the frame: SP -= framesize.
	if framesize > 0 {
		p := s.Func().Text
		p = appendp(p, AGet, regAddr(REG_SP))
		p = appendp(p, AI32Const, constAddr(framesize))
		p = appendp(p, AI32Sub)
		p = appendp(p, ASet, regAddr(REG_SP))
		p.Spadj = int32(framesize)
	}

	// If the framesize is 0, then imply nosplit because it's a specially
	// generated function.
	needMoreStack := framesize > 0 && !s.Func().Text.From.Sym.NoSplit()

	// If the maymorestack debug option is enabled, insert the
	// call to maymorestack *before* processing resume points so
	// we can construct a resume point after maymorestack for
	// morestack to resume at.
	var pMorestack = s.Func().Text
	if needMoreStack && ctxt.Flag_maymorestack != "" {
		p := pMorestack

		// Save REGCTXT on the stack.
		const tempFrame = 8
		p = appendp(p, AGet, regAddr(REG_SP))
		p = appendp(p, AI32Const, constAddr(tempFrame))
		p = appendp(p, AI32Sub)
		p = appendp(p, ASet, regAddr(REG_SP))
		p.Spadj = tempFrame
		ctxtp := obj.Addr{
			Type:   obj.TYPE_MEM,
			Reg:    REG_SP,
			Offset: 0,
		}
		p = appendp(p, AMOVD, regAddr(REGCTXT), ctxtp)

		// maymorestack must not itself preempt because we
		// don't have full stack information, so this can be
		// ACALLNORESUME.
		p = appendp(p, ACALLNORESUME, constAddr(0))
		// See ../x86/obj6.go
		sym := ctxt.LookupABI(ctxt.Flag_maymorestack, s.ABI())
		p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: sym}

		// Restore REGCTXT.
		p = appendp(p, AMOVD, ctxtp, regAddr(REGCTXT))
		p = appendp(p, AGet, regAddr(REG_SP))
		p = appendp(p, AI32Const, constAddr(tempFrame))
		p = appendp(p, AI32Add)
		p = appendp(p, ASet, regAddr(REG_SP))
		p.Spadj = -tempFrame

		// Add an explicit ARESUMEPOINT after maymorestack for
		// morestack to resume at.
		pMorestack = appendp(p, ARESUMEPOINT)
	}

	// Introduce resume points for CALL instructions
	// and collect other explicit resume points.
	numResumePoints := 0
	explicitBlockDepth := 0
	pc := int64(0) // pc is only incremented when necessary, this avoids bloat of the BrTable instruction
	var tableIdxs []uint64
	tablePC := int64(0)
	base := ctxt.PosTable.Pos(s.Func().Text.Pos).Base()
	for p := s.Func().Text; p != nil; p = p.Link {
		prevBase := base
		base = ctxt.PosTable.Pos(p.Pos).Base()
		switch p.As {
		case ABlock, ALoop, AIf:
			explicitBlockDepth++

		case AEnd:
			if explicitBlockDepth == 0 {
				panic("End without block")
			}
			explicitBlockDepth--

		case ARESUMEPOINT:
			if explicitBlockDepth != 0 {
				panic("RESUME can only be used on toplevel")
			}
			// A resume point becomes the End of one of the blocks that are
			// opened in front of the function body further below.
			p.As = AEnd
			// Every pc value up to and including the current one maps to
			// this resume point in the BrTable.
			for tablePC <= pc {
				tableIdxs = append(tableIdxs, uint64(numResumePoints))
				tablePC++
			}
			numResumePoints++
			pc++

		case obj.ACALL:
			if explicitBlockDepth != 0 {
				panic("CALL can only be used on toplevel, try CALLNORESUME instead")
			}
			// The new RESUMEPOINT is visited by a later iteration of this loop.
			appendp(p, ARESUMEPOINT)
		}

		p.Pc = pc

		// Increase pc whenever some pc-value table needs a new entry. Don't increase it
		// more often to avoid bloat of the BrTable instruction.
		// The "base != prevBase" condition detects inlined instructions. They are an
		// implicit call, so entering and leaving this section affects the stack trace.
		if p.As == ACALLNORESUME || p.As == obj.ANOP || p.As == ANop || p.Spadj != 0 || base != prevBase {
			pc++
			if p.To.Sym == sigpanic {
				// The panic stack trace expects the PC at the call of sigpanic,
				// not the next one. However, runtime.Caller subtracts 1 from the
				// PC. To make both PC and PC-1 work (have the same line number),
				// we advance the PC by 2 at sigpanic.
				pc++
			}
		}
	}
	// Final BrTable entry, added after all resume points have been counted.
	tableIdxs = append(tableIdxs, uint64(numResumePoints))
	s.Size = pc + 1

	if needMoreStack {
		p := pMorestack

		if framesize <= abi.StackSmall {
			// small stack: SP <= stackguard
			// Get SP
			// Get g
			// I32WrapI64
			// I32Load $stackguard0
			// I32LeU

			p = appendp(p, AGet, regAddr(REG_SP))
			p = appendp(p, AGet, regAddr(REGG))
			p = appendp(p, AI32WrapI64)
			p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0
			p = appendp(p, AI32LeU)
		} else {
			// large stack: SP-framesize <= stackguard-StackSmall
			//              SP <= stackguard+(framesize-StackSmall)
			// Get SP
			// Get g
			// I32WrapI64
			// I32Load $stackguard0
			// I32Const $(framesize-StackSmall)
			// I32Add
			// I32LeU

			p = appendp(p, AGet, regAddr(REG_SP))
			p = appendp(p, AGet, regAddr(REGG))
			p = appendp(p, AI32WrapI64)
			p = appendp(p, AI32Load, constAddr(2*int64(ctxt.Arch.PtrSize))) // G.stackguard0
			p = appendp(p, AI32Const, constAddr(framesize-abi.StackSmall))
			p = appendp(p, AI32Add)
			p = appendp(p, AI32LeU)
		}
		// TODO(neelance): handle wraparound case

		p = appendp(p, AIf)
		// This CALL does *not* have a resume point after it
		// (we already inserted all of the resume points). As
		// a result, morestack will resume at the *previous*
		// resume point (typically, the beginning of the
		// function) and perform the morestack check again.
		// This is why we don't need an explicit loop like
		// other architectures.
		p = appendp(p, obj.ACALL, constAddr(0))
		if s.Func().Text.From.Sym.NeedCtxt() {
			p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestack}
		} else {
			p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: morestackNoCtxt}
		}
		p = appendp(p, AEnd)
	}

	// record the branches targeting the entry loop and the unwind exit,
	// their targets will be filled in later
	var entryPointLoopBranches []*obj.Prog
	var unwindExitBranches []*obj.Prog
	currentDepth := 0
	for p := s.Func().Text; p != nil; p = p.Link {
		switch p.As {
		case ABlock, ALoop, AIf:
			currentDepth++
		case AEnd:
			currentDepth--
		}

		// In each case below the original Prog is copied, turned into a NOP
		// and then reused by appendp as the first instruction of the expansion.
		switch p.As {
		case obj.AJMP:
			jmp := *p
			p.As = obj.ANOP

			if jmp.To.Type == obj.TYPE_BRANCH {
				// jump to basic block
				p = appendp(p, AI32Const, constAddr(jmp.To.Val.(*obj.Prog).Pc))
				p = appendp(p, ASet, regAddr(REG_PC_B)) // write next basic block to PC_B
				p = appendp(p, ABr)                     // jump to beginning of entryPointLoop
				entryPointLoopBranches = append(entryPointLoopBranches, p)
				break
			}

			// low-level WebAssembly call to function
			switch jmp.To.Type {
			case obj.TYPE_MEM:
				if !notUsePC_B[jmp.To.Sym.Name] {
					// Set PC_B parameter to function entry.
					p = appendp(p, AI32Const, constAddr(0))
				}
				p = appendp(p, ACall, jmp.To)

			case obj.TYPE_NONE:
				// (target PC is on stack)
				p = appendp(p, AI32WrapI64)
				p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
				p = appendp(p, AI32ShrU)

				// Set PC_B parameter to function entry.
				// We need to push this before pushing the target PC_F,
				// so temporarily pop PC_F, using our REG_PC_B as a
				// scratch register, and push it back after pushing 0.
				p = appendp(p, ASet, regAddr(REG_PC_B))
				p = appendp(p, AI32Const, constAddr(0))
				p = appendp(p, AGet, regAddr(REG_PC_B))

				p = appendp(p, ACallIndirect)

			default:
				panic("bad target for JMP")
			}

			p = appendp(p, AReturn)

		case obj.ACALL, ACALLNORESUME:
			call := *p
			p.As = obj.ANOP

			pcAfterCall := call.Link.Pc
			if call.To.Sym == sigpanic {
				pcAfterCall-- // sigpanic expects to be called without advancing the pc
			}

			// SP -= 8
			p = appendp(p, AGet, regAddr(REG_SP))
			p = appendp(p, AI32Const, constAddr(8))
			p = appendp(p, AI32Sub)
			p = appendp(p, ASet, regAddr(REG_SP))

			// write return address to Go stack
			p = appendp(p, AGet, regAddr(REG_SP))
			p = appendp(p, AI64Const, obj.Addr{
				Type:   obj.TYPE_ADDR,
				Name:   obj.NAME_EXTERN,
				Sym:    s,           // PC_F
				Offset: pcAfterCall, // PC_B
			})
			p = appendp(p, AI64Store, constAddr(0))

			// low-level WebAssembly call to function
			switch call.To.Type {
			case obj.TYPE_MEM:
				if !notUsePC_B[call.To.Sym.Name] {
					// Set PC_B parameter to function entry.
					p = appendp(p, AI32Const, constAddr(0))
				}
				p = appendp(p, ACall, call.To)

			case obj.TYPE_NONE:
				// (target PC is on stack)
				p = appendp(p, AI32WrapI64)
				p = appendp(p, AI32Const, constAddr(16)) // only needs PC_F bits (16-31), PC_B bits (0-15) are zero
				p = appendp(p, AI32ShrU)

				// Set PC_B parameter to function entry.
				// We need to push this before pushing the target PC_F,
				// so temporarily pop PC_F, using our PC_B as a
				// scratch register, and push it back after pushing 0.
				p = appendp(p, ASet, regAddr(REG_PC_B))
				p = appendp(p, AI32Const, constAddr(0))
				p = appendp(p, AGet, regAddr(REG_PC_B))

				p = appendp(p, ACallIndirect)

			default:
				panic("bad target for CALL")
			}

			// return value of call is on the top of the stack, indicating whether to unwind the WebAssembly stack
			if call.As == ACALLNORESUME && call.To.Sym != sigpanic { // sigpanic unwinds the stack, but it never resumes
				// trying to unwind WebAssembly stack but call has no resume point, terminate with error
				p = appendp(p, AIf)
				p = appendp(p, obj.AUNDEF)
				p = appendp(p, AEnd)
			} else {
				// unwinding WebAssembly stack to switch goroutine, return 1
				p = appendp(p, ABrIf)
				unwindExitBranches = append(unwindExitBranches, p)
			}

		case obj.ARET, ARETUNWIND:
			ret := *p
			p.As = obj.ANOP

			if framesize > 0 {
				// SP += framesize
				p = appendp(p, AGet, regAddr(REG_SP))
				p = appendp(p, AI32Const, constAddr(framesize))
				p = appendp(p, AI32Add)
				p = appendp(p, ASet, regAddr(REG_SP))
				// TODO(neelance): This should theoretically set Spadj, but it only works without.
				// p.Spadj = int32(-framesize)
			}

			// A RET with a memory operand is lowered to a call of that
			// function followed by a Return of whatever it returned.
			if ret.To.Type == obj.TYPE_MEM {
				// Set PC_B parameter to function entry.
				p = appendp(p, AI32Const, constAddr(0))

				// low-level WebAssembly call to function
				p = appendp(p, ACall, ret.To)
				p = appendp(p, AReturn)
				break
			}

			// SP += 8
			p = appendp(p, AGet, regAddr(REG_SP))
			p = appendp(p, AI32Const, constAddr(8))
			p = appendp(p, AI32Add)
			p = appendp(p, ASet, regAddr(REG_SP))

			if ret.As == ARETUNWIND {
				// function needs to unwind the WebAssembly stack, return 1
				p = appendp(p, AI32Const, constAddr(1))
				p = appendp(p, AReturn)
				break
			}

			// not unwinding the WebAssembly stack, return 0
			p = appendp(p, AI32Const, constAddr(0))
			p = appendp(p, AReturn)
		}
	}

	// Resolve frame-relative operands and expand the remaining
	// pseudo-instructions (Get of an address, loads from memory operands, MOVx).
	for p := s.Func().Text; p != nil; p = p.Link {
		switch p.From.Name {
		case obj.NAME_AUTO:
			p.From.Offset += framesize
		case obj.NAME_PARAM:
			p.From.Reg = REG_SP
			p.From.Offset += framesize + 8 // parameters are after the frame and the 8-byte return address
		}

		switch p.To.Name {
		case obj.NAME_AUTO:
			p.To.Offset += framesize
		case obj.NAME_PARAM:
			p.To.Reg = REG_SP
			p.To.Offset += framesize + 8 // parameters are after the frame and the 8-byte return address
		}

		switch p.As {
		case AGet:
			// Get of an address: materialize the address as an i64 value.
			if p.From.Type == obj.TYPE_ADDR {
				get := *p
				p.As = obj.ANOP

				switch get.From.Name {
				case obj.NAME_EXTERN:
					p = appendp(p, AI64Const, get.From)
				case obj.NAME_AUTO, obj.NAME_PARAM:
					p = appendp(p, AGet, regAddr(get.From.Reg))
					if get.From.Reg == REG_SP {
						// SP is an i32; extend it so the result is an i64.
						p = appendp(p, AI64ExtendI32U)
					}
					if get.From.Offset != 0 {
						p = appendp(p, AI64Const, constAddr(get.From.Offset))
						p = appendp(p, AI64Add)
					}
				default:
					panic("bad Get: invalid name")
				}
			}

		case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U:
			// A load with a memory operand is split into a Get of the base
			// register followed by the load with a constant offset.
			if p.From.Type == obj.TYPE_MEM {
				as := p.As
				from := p.From

				p.As = AGet
				p.From = regAddr(from.Reg)

				if from.Reg != REG_SP {
					// Registers other than SP are wrapped to an i32 address.
					p = appendp(p, AI32WrapI64)
				}

				p = appendp(p, as, constAddr(from.Offset))
			}

		case AMOVB, AMOVH, AMOVW, AMOVD:
			mov := *p
			p.As = obj.ANOP

			// Pick the load and store instructions matching the MOV width.
			var loadAs obj.As
			var storeAs obj.As
			switch mov.As {
			case AMOVB:
				loadAs = AI64Load8U
				storeAs = AI64Store8
			case AMOVH:
				loadAs = AI64Load16U
				storeAs = AI64Store16
			case AMOVW:
				loadAs = AI64Load32U
				storeAs = AI64Store32
			case AMOVD:
				loadAs = AI64Load
				storeAs = AI64Store
			}

			// appendValue emits the instructions that push the source
			// operand of the MOV onto the WebAssembly stack.
			appendValue := func() {
				switch mov.From.Type {
				case obj.TYPE_CONST:
					p = appendp(p, AI64Const, constAddr(mov.From.Offset))

				case obj.TYPE_ADDR:
					switch mov.From.Name {
					case obj.NAME_NONE, obj.NAME_PARAM, obj.NAME_AUTO:
						p = appendp(p, AGet, regAddr(mov.From.Reg))
						if mov.From.Reg == REG_SP {
							p = appendp(p, AI64ExtendI32U)
						}
						p = appendp(p, AI64Const, constAddr(mov.From.Offset))
						p = appendp(p, AI64Add)
					case obj.NAME_EXTERN:
						p = appendp(p, AI64Const, mov.From)
					default:
						panic("bad name for MOV")
					}

				case obj.TYPE_REG:
					p = appendp(p, AGet, mov.From)
					if mov.From.Reg == REG_SP {
						p = appendp(p, AI64ExtendI32U)
					}

				case obj.TYPE_MEM:
					p = appendp(p, AGet, regAddr(mov.From.Reg))
					if mov.From.Reg != REG_SP {
						p = appendp(p, AI32WrapI64)
					}
					p = appendp(p, loadAs, constAddr(mov.From.Offset))

				default:
					panic("bad MOV type")
				}
			}

			switch mov.To.Type {
			case obj.TYPE_REG:
				appendValue()
				if mov.To.Reg == REG_SP {
					// SP is an i32, so the i64 value is wrapped before the Set.
					p = appendp(p, AI32WrapI64)
				}
				p = appendp(p, ASet, mov.To)

			case obj.TYPE_MEM:
				// For a store, the address is pushed first, then the value.
				switch mov.To.Name {
				case obj.NAME_NONE, obj.NAME_PARAM:
					p = appendp(p, AGet, regAddr(mov.To.Reg))
					if mov.To.Reg != REG_SP {
						p = appendp(p, AI32WrapI64)
					}
				case obj.NAME_EXTERN:
					p = appendp(p, AI32Const, obj.Addr{Type: obj.TYPE_ADDR, Name: obj.NAME_EXTERN, Sym: mov.To.Sym})
				default:
					panic("bad MOV name")
				}
				appendValue()
				p = appendp(p, storeAs, constAddr(mov.To.Offset))

			default:
				panic("bad MOV type")
			}
		}
	}

	// Wrap the function body in the structured control-flow blocks that the
	// branches recorded above refer to.
	{
		p := s.Func().Text
		if len(unwindExitBranches) > 0 {
			p = appendp(p, ABlock) // unwindExit, used to return 1 when unwinding the stack
			for _, b := range unwindExitBranches {
				b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p}
			}
		}
		if len(entryPointLoopBranches) > 0 {
			p = appendp(p, ALoop) // entryPointLoop, used to jump between basic blocks
			for _, b := range entryPointLoopBranches {
				b.To = obj.Addr{Type: obj.TYPE_BRANCH, Val: p}
			}
		}
		if numResumePoints > 0 {
			// Add Block instructions for resume points and BrTable to jump to selected resume point.
			for i := 0; i < numResumePoints+1; i++ {
				p = appendp(p, ABlock)
			}
			p = appendp(p, AGet, regAddr(REG_PC_B)) // read next basic block from PC_B
			p = appendp(p, ABrTable, obj.Addr{Val: tableIdxs})
			p = appendp(p, AEnd) // end of Block
		}
		for p.Link != nil {
			p = p.Link // function instructions
		}
		if len(entryPointLoopBranches) > 0 {
			p = appendp(p, AEnd) // end of entryPointLoop
		}
		p = appendp(p, obj.AUNDEF)
		if len(unwindExitBranches) > 0 {
			p = appendp(p, AEnd) // end of unwindExit
			p = appendp(p, AI32Const, constAddr(1))
		}
	}

	// Replace each TYPE_BRANCH target of Br/BrIf with a constant: the current
	// nesting depth minus the depth recorded for the targeted
	// Block/Loop/If instruction.
	currentDepth = 0
	blockDepths := make(map[*obj.Prog]int)
	for p := s.Func().Text; p != nil; p = p.Link {
		switch p.As {
		case ABlock, ALoop, AIf:
			currentDepth++
			blockDepths[p] = currentDepth
		case AEnd:
			currentDepth--
		}

		switch p.As {
		case ABr, ABrIf:
			if p.To.Type == obj.TYPE_BRANCH {
				blockDepth, ok := blockDepths[p.To.Val.(*obj.Prog)]
				if !ok {
					panic("label not at block")
				}
				p.To = constAddr(int64(currentDepth - blockDepth))
			}
		}
	}
}
896
897func constAddr(value int64) obj.Addr {
898	return obj.Addr{Type: obj.TYPE_CONST, Offset: value}
899}
900
901func regAddr(reg int16) obj.Addr {
902	return obj.Addr{Type: obj.TYPE_REG, Reg: reg}
903}
904
// Most Go functions have a single parameter (PC_B) in the Wasm ABI.
// notUsePC_B is the list of exceptions, keyed by symbol name: when
// preprocess lowers a CALL or JMP to one of these symbols it does not
// push the PC_B argument before the call.
var notUsePC_B = map[string]bool{
	"_rt0_wasm_js":            true,
	"_rt0_wasm_wasip1":        true,
	"wasm_export_run":         true,
	"wasm_export_resume":      true,
	"wasm_export_getsp":       true,
	"wasm_pc_f_loop":          true,
	"gcWriteBarrier":          true,
	"runtime.gcWriteBarrier1": true,
	"runtime.gcWriteBarrier2": true,
	"runtime.gcWriteBarrier3": true,
	"runtime.gcWriteBarrier4": true,
	"runtime.gcWriteBarrier5": true,
	"runtime.gcWriteBarrier6": true,
	"runtime.gcWriteBarrier7": true,
	"runtime.gcWriteBarrier8": true,
	"runtime.wasmDiv":         true,
	"runtime.wasmTruncS":      true,
	"runtime.wasmTruncU":      true,
	"cmpbody":                 true,
	"memeqbody":               true,
	"memcmp":                  true,
	"memchr":                  true,
}
931
932func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
933	type regVar struct {
934		global bool
935		index  uint64
936	}
937
938	type varDecl struct {
939		count uint64
940		typ   valueType
941	}
942
943	hasLocalSP := false
944	regVars := [MAXREG - MINREG]*regVar{
945		REG_SP - MINREG:    {true, 0},
946		REG_CTXT - MINREG:  {true, 1},
947		REG_g - MINREG:     {true, 2},
948		REG_RET0 - MINREG:  {true, 3},
949		REG_RET1 - MINREG:  {true, 4},
950		REG_RET2 - MINREG:  {true, 5},
951		REG_RET3 - MINREG:  {true, 6},
952		REG_PAUSE - MINREG: {true, 7},
953	}
954	var varDecls []*varDecl
955	useAssemblyRegMap := func() {
956		for i := int16(0); i < 16; i++ {
957			regVars[REG_R0+i-MINREG] = &regVar{false, uint64(i)}
958		}
959	}
960
961	// Function starts with declaration of locals: numbers and types.
962	// Some functions use a special calling convention.
963	switch s.Name {
964	case "_rt0_wasm_js", "_rt0_wasm_wasip1", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp",
965		"wasm_pc_f_loop", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
966		varDecls = []*varDecl{}
967		useAssemblyRegMap()
968	case "memchr", "memcmp":
969		varDecls = []*varDecl{{count: 2, typ: i32}}
970		useAssemblyRegMap()
971	case "cmpbody":
972		varDecls = []*varDecl{{count: 2, typ: i64}}
973		useAssemblyRegMap()
974	case "gcWriteBarrier":
975		varDecls = []*varDecl{{count: 5, typ: i64}}
976		useAssemblyRegMap()
977	case "runtime.gcWriteBarrier1",
978		"runtime.gcWriteBarrier2",
979		"runtime.gcWriteBarrier3",
980		"runtime.gcWriteBarrier4",
981		"runtime.gcWriteBarrier5",
982		"runtime.gcWriteBarrier6",
983		"runtime.gcWriteBarrier7",
984		"runtime.gcWriteBarrier8":
985		// no locals
986		useAssemblyRegMap()
987	default:
988		// Normal calling convention: PC_B as WebAssembly parameter. First local variable is local SP cache.
989		regVars[REG_PC_B-MINREG] = &regVar{false, 0}
990		hasLocalSP = true
991
992		var regUsed [MAXREG - MINREG]bool
993		for p := s.Func().Text; p != nil; p = p.Link {
994			if p.From.Reg != 0 {
995				regUsed[p.From.Reg-MINREG] = true
996			}
997			if p.To.Reg != 0 {
998				regUsed[p.To.Reg-MINREG] = true
999			}
1000		}
1001
1002		regs := []int16{REG_SP}
1003		for reg := int16(REG_R0); reg <= REG_F31; reg++ {
1004			if regUsed[reg-MINREG] {
1005				regs = append(regs, reg)
1006			}
1007		}
1008
1009		var lastDecl *varDecl
1010		for i, reg := range regs {
1011			t := regType(reg)
1012			if lastDecl == nil || lastDecl.typ != t {
1013				lastDecl = &varDecl{
1014					count: 0,
1015					typ:   t,
1016				}
1017				varDecls = append(varDecls, lastDecl)
1018			}
1019			lastDecl.count++
1020			if reg != REG_SP {
1021				regVars[reg-MINREG] = &regVar{false, 1 + uint64(i)}
1022			}
1023		}
1024	}
1025
1026	w := new(bytes.Buffer)
1027
1028	writeUleb128(w, uint64(len(varDecls)))
1029	for _, decl := range varDecls {
1030		writeUleb128(w, decl.count)
1031		w.WriteByte(byte(decl.typ))
1032	}
1033
1034	if hasLocalSP {
1035		// Copy SP from its global variable into a local variable. Accessing a local variable is more efficient.
1036		updateLocalSP(w)
1037	}
1038
1039	for p := s.Func().Text; p != nil; p = p.Link {
1040		switch p.As {
1041		case AGet:
1042			if p.From.Type != obj.TYPE_REG {
1043				panic("bad Get: argument is not a register")
1044			}
1045			reg := p.From.Reg
1046			v := regVars[reg-MINREG]
1047			if v == nil {
1048				panic("bad Get: invalid register")
1049			}
1050			if reg == REG_SP && hasLocalSP {
1051				writeOpcode(w, ALocalGet)
1052				writeUleb128(w, 1) // local SP
1053				continue
1054			}
1055			if v.global {
1056				writeOpcode(w, AGlobalGet)
1057			} else {
1058				writeOpcode(w, ALocalGet)
1059			}
1060			writeUleb128(w, v.index)
1061			continue
1062
1063		case ASet:
1064			if p.To.Type != obj.TYPE_REG {
1065				panic("bad Set: argument is not a register")
1066			}
1067			reg := p.To.Reg
1068			v := regVars[reg-MINREG]
1069			if v == nil {
1070				panic("bad Set: invalid register")
1071			}
1072			if reg == REG_SP && hasLocalSP {
1073				writeOpcode(w, ALocalTee)
1074				writeUleb128(w, 1) // local SP
1075			}
1076			if v.global {
1077				writeOpcode(w, AGlobalSet)
1078			} else {
1079				if p.Link.As == AGet && p.Link.From.Reg == reg {
1080					writeOpcode(w, ALocalTee)
1081					p = p.Link
1082				} else {
1083					writeOpcode(w, ALocalSet)
1084				}
1085			}
1086			writeUleb128(w, v.index)
1087			continue
1088
1089		case ATee:
1090			if p.To.Type != obj.TYPE_REG {
1091				panic("bad Tee: argument is not a register")
1092			}
1093			reg := p.To.Reg
1094			v := regVars[reg-MINREG]
1095			if v == nil {
1096				panic("bad Tee: invalid register")
1097			}
1098			writeOpcode(w, ALocalTee)
1099			writeUleb128(w, v.index)
1100			continue
1101
1102		case ANot:
1103			writeOpcode(w, AI32Eqz)
1104			continue
1105
1106		case obj.AUNDEF:
1107			writeOpcode(w, AUnreachable)
1108			continue
1109
1110		case obj.ANOP, obj.ATEXT, obj.AFUNCDATA, obj.APCDATA:
1111			// ignore
1112			continue
1113		}
1114
1115		writeOpcode(w, p.As)
1116
1117		switch p.As {
1118		case ABlock, ALoop, AIf:
1119			if p.From.Offset != 0 {
1120				// block type, rarely used, e.g. for code compiled with emscripten
1121				w.WriteByte(0x80 - byte(p.From.Offset))
1122				continue
1123			}
1124			w.WriteByte(0x40)
1125
1126		case ABr, ABrIf:
1127			if p.To.Type != obj.TYPE_CONST {
1128				panic("bad Br/BrIf")
1129			}
1130			writeUleb128(w, uint64(p.To.Offset))
1131
1132		case ABrTable:
1133			idxs := p.To.Val.([]uint64)
1134			writeUleb128(w, uint64(len(idxs)-1))
1135			for _, idx := range idxs {
1136				writeUleb128(w, idx)
1137			}
1138
1139		case ACall:
1140			switch p.To.Type {
1141			case obj.TYPE_CONST:
1142				writeUleb128(w, uint64(p.To.Offset))
1143
1144			case obj.TYPE_MEM:
1145				if p.To.Name != obj.NAME_EXTERN && p.To.Name != obj.NAME_STATIC {
1146					fmt.Println(p.To)
1147					panic("bad name for Call")
1148				}
1149				r := obj.Addrel(s)
1150				r.Siz = 1 // actually variable sized
1151				r.Off = int32(w.Len())
1152				r.Type = objabi.R_CALL
1153				if p.Mark&WasmImport != 0 {
1154					r.Type = objabi.R_WASMIMPORT
1155				}
1156				r.Sym = p.To.Sym
1157				if hasLocalSP {
1158					// The stack may have moved, which changes SP. Update the local SP variable.
1159					updateLocalSP(w)
1160				}
1161
1162			default:
1163				panic("bad type for Call")
1164			}
1165
1166		case ACallIndirect:
1167			writeUleb128(w, uint64(p.To.Offset))
1168			w.WriteByte(0x00) // reserved value
1169			if hasLocalSP {
1170				// The stack may have moved, which changes SP. Update the local SP variable.
1171				updateLocalSP(w)
1172			}
1173
1174		case AI32Const, AI64Const:
1175			if p.From.Name == obj.NAME_EXTERN {
1176				r := obj.Addrel(s)
1177				r.Siz = 1 // actually variable sized
1178				r.Off = int32(w.Len())
1179				r.Type = objabi.R_ADDR
1180				r.Sym = p.From.Sym
1181				r.Add = p.From.Offset
1182				break
1183			}
1184			writeSleb128(w, p.From.Offset)
1185
1186		case AF32Const:
1187			b := make([]byte, 4)
1188			binary.LittleEndian.PutUint32(b, math.Float32bits(float32(p.From.Val.(float64))))
1189			w.Write(b)
1190
1191		case AF64Const:
1192			b := make([]byte, 8)
1193			binary.LittleEndian.PutUint64(b, math.Float64bits(p.From.Val.(float64)))
1194			w.Write(b)
1195
1196		case AI32Load, AI64Load, AF32Load, AF64Load, AI32Load8S, AI32Load8U, AI32Load16S, AI32Load16U, AI64Load8S, AI64Load8U, AI64Load16S, AI64Load16U, AI64Load32S, AI64Load32U:
1197			if p.From.Offset < 0 {
1198				panic("negative offset for *Load")
1199			}
1200			if p.From.Type != obj.TYPE_CONST {
1201				panic("bad type for *Load")
1202			}
1203			if p.From.Offset > math.MaxUint32 {
1204				ctxt.Diag("bad offset in %v", p)
1205			}
1206			writeUleb128(w, align(p.As))
1207			writeUleb128(w, uint64(p.From.Offset))
1208
1209		case AI32Store, AI64Store, AF32Store, AF64Store, AI32Store8, AI32Store16, AI64Store8, AI64Store16, AI64Store32:
1210			if p.To.Offset < 0 {
1211				panic("negative offset")
1212			}
1213			if p.From.Offset > math.MaxUint32 {
1214				ctxt.Diag("bad offset in %v", p)
1215			}
1216			writeUleb128(w, align(p.As))
1217			writeUleb128(w, uint64(p.To.Offset))
1218
1219		case ACurrentMemory, AGrowMemory, AMemoryFill:
1220			w.WriteByte(0x00)
1221
1222		case AMemoryCopy:
1223			w.WriteByte(0x00)
1224			w.WriteByte(0x00)
1225
1226		}
1227	}
1228
1229	w.WriteByte(0x0b) // end
1230
1231	s.P = w.Bytes()
1232}
1233
1234func updateLocalSP(w *bytes.Buffer) {
1235	writeOpcode(w, AGlobalGet)
1236	writeUleb128(w, 0) // global SP
1237	writeOpcode(w, ALocalSet)
1238	writeUleb128(w, 1) // local SP
1239}
1240
1241func writeOpcode(w *bytes.Buffer, as obj.As) {
1242	switch {
1243	case as < AUnreachable:
1244		panic(fmt.Sprintf("unexpected assembler op: %s", as))
1245	case as < AEnd:
1246		w.WriteByte(byte(as - AUnreachable + 0x00))
1247	case as < ADrop:
1248		w.WriteByte(byte(as - AEnd + 0x0B))
1249	case as < ALocalGet:
1250		w.WriteByte(byte(as - ADrop + 0x1A))
1251	case as < AI32Load:
1252		w.WriteByte(byte(as - ALocalGet + 0x20))
1253	case as < AI32TruncSatF32S:
1254		w.WriteByte(byte(as - AI32Load + 0x28))
1255	case as < ALast:
1256		w.WriteByte(0xFC)
1257		w.WriteByte(byte(as - AI32TruncSatF32S + 0x00))
1258	default:
1259		panic(fmt.Sprintf("unexpected assembler op: %s", as))
1260	}
1261}
1262
// valueType is the one-byte code identifying the type of a WebAssembly
// value. It is written verbatim as the type byte of a local variable
// declaration.
type valueType byte

// Type codes for the four numeric types used for registers and locals.
const (
	i32 = valueType(0x7F) // 32-bit integer
	i64 = valueType(0x7E) // 64-bit integer
	f32 = valueType(0x7D) // 32-bit float
	f64 = valueType(0x7C) // 64-bit float
)
1271
1272func regType(reg int16) valueType {
1273	switch {
1274	case reg == REG_SP:
1275		return i32
1276	case reg >= REG_R0 && reg <= REG_R15:
1277		return i64
1278	case reg >= REG_F0 && reg <= REG_F15:
1279		return f32
1280	case reg >= REG_F16 && reg <= REG_F31:
1281		return f64
1282	default:
1283		panic("invalid register")
1284	}
1285}
1286
1287func align(as obj.As) uint64 {
1288	switch as {
1289	case AI32Load8S, AI32Load8U, AI64Load8S, AI64Load8U, AI32Store8, AI64Store8:
1290		return 0
1291	case AI32Load16S, AI32Load16U, AI64Load16S, AI64Load16U, AI32Store16, AI64Store16:
1292		return 1
1293	case AI32Load, AF32Load, AI64Load32S, AI64Load32U, AI32Store, AF32Store, AI64Store32:
1294		return 2
1295	case AI64Load, AF64Load, AI64Store, AF64Store:
1296		return 3
1297	default:
1298		panic("align: bad op")
1299	}
1300}
1301
// writeUleb128 writes v to w in unsigned LEB128 form: seven bits per byte,
// least significant group first, with the high bit of every byte except the
// last one set. Errors returned by w.WriteByte are ignored.
func writeUleb128(w io.ByteWriter, v uint64) {
	// Emit continuation bytes for as long as more than seven bits remain.
	for v >= 0x80 {
		w.WriteByte(byte(v) | 0x80)
		v >>= 7
	}
	// The final group fits in seven bits, so its high bit stays clear.
	w.WriteByte(byte(v))
}
1318
// writeSleb128 writes v to w in signed LEB128 form: seven bits per byte,
// least significant group first, with the high bit of every byte except the
// last one set. Errors returned by w.WriteByte are ignored.
func writeSleb128(w io.ByteWriter, v int64) {
	for {
		group := byte(v & 0x7f)
		signSet := group&0x40 != 0
		v >>= 7 // arithmetic shift: the sign is preserved

		// Stop once the remaining bits are pure sign extension that agrees
		// with the sign bit of the group just extracted.
		if (v == 0 && !signSet) || (v == -1 && signSet) {
			w.WriteByte(group)
			return
		}
		w.WriteByte(group | 0x80)
	}
}
1332