1// Inferno utils/6l/pass.c
2// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/pass.c
3//
4//	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
//	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6//	Portions Copyright © 1997-1999 Vita Nuova Limited
7//	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8//	Portions Copyright © 2004,2006 Bruce Ellis
//	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10//	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11//	Portions Copyright © 2009 The Go Authors. All rights reserved.
12//
13// Permission is hereby granted, free of charge, to any person obtaining a copy
14// of this software and associated documentation files (the "Software"), to deal
15// in the Software without restriction, including without limitation the rights
16// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17// copies of the Software, and to permit persons to whom the Software is
18// furnished to do so, subject to the following conditions:
19//
20// The above copyright notice and this permission notice shall be included in
21// all copies or substantial portions of the Software.
22//
23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29// THE SOFTWARE.
30
31package x86
32
33import (
34	"cmd/internal/obj"
35	"cmd/internal/objabi"
36	"cmd/internal/src"
37	"cmd/internal/sys"
38	"internal/abi"
39	"log"
40	"math"
41	"path"
42	"strings"
43)
44
45func CanUse1InsnTLS(ctxt *obj.Link) bool {
46	if isAndroid {
47		// Android uses a global variable for the tls offset.
48		return false
49	}
50
51	if ctxt.Arch.Family == sys.I386 {
52		switch ctxt.Headtype {
53		case objabi.Hlinux,
54			objabi.Hplan9,
55			objabi.Hwindows:
56			return false
57		}
58
59		return true
60	}
61
62	switch ctxt.Headtype {
63	case objabi.Hplan9, objabi.Hwindows:
64		return false
65	case objabi.Hlinux, objabi.Hfreebsd:
66		return !ctxt.Flag_shared
67	}
68
69	return true
70}
71
// progedit rewrites the single instruction p in place into the form the
// rest of this package expects. In order, it:
//   - converts TLS accesses between the 1-instruction and 2-instruction
//     forms, as decided by CanUse1InsnTLS;
//   - on Android and Windows, replaces "MOVx TLS, reg" by a load of
//     runtime.tls_g (followed by an extra indirection on Windows);
//   - retypes a bare-offset To operand of CMPPD/CMPPS/CMPSD/CMPSS as a
//     constant;
//   - marks CALL/JMP/RET to a symbol as TYPE_BRANCH;
//   - turns MOVL/MOVQ of an address into LEAL/LEAQ;
//   - replaces floating-point constant operands by references to constant
//     symbols (or by XORPS when +0 is moved into an X register);
//   - applies rewriteToUseGot when ctxt.Flag_dynlink is set and
//     rewriteToPcrel on 386 when ctxt.Flag_shared is set.
//
// Any additional instruction is allocated with newprog and linked in after
// p with obj.Appendp.
func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only does because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != objabi.Hsolaris {
			obj.Nopout(p)
		}
		// off(reg)(TLS*1) as a source operand becomes off(TLS).
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		// Likewise for the destination operand.
		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// loadG, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			// q is the second instruction; it starts as a copy of p's
			// source operand, so the original offset is carried over.
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			// p itself becomes the plain "MOVx TLS, reg".
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// Android and Windows use a tls offset determined at runtime. Rewrite
	//	MOVQ TLS, BX
	// to
	//	MOVQ runtime.tls_g(SB), BX
	if (isAndroid || ctxt.Headtype == objabi.Hwindows) &&
		(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		p.From.Reg = REG_NONE
		p.From.Sym = ctxt.Lookup("runtime.tls_g")
		p.From.Index = REG_NONE
		if ctxt.Headtype == objabi.Hwindows {
			// Windows requires an additional indirection
			// to retrieve the TLS pointer,
			// as runtime.tls_g contains the TLS offset from GS or FS.
			// on AMD64 add
			//	MOVQ 0(BX)(GS*1), BX
			// on 386 add
			//	MOVL 0(BX)(FS*1), BX
			// (the added instruction reuses p's opcode and destination).
			q := obj.Appendp(p, newprog)
			q.As = p.As
			q.From = obj.Addr{}
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			if ctxt.Arch.Family == sys.AMD64 {
				q.From.Index = REG_GS
			} else {
				q.From.Index = REG_FS
			}
			q.From.Scale = 1
			q.From.Offset = 0
			q.To = p.To
		}
	}

	// TODO: Remove.
	// On windows/amd64 and on plan9, a (TLS*1) index is re-encoded with
	// scale 2.
	if ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64 || ctxt.Headtype == objabi.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		// Only a bare offset (no name, base, index or symbol) is retyped.
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	// On AMD64 this applies to every TYPE_ADDR source; elsewhere addresses
	// of NAME_EXTERN and NAME_STATIC symbols are left as MOVs.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		// Any other MOVSS is handled like the single-precision
		// instructions below.
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		if p.From.Type == obj.TYPE_FCONST {
			// The constant is narrowed to float32 and replaced by a
			// reference to the symbol returned by ctxt.Float32Sym.
			f32 := float32(p.From.Val.(float64))
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float32Sym(f32)
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		// Any other MOVSD is handled like the double-precision
		// instructions below.
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		if p.From.Type == obj.TYPE_FCONST {
			// Replace the constant by a reference to the symbol returned
			// by ctxt.Float64Sym.
			f64 := p.From.Val.(float64)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = ctxt.Float64Sym(f64)
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p, newprog)
	}

	if ctxt.Flag_shared && ctxt.Arch.Family == sys.I386 {
		rewriteToPcrel(ctxt, p, newprog)
	}
}
321
// rewriteToUseGot rewrites p, if necessary, to access global data via the
// global offset table. progedit calls it when ctxt.Flag_dynlink is set.
//
// A reference to a non-local NAME_EXTERN symbol is replaced by a load of
// the symbol's GOT entry (NAME_GOTREF) into a scratch register, followed by
// the original operation performed through that register. The scratch
// register is R15 on AMD64 and CX otherwise, except that a LEAL whose
// destination is neither its source base nor its source index uses the
// destination register itself. New instructions are allocated with newprog
// and linked in after p; where p is superseded by a copy, p is passed to
// obj.Nopout.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
	// Pick the pointer-sized LEA/MOV opcodes and the scratch register.
	var lea, mov obj.As
	var reg int16
	if ctxt.Arch.Family == sys.AMD64 {
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
			// Special case: clobber the destination register with
			// the PC so we don't have to clobber CX.
			// The SSA backend depends on CX not being clobbered across LEAL.
			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
			reg = p.To.Reg
		}
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//     ADUFFxxx $offset
		// becomes
		//     $MOV runtime.duffxxx@GOT, $reg
		//     $LEA $offset($reg), $reg
		//     CALL $reg
		// (we use LEAx rather than ADDx because ADDx clobbers
		// flags and duffzero on 386 does not otherwise do so).
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal)
		} else {
			sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal)
		}
		// Save the offset before p.To is overwritten below.
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(p, newprog)
		p1.As = lea
		p1.From.Type = obj.TYPE_MEM
		p1.From.Offset = offset
		p1.From.Reg = reg
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(p1, newprog)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if ctxt.Arch.Family == sys.AMD64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			// Remember the real opcode and destination; p is turned
			// into a load of the address into the scratch register.
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = reg
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		// q tracks the last instruction of the sequence built so far.
		q := p
		if p.From.Offset != 0 {
			// The offset cannot stay on the GOT reference, so it is
			// applied by a separate LEA on the loaded address.
			q = obj.Appendp(p, newprog)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			// Re-issue the original operation with the scratch register
			// as its source.
			q = obj.Appendp(q, newprog)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = reg
		}
	}
	if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	// source is the operand (From or To) that names the global symbol.
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
		source = &p.To
	} else {
		// No global symbol is referenced: nothing left to rewrite.
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		//
		// We disable open-coded defers in buildssa() on 386 ONLY with shared
		// libraries because of this extra code added before deferreturn calls.
		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
			return
		}
		p1 := obj.Appendp(p, newprog)
		p2 := obj.Appendp(p1, newprog)

		// p1: LEAL _GLOBAL_OFFSET_TABLE_, BX
		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = ctxt.Lookup("_GLOBAL_OFFSET_TABLE_")
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		// p2: a copy of the original call.
		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		if p.RestArgs != nil {
			p2.RestArgs = append(p2.RestArgs, p.RestArgs...)
		}
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		obj.Nopout(p)
		return

	}
	// These instructions keep their symbol operand unchanged.
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	p1 := obj.Appendp(p, newprog)
	p2 := obj.Appendp(p1, newprog)

	// p1: $MOV sym@GOT, reg
	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	// p2: the original instruction, with the symbol operand replaced by a
	// reference through reg.
	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if from3 := p.GetFrom3(); from3 != nil {
		p2.AddRestSource(*from3)
	}
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}
	obj.Nopout(p)
}
517
518func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
519	// RegTo2 is set on the instructions we insert here so they don't get
520	// processed twice.
521	if p.RegTo2 != 0 {
522		return
523	}
524	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
525		return
526	}
527	// Any Prog (aside from the above special cases) with an Addr with Name ==
528	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.XX
529	// inserted before it.
530	isName := func(a *obj.Addr) bool {
531		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
532			return false
533		}
534		if a.Sym.Type == objabi.STLSBSS {
535			return false
536		}
537		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
538	}
539
540	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
541		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
542		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
543		// respectively.
544		if p.To.Type != obj.TYPE_REG {
545			q := obj.Appendp(p, newprog)
546			q.As = p.As
547			q.From.Type = obj.TYPE_REG
548			q.From.Reg = REG_CX
549			q.To = p.To
550			p.As = AMOVL
551			p.To.Type = obj.TYPE_REG
552			p.To.Reg = REG_CX
553			p.To.Sym = nil
554			p.To.Name = obj.NAME_NONE
555		}
556	}
557
558	if !isName(&p.From) && !isName(&p.To) && (p.GetFrom3() == nil || !isName(p.GetFrom3())) {
559		return
560	}
561	var dst int16 = REG_CX
562	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
563		dst = p.To.Reg
564		// Why? See the comment near the top of rewriteToUseGot above.
565		// AMOVLs might be introduced by the GOT rewrites.
566	}
567	q := obj.Appendp(p, newprog)
568	q.RegTo2 = 1
569	r := obj.Appendp(q, newprog)
570	r.RegTo2 = 1
571	q.As = obj.ACALL
572	thunkname := "__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))
573	q.To.Sym = ctxt.LookupInit(thunkname, func(s *obj.LSym) { s.Set(obj.AttrLocal, true) })
574	q.To.Type = obj.TYPE_MEM
575	q.To.Name = obj.NAME_EXTERN
576	r.As = p.As
577	r.Scond = p.Scond
578	r.From = p.From
579	r.RestArgs = p.RestArgs
580	r.Reg = p.Reg
581	r.To = p.To
582	if isName(&p.From) {
583		r.From.Reg = dst
584	}
585	if isName(&p.To) {
586		r.To.Reg = dst
587	}
588	if p.GetFrom3() != nil && isName(p.GetFrom3()) {
589		r.GetFrom3().Reg = dst
590	}
591	obj.Nopout(p)
592}
593
// Bits for Prog.mark.
//
// markBit is used in errorCheck to avoid duplicate work.
const markBit = 1 << 0
598
// preprocess rewrites the instruction list of the function cursym.
// It inserts the prologue (stack-split check and G load where needed,
// frame-pointer save on AMD64, stack adjustment, and the panic argp check
// for wrapper functions), turns NAME_AUTO and NAME_PARAM offsets into
// SP-relative offsets while tracking PUSH/POP/ADJSP, and expands each RET
// into the matching epilogue. It also records the argument and local sizes
// on cursym.Func() and may mark the function NoFrame or NoSplit.
//
// Nothing is done when the function has no TEXT instruction or when nothing
// follows it. New instructions are allocated with newprog.
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
		return
	}

	// p starts at the TEXT instruction; its To.Offset is the declared
	// frame size, clamped to zero when negative.
	p := cursym.Func().Text
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	// DUFFCOPY and DUFFZERO count as calls here.
	hasCall := false
	for q := p; q != nil; q = q.Link {
		if q.As == obj.ACALL || q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO {
			hasCall = true
			break
		}
	}

	var bpsize int
	if ctxt.Arch.Family == sys.AMD64 &&
		!p.From.Sym.NoFrame() && // (1) below
		!(autoffset == 0 && !hasCall) { // (2) below
		// Make room to save a base pointer.
		// There are 2 cases we must avoid:
		// 1) If noframe is set (which we do for functions which tail call).
		// For performance, we also want to avoid:
		// 2) Frameless leaf functions
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		bpsize = 0
		p.From.Sym.Set(obj.AttrNoFrame, true)
	}

	// Record the argument size and the (possibly enlarged) frame size.
	textarg := int64(p.To.Val.(int32))
	cursym.Func().Args = int32(textarg)
	cursym.Func().Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if ctxt.Arch.Family == sys.I386 && cursym.Func().Locals < 0 {
		cursym.Func().Locals = 0
	}

	// Small-frame functions whose only calls are DUFFCOPY/DUFFZERO or the
	// runtime calls accepted by isZeroArgRuntimeCall are marked NoSplit,
	// provided the frame stays below abi.StackSmall-8 when such a call is
	// present.
	// TODO(rsc): Remove 'ctxt.Arch.Family == sys.AMD64 &&'.
	if ctxt.Arch.Family == sys.AMD64 && autoffset < abi.StackSmall && !p.From.Sym.NoSplit() {
		leaf := true
	LeafSearch:
		for q := p; q != nil; q = q.Link {
			switch q.As {
			case obj.ACALL:
				// Treat common runtime calls that take no arguments
				// the same as duffcopy and duffzero.
				if !isZeroArgRuntimeCall(q.To.Sym) {
					leaf = false
					break LeafSearch
				}
				fallthrough
			case obj.ADUFFCOPY, obj.ADUFFZERO:
				if autoffset >= abi.StackSmall-8 {
					leaf = false
					break LeafSearch
				}
			}
		}

		if leaf {
			p.From.Sym.Set(obj.AttrNoSplit, true)
		}
	}

	// Scratch registers used by the wrapper argp check below.
	var regEntryTmp0, regEntryTmp1 int16
	if ctxt.Arch.Family == sys.AMD64 {
		regEntryTmp0, regEntryTmp1 = REGENTRYTMP0, REGENTRYTMP1
	} else {
		regEntryTmp0, regEntryTmp1 = REG_BX, REG_DI
	}

	// regg is the register holding g. It is only assigned when a split
	// check or a wrapper G load is emitted, and only read in the wrapper
	// code below.
	var regg int16
	if !p.From.Sym.NoSplit() {
		// Emit split check and load G register
		p, regg = stacksplit(ctxt, cursym, p, newprog, autoffset, int32(textarg))
	} else if p.From.Sym.Wrapper() {
		// Load G register for the wrapper code
		p, regg = loadG(ctxt, cursym, p, newprog)
	}

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(p, newprog)

		p.As = APUSHQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP

		// Move current frame to BP
		p = obj.Appendp(p, newprog)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if autoffset%int32(ctxt.Arch.RegSize) != 0 {
		ctxt.Diag("unaligned stack size %d", autoffset)
	}

	// localoffset is autoffset discounting the frame pointer,
	// which has already been allocated in the stack.
	localoffset := autoffset - int32(bpsize)
	if localoffset != 0 {
		// ADJSP $localoffset allocates the rest of the frame.
		p = obj.Appendp(p, newprog)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(localoffset)
		p.Spadj = localoffset
	}

	// Delve debugger would like the next instruction to be noted as the end of the function prologue.
	// TODO: are there other cases (e.g., wrapper functions) that need marking?
	if autoffset != 0 {
		p.Pos = p.Pos.WithXlogue(src.PosPrologueEnd)
	}

	if cursym.Func().Text.From.Sym.Wrapper() {
		// if g._panic != nil && g._panic.argp == FP {
		//   g._panic.argp = bottom-of-frame
		// }
		//
		//	MOVQ g_panic(g), regEntryTmp0
		//	TESTQ regEntryTmp0, regEntryTmp0
		//	JNE checkargp
		// end:
		//	NOP
		//  ... rest of function ...
		// checkargp:
		//	LEAQ (autoffset+8)(SP), regEntryTmp1
		//	CMPQ panic_argp(regEntryTmp0), regEntryTmp1
		//	JNE end
		//  MOVQ SP, panic_argp(regEntryTmp0)
		//  JMP end
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.
		//
		// The layout is chosen to help static branch prediction:
		// Both conditional jumps are unlikely, so they are arranged to be forward jumps.
		//
		// On 386 each instruction below is switched to its 32-bit (L) form.

		// MOVQ g_panic(g), regEntryTmp0
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = regg
		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // g_panic
		p.To.Type = obj.TYPE_REG
		p.To.Reg = regEntryTmp0
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// TESTQ regEntryTmp0, regEntryTmp0
		p = obj.Appendp(p, newprog)
		p.As = ATESTQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = regEntryTmp0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = regEntryTmp0
		if ctxt.Arch.Family == sys.I386 {
			p.As = ATESTL
		}

		// JNE checkargp (checkargp to be resolved later)
		jne := obj.Appendp(p, newprog)
		jne.As = AJNE
		jne.To.Type = obj.TYPE_BRANCH

		// end:
		//  NOP
		end := obj.Appendp(jne, newprog)
		end.As = obj.ANOP

		// Fast forward to end of function.
		var last *obj.Prog
		for last = end; last.Link != nil; last = last.Link {
		}

		// LEAQ (autoffset+8)(SP), regEntryTmp1
		// (the added constant is ctxt.Arch.RegSize)
		p = obj.Appendp(last, newprog)
		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = regEntryTmp1
		if ctxt.Arch.Family == sys.I386 {
			p.As = ALEAL
		}

		// Set jne branch target.
		jne.To.SetTarget(p)

		// CMPQ panic_argp(regEntryTmp0), regEntryTmp1
		p = obj.Appendp(p, newprog)
		p.As = ACMPQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = regEntryTmp0
		p.From.Offset = 0 // Panic.argp
		p.To.Type = obj.TYPE_REG
		p.To.Reg = regEntryTmp1
		if ctxt.Arch.Family == sys.I386 {
			p.As = ACMPL
		}

		// JNE end
		p = obj.Appendp(p, newprog)
		p.As = AJNE
		p.To.Type = obj.TYPE_BRANCH
		p.To.SetTarget(end)

		// MOVQ SP, panic_argp(regEntryTmp0)
		p = obj.Appendp(p, newprog)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = regEntryTmp0
		p.To.Offset = 0 // Panic.argp
		if ctxt.Arch.Family == sys.I386 {
			p.As = AMOVL
		}

		// JMP end
		p = obj.Appendp(p, newprog)
		p.As = obj.AJMP
		p.To.Type = obj.TYPE_BRANCH
		p.To.SetTarget(end)

		// Reset p for following code.
		p = end
	}

	// Walk the whole function. deltasp is the number of bytes by which SP
	// has been lowered so far according to the PUSH/POP/ADJSP instructions
	// seen; it is used to rebase NAME_AUTO and NAME_PARAM offsets.
	var deltasp int32
	for p = cursym.Func().Text; p != nil; p = p.Link {
		pcsize := ctxt.Arch.RegSize
		switch p.From.Name {
		case obj.NAME_AUTO:
			p.From.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.GetFrom3() != nil {
			switch p.GetFrom3().Name {
			case obj.NAME_AUTO:
				p.GetFrom3().Offset += int64(deltasp) - int64(bpsize)
			case obj.NAME_PARAM:
				p.GetFrom3().Offset += int64(deltasp) + int64(pcsize)
			}
		}
		switch p.To.Name {
		case obj.NAME_AUTO:
			p.To.Offset += int64(deltasp) - int64(bpsize)
		case obj.NAME_PARAM:
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		switch p.As {
		default:
			// Any other instruction with SP as its register destination
			// (compares excepted) marks the function SPWRITE. That is
			// only tolerated in assembly; otherwise it is fatal.
			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.As != ACMPL && p.As != ACMPQ {
				f := cursym.Func()
				if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
					f.FuncFlag |= abi.FuncFlagSPWrite
					if ctxt.Debugvlog || !ctxt.IsAsm {
						ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
						if !ctxt.IsAsm {
							ctxt.Diag("invalid auto-SPWRITE in non-assembly")
							ctxt.DiagFlush()
							log.Fatalf("bad SPWRITE")
						}
					}
				}
			}
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case AADJSP:
			p.Spadj = int32(p.From.Offset)
			deltasp += int32(p.From.Offset)
			continue

		case obj.ARET:
			// do nothing
		}

		// Only RET reaches this point: every other case continues above.
		if autoffset != deltasp {
			ctxt.Diag("%s: unbalanced PUSH/POP", cursym)
		}

		if autoffset != 0 {
			// Expand RET into [ADJSP -localoffset] [POPQ BP] RET, reusing
			// p for the first instruction of the sequence.
			to := p.To // Keep To attached to RET for retjmp below
			p.To = obj.Addr{}
			if localoffset != 0 {
				p.As = AADJSP
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = int64(-localoffset)
				p.Spadj = -localoffset
				p = obj.Appendp(p, newprog)
			}

			if bpsize > 0 {
				// Restore caller's BP
				p.As = APOPQ
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p.Spadj = -int32(bpsize)
				p = obj.Appendp(p, newprog)
			}

			p.As = obj.ARET
			p.To = to

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}
963
964func isZeroArgRuntimeCall(s *obj.LSym) bool {
965	if s == nil {
966		return false
967	}
968	switch s.Name {
969	case "runtime.panicdivide", "runtime.panicwrap", "runtime.panicshift":
970		return true
971	}
972	if strings.HasPrefix(s.Name, "runtime.panicIndex") || strings.HasPrefix(s.Name, "runtime.panicSlice") {
973		// These functions do take arguments (in registers),
974		// but use no stack before they do a stack check. We
975		// should include them. See issue 31219.
976		return true
977	}
978	return false
979}
980
981func indir_cx(ctxt *obj.Link, a *obj.Addr) {
982	a.Type = obj.TYPE_MEM
983	a.Reg = REG_CX
984}
985
986// loadG ensures the G is loaded into a register (either CX or REGG),
987// appending instructions to p if necessary. It returns the new last
988// instruction and the G register.
989func loadG(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc) (*obj.Prog, int16) {
990	if ctxt.Arch.Family == sys.AMD64 && cursym.ABI() == obj.ABIInternal {
991		// Use the G register directly in ABIInternal
992		return p, REGG
993	}
994
995	var regg int16 = REG_CX
996	if ctxt.Arch.Family == sys.AMD64 {
997		regg = REGG // == REG_R14
998	}
999
1000	p = obj.Appendp(p, newprog)
1001	p.As = AMOVQ
1002	if ctxt.Arch.PtrSize == 4 {
1003		p.As = AMOVL
1004	}
1005	p.From.Type = obj.TYPE_MEM
1006	p.From.Reg = REG_TLS
1007	p.From.Offset = 0
1008	p.To.Type = obj.TYPE_REG
1009	p.To.Reg = regg
1010
1011	// Rewrite TLS instruction if necessary.
1012	next := p.Link
1013	progedit(ctxt, p, newprog)
1014	for p.Link != next {
1015		p = p.Link
1016		progedit(ctxt, p, newprog)
1017	}
1018
1019	if p.From.Index == REG_TLS {
1020		p.From.Scale = 2
1021	}
1022
1023	return p, regg
1024}
1025
// stacksplit appends code after p that checks whether the stack has to be
// grown before the function body runs.
// Appends to (does not overwrite) p.
// G is loaded by the emitted code itself (via loadG); the register it ends
// up in is returned to the caller.
// The call to morestack is appended at the end of the function and is
// followed by a jump back to the start of the check.
// If ctxt.Flag_maymorestack is set, a call to that function is emitted
// before the check.
// textarg is not referenced by this function.
// Returns the last instruction of the inline check and the G register.
func stacksplit(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, newprog obj.ProgAlloc, framesize int32, textarg int32) (*obj.Prog, int16) {
	// Default to the 64-bit opcodes; switched to the 32-bit forms on 386.
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ
	push, pop := APUSHQ, APOPQ

	if ctxt.Arch.Family == sys.I386 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
		push, pop = APUSHL, APOPL
	}

	tmp := int16(REG_AX) // use AX for 32-bit
	if ctxt.Arch.Family == sys.AMD64 {
		// Avoid register parameters.
		tmp = int16(REGENTRYTMP0)
	}

	if ctxt.Flag_maymorestack != "" {
		p = cursym.Func().SpillRegisterArgs(p, newprog)

		// Save the context register around the call when the function needs it.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = push
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGCTXT
		}

		// We call maymorestack with an ABI matching the
		// caller's ABI. Since this is the first thing that
		// happens in the function, we have to be consistent
		// with the caller about CPU state (notably,
		// fixed-meaning registers).

		p = obj.Appendp(p, newprog)
		p.As = obj.ACALL
		p.To.Type = obj.TYPE_BRANCH
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())

		// Restore the context register saved above.
		if cursym.Func().Text.From.Sym.NeedCtxt() {
			p = obj.Appendp(p, newprog)
			p.As = pop
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGCTXT
		}

		p = cursym.Func().UnspillRegisterArgs(p, newprog)
	}

	// Jump back to here after morestack returns.
	// The jump emitted at the end targets startPred.Link, i.e. the first
	// instruction appended after this point.
	startPred := p

	// Load G register
	var rg int16
	p, rg = loadG(ctxt, cursym, p, newprog)

	// q1 is the underflow branch; it is only emitted for very large frames.
	var q1 *obj.Prog
	if framesize <= abi.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(p, newprog)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		// Mark the stack bound check and morestack call async nonpreemptible.
		// If we get preempted here, when resumed the preemption request is
		// cleared, but we'll still call morestack, which will double the stack
		// unnecessarily. See issue #35470.
		p = ctxt.StartUnsafePoint(p, newprog)
	} else if framesize <= abi.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), tmp
		//	CMPQ tmp, stackguard
		p = obj.Appendp(p, newprog)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - abi.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above
	} else {
		// Such a large stack we need to protect against underflow.
		// The runtime guarantees SP > abi.StackBig, but
		// framesize is large enough that SP-framesize may
		// underflow, causing a direct comparison with the
		// stack guard to incorrectly succeed. We explicitly
		// guard against underflow.
		//
		//	MOVQ	SP, tmp
		//	SUBQ	$(framesize - StackSmall), tmp
		//	// If subtraction wrapped (carry set), morestack.
		//	JCS	label-of-call-to-morestack
		//	CMPQ	tmp, stackguard

		p = obj.Appendp(p, newprog)

		p.As = mov
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		p = ctxt.StartUnsafePoint(p, newprog) // see the comment above

		p = obj.Appendp(p, newprog)
		p.As = sub
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) - abi.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = tmp

		// Branch target is filled in once the morestack sequence exists.
		p = obj.Appendp(p, newprog)
		p.As = AJCS
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(p, newprog)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = tmp
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = rg
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if cursym.CFunc() {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	}

	// common
	// Conditional branch to the morestack sequence; its target is set at the
	// bottom of this function.
	jls := obj.Appendp(p, newprog)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	// end is what the caller receives as the last instruction of the check.
	end := ctxt.EndUnsafePoint(jls, newprog, -1)

	// Walk to the current last instruction of the function.
	var last *obj.Prog
	for last = cursym.Func().Text; last.Link != nil; last = last.Link {
	}

	// Now we are at the end of the function, but logically
	// we are still in function prologue. We need to fix the
	// SP data and PCDATA.
	spfix := obj.Appendp(last, newprog)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	pcdata := ctxt.EmitEntryStackMap(cursym, spfix, newprog)
	// spill is the instruction both conditional branches jump to.
	spill := ctxt.StartUnsafePoint(pcdata, newprog)
	pcdata = cursym.Func().SpillRegisterArgs(spill, newprog)

	call := obj.Appendp(pcdata, newprog)
	call.Pos = cursym.Func().Text.Pos
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	// Pick the morestack variant: C functions use morestackc, functions
	// that do not need the context register use morestack_noctxt.
	morestack := "runtime.morestack"
	switch {
	case cursym.CFunc():
		morestack = "runtime.morestackc"
	case !cursym.Func().Text.From.Sym.NeedCtxt():
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = ctxt.Lookup(morestack)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend, newprog)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link, newprog)
	}

	// The instructions which unspill regs should be preemptible.
	pcdata = ctxt.EndUnsafePoint(callend, newprog, -1)
	unspill := cursym.Func().UnspillRegisterArgs(pcdata, newprog)

	// Jump back to the first instruction of the check; Spadj here is the
	// opposite of the adjustment recorded on spfix.
	jmp := obj.Appendp(unspill, newprog)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.To.SetTarget(startPred.Link)
	jmp.Spadj = +framesize

	// Resolve the forward branches now that the morestack sequence exists.
	jls.To.SetTarget(spill)
	if q1 != nil {
		q1.To.SetTarget(spill)
	}

	return end, rg
}
1246
1247func isR15(r int16) bool {
1248	return r == REG_R15 || r == REG_R15B
1249}
1250func addrMentionsR15(a *obj.Addr) bool {
1251	if a == nil {
1252		return false
1253	}
1254	return isR15(a.Reg) || isR15(a.Index)
1255}
1256func progMentionsR15(p *obj.Prog) bool {
1257	return addrMentionsR15(&p.From) || addrMentionsR15(&p.To) || isR15(p.Reg) || addrMentionsR15(p.GetFrom3())
1258}
1259
1260func addrUsesGlobal(a *obj.Addr) bool {
1261	if a == nil {
1262		return false
1263	}
1264	return a.Name == obj.NAME_EXTERN && !a.Sym.Local()
1265}
1266func progUsesGlobal(p *obj.Prog) bool {
1267	if p.As == obj.ACALL || p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
1268		// These opcodes don't use a GOT to access their argument (see rewriteToUseGot),
1269		// or R15 would be dead at them anyway.
1270		return false
1271	}
1272	if p.As == ALEAQ {
1273		// The GOT entry is placed directly in the destination register; R15 is not used.
1274		return false
1275	}
1276	return addrUsesGlobal(&p.From) || addrUsesGlobal(&p.To) || addrUsesGlobal(p.GetFrom3())
1277}
1278
// rwMask is a bit set recording which operands of an instruction are read
// and which are written.
type rwMask int

// One read bit and one write bit for each operand slot: From, To, Reg and
// the third operand (From3).
const (
	readFrom   rwMask = 1 << 0 // From is read
	readTo     rwMask = 1 << 1 // To is read
	readReg    rwMask = 1 << 2 // Reg is read
	readFrom3  rwMask = 1 << 3 // From3 is read
	writeFrom  rwMask = 1 << 4 // From is written
	writeTo    rwMask = 1 << 5 // To is written
	writeReg   rwMask = 1 << 6 // Reg is written
	writeFrom3 rwMask = 1 << 7 // From3 is written
)
1291
1292// progRW returns a mask describing the effects of the instruction p.
1293// Note: this isn't exhaustively accurate. It is only currently used for detecting
1294// reads/writes to R15, so SSE register behavior isn't fully correct, and
1295// other weird cases (e.g. writes to DX by CLD) also aren't captured.
1296func progRW(p *obj.Prog) rwMask {
1297	var m rwMask
1298	// Default for most instructions
1299	if p.From.Type != obj.TYPE_NONE {
1300		m |= readFrom
1301	}
1302	if p.To.Type != obj.TYPE_NONE {
1303		// Most x86 instructions update the To value
1304		m |= readTo | writeTo
1305	}
1306	if p.Reg != 0 {
1307		m |= readReg
1308	}
1309	if p.GetFrom3() != nil {
1310		m |= readFrom3
1311	}
1312
1313	// Lots of exceptions to the above defaults.
1314	name := p.As.String()
1315	if strings.HasPrefix(name, "MOV") || strings.HasPrefix(name, "PMOV") {
1316		// MOV instructions don't read To.
1317		m &^= readTo
1318	}
1319	switch p.As {
1320	case APOPW, APOPL, APOPQ,
1321		ALEAL, ALEAQ,
1322		AIMUL3W, AIMUL3L, AIMUL3Q,
1323		APEXTRB, APEXTRW, APEXTRD, APEXTRQ, AVPEXTRB, AVPEXTRW, AVPEXTRD, AVPEXTRQ, AEXTRACTPS,
1324		ABSFW, ABSFL, ABSFQ, ABSRW, ABSRL, ABSRQ, APOPCNTW, APOPCNTL, APOPCNTQ, ALZCNTW, ALZCNTL, ALZCNTQ,
1325		ASHLXL, ASHLXQ, ASHRXL, ASHRXQ, ASARXL, ASARXQ:
1326		// These instructions are pure writes to To. They don't use its old value.
1327		m &^= readTo
1328	case AXORL, AXORQ:
1329		// Register-clearing idiom doesn't read previous value.
1330		if p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG && p.From.Reg == p.To.Reg {
1331			m &^= readFrom | readTo
1332		}
1333	case AMULXL, AMULXQ:
1334		// These are write-only to both To and From3.
1335		m &^= readTo | readFrom3
1336		m |= writeFrom3
1337	}
1338	return m
1339}
1340
1341// progReadsR15 reports whether p reads the register R15.
1342func progReadsR15(p *obj.Prog) bool {
1343	m := progRW(p)
1344	if m&readFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
1345		return true
1346	}
1347	if m&readTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
1348		return true
1349	}
1350	if m&readReg != 0 && isR15(p.Reg) {
1351		return true
1352	}
1353	if m&readFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
1354		return true
1355	}
1356	// reads of the index registers
1357	if p.From.Type == obj.TYPE_MEM && (isR15(p.From.Reg) || isR15(p.From.Index)) {
1358		return true
1359	}
1360	if p.To.Type == obj.TYPE_MEM && (isR15(p.To.Reg) || isR15(p.To.Index)) {
1361		return true
1362	}
1363	if f3 := p.GetFrom3(); f3 != nil && f3.Type == obj.TYPE_MEM && (isR15(f3.Reg) || isR15(f3.Index)) {
1364		return true
1365	}
1366	return false
1367}
1368
1369// progWritesR15 reports whether p writes the register R15.
1370func progWritesR15(p *obj.Prog) bool {
1371	m := progRW(p)
1372	if m&writeFrom != 0 && p.From.Type == obj.TYPE_REG && isR15(p.From.Reg) {
1373		return true
1374	}
1375	if m&writeTo != 0 && p.To.Type == obj.TYPE_REG && isR15(p.To.Reg) {
1376		return true
1377	}
1378	if m&writeReg != 0 && isR15(p.Reg) {
1379		return true
1380	}
1381	if m&writeFrom3 != 0 && p.GetFrom3().Type == obj.TYPE_REG && isR15(p.GetFrom3().Reg) {
1382		return true
1383	}
1384	return false
1385}
1386
1387func errorCheck(ctxt *obj.Link, s *obj.LSym) {
1388	// When dynamic linking, R15 is used to access globals. Reject code that
1389	// uses R15 after a global variable access.
1390	if !ctxt.Flag_dynlink {
1391		return
1392	}
1393
1394	// Flood fill all the instructions where R15's value is junk.
1395	// If there are any uses of R15 in that set, report an error.
1396	var work []*obj.Prog
1397	var mentionsR15 bool
1398	for p := s.Func().Text; p != nil; p = p.Link {
1399		if progUsesGlobal(p) {
1400			work = append(work, p)
1401			p.Mark |= markBit
1402		}
1403		if progMentionsR15(p) {
1404			mentionsR15 = true
1405		}
1406	}
1407	if mentionsR15 {
1408		for len(work) > 0 {
1409			p := work[len(work)-1]
1410			work = work[:len(work)-1]
1411			if progReadsR15(p) {
1412				pos := ctxt.PosTable.Pos(p.Pos)
1413				ctxt.Diag("%s:%s: when dynamic linking, R15 is clobbered by a global variable access and is used here: %v", path.Base(pos.Filename()), pos.LineNumber(), p)
1414				break // only report one error
1415			}
1416			if progWritesR15(p) {
1417				// R15 is overwritten by this instruction. Its value is not junk any more.
1418				continue
1419			}
1420			if q := p.To.Target(); q != nil && q.Mark&markBit == 0 {
1421				q.Mark |= markBit
1422				work = append(work, q)
1423			}
1424			if p.As == obj.AJMP || p.As == obj.ARET {
1425				continue // no fallthrough
1426			}
1427			if q := p.Link; q != nil && q.Mark&markBit == 0 {
1428				q.Mark |= markBit
1429				work = append(work, q)
1430			}
1431		}
1432	}
1433
1434	// Clean up.
1435	for p := s.Func().Text; p != nil; p = p.Link {
1436		p.Mark &^= markBit
1437	}
1438}
1439
// unaryDst is the opcode set installed as the UnaryDst table of both
// Linkamd64 and Link386.
// NOTE(review): judging by the name and the entries (INC/DEC/NEG/NOT,
// POP, SETcc, store-style instructions), these appear to be
// single-operand instructions whose operand is a destination rather than
// a source — confirm against the obj.LinkArch.UnaryDst documentation.
var unaryDst = map[obj.As]bool{
	ABSWAPL:     true,
	ABSWAPQ:     true,
	ACLDEMOTE:   true,
	ACLFLUSH:    true,
	ACLFLUSHOPT: true,
	ACLWB:       true,
	ACMPXCHG16B: true,
	ACMPXCHG8B:  true,
	ADECB:       true,
	ADECL:       true,
	ADECQ:       true,
	ADECW:       true,
	AFBSTP:      true,
	AFFREE:      true,
	AFLDENV:     true,
	AFSAVE:      true,
	AFSTCW:      true,
	AFSTENV:     true,
	AFSTSW:      true,
	AFXSAVE64:   true,
	AFXSAVE:     true,
	AINCB:       true,
	AINCL:       true,
	AINCQ:       true,
	AINCW:       true,
	ANEGB:       true,
	ANEGL:       true,
	ANEGQ:       true,
	ANEGW:       true,
	ANOTB:       true,
	ANOTL:       true,
	ANOTQ:       true,
	ANOTW:       true,
	APOPL:       true,
	APOPQ:       true,
	APOPW:       true,
	ARDFSBASEL:  true,
	ARDFSBASEQ:  true,
	ARDGSBASEL:  true,
	ARDGSBASEQ:  true,
	ARDPID:      true,
	ARDRANDL:    true,
	ARDRANDQ:    true,
	ARDRANDW:    true,
	ARDSEEDL:    true,
	ARDSEEDQ:    true,
	ARDSEEDW:    true,
	ASETCC:      true,
	ASETCS:      true,
	ASETEQ:      true,
	ASETGE:      true,
	ASETGT:      true,
	ASETHI:      true,
	ASETLE:      true,
	ASETLS:      true,
	ASETLT:      true,
	ASETMI:      true,
	ASETNE:      true,
	ASETOC:      true,
	ASETOS:      true,
	ASETPC:      true,
	ASETPL:      true,
	ASETPS:      true,
	ASGDT:       true,
	ASIDT:       true,
	ASLDTL:      true,
	ASLDTQ:      true,
	ASLDTW:      true,
	ASMSWL:      true,
	ASMSWQ:      true,
	ASMSWW:      true,
	ASTMXCSR:    true,
	ASTRL:       true,
	ASTRQ:       true,
	ASTRW:       true,
	AXSAVE64:    true,
	AXSAVE:      true,
	AXSAVEC64:   true,
	AXSAVEC:     true,
	AXSAVEOPT64: true,
	AXSAVEOPT:   true,
	AXSAVES64:   true,
	AXSAVES:     true,
}
1525
// Linkamd64 is the obj.LinkArch for AMD64. It binds sys.ArchAMD64 to this
// package's hooks, including errorCheck (the dynamic-linking R15 check)
// and populateSeh.
var Linkamd64 = obj.LinkArch{
	Arch:           sys.ArchAMD64,
	Init:           instinit,
	ErrorCheck:     errorCheck,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	SEH:            populateSeh,
	UnaryDst:       unaryDst,
	DWARFRegisters: AMD64DWARFRegisters,
}
1537
// Link386 is the obj.LinkArch for 386. It binds sys.Arch386 to the same
// Init, Preprocess, Assemble and Progedit hooks and the same UnaryDst
// table as Linkamd64, but registers no ErrorCheck or SEH hook and uses
// X86DWARFRegisters.
var Link386 = obj.LinkArch{
	Arch:           sys.Arch386,
	Init:           instinit,
	Preprocess:     preprocess,
	Assemble:       span6,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: X86DWARFRegisters,
}
1547