1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package ssa
6
7import (
8	"cmd/compile/internal/base"
9	"cmd/compile/internal/logopt"
10	"cmd/compile/internal/reflectdata"
11	"cmd/compile/internal/types"
12	"cmd/internal/obj"
13	"cmd/internal/obj/s390x"
14	"cmd/internal/objabi"
15	"cmd/internal/src"
16	"encoding/binary"
17	"fmt"
18	"internal/buildcfg"
19	"io"
20	"math"
21	"math/bits"
22	"os"
23	"path/filepath"
24	"strings"
25)
26
27type deadValueChoice bool
28
29const (
30	leaveDeadValues  deadValueChoice = false
31	removeDeadValues                 = true
32)
33
34// deadcode indicates whether rewrite should try to remove any values that become dead.
35func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
36	// repeat rewrites until we find no more rewrites
37	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
38	pendingLines.clear()
39	debug := f.pass.debug
40	if debug > 1 {
41		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
42	}
43	// if the number of rewrite iterations reaches itersLimit we will
44	// at that point turn on cycle detection. Instead of a fixed limit,
45	// size the limit according to func size to allow for cases such
46	// as the one in issue #66773.
47	itersLimit := f.NumBlocks()
48	if itersLimit < 20 {
49		itersLimit = 20
50	}
51	var iters int
52	var states map[string]bool
53	for {
54		change := false
55		deadChange := false
56		for _, b := range f.Blocks {
57			var b0 *Block
58			if debug > 1 {
59				b0 = new(Block)
60				*b0 = *b
61				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
62			}
63			for i, c := range b.ControlValues() {
64				for c.Op == OpCopy {
65					c = c.Args[0]
66					b.ReplaceControl(i, c)
67				}
68			}
69			if rb(b) {
70				change = true
71				if debug > 1 {
72					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
73				}
74			}
75			for j, v := range b.Values {
76				var v0 *Value
77				if debug > 1 {
78					v0 = new(Value)
79					*v0 = *v
80					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
81				}
82				if v.Uses == 0 && v.removeable() {
83					if v.Op != OpInvalid && deadcode == removeDeadValues {
84						// Reset any values that are now unused, so that we decrement
85						// the use count of all of its arguments.
86						// Not quite a deadcode pass, because it does not handle cycles.
87						// But it should help Uses==1 rules to fire.
88						v.reset(OpInvalid)
89						deadChange = true
90					}
91					// No point rewriting values which aren't used.
92					continue
93				}
94
95				vchange := phielimValue(v)
96				if vchange && debug > 1 {
97					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
98				}
99
100				// Eliminate copy inputs.
101				// If any copy input becomes unused, mark it
102				// as invalid and discard its argument. Repeat
103				// recursively on the discarded argument.
104				// This phase helps remove phantom "dead copy" uses
105				// of a value so that a x.Uses==1 rule condition
106				// fires reliably.
107				for i, a := range v.Args {
108					if a.Op != OpCopy {
109						continue
110					}
111					aa := copySource(a)
112					v.SetArg(i, aa)
113					// If a, a copy, has a line boundary indicator, attempt to find a new value
114					// to hold it.  The first candidate is the value that will replace a (aa),
115					// if it shares the same block and line and is eligible.
116					// The second option is v, which has a as an input.  Because aa is earlier in
117					// the data flow, it is the better choice.
118					if a.Pos.IsStmt() == src.PosIsStmt {
119						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
120							aa.Pos = aa.Pos.WithIsStmt()
121						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
122							v.Pos = v.Pos.WithIsStmt()
123						} else {
124							// Record the lost line and look for a new home after all rewrites are complete.
125							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
126							// line to appear in more than one block, but only one block is stored, so if both end
127							// up here, then one will be lost.
128							pendingLines.set(a.Pos, int32(a.Block.ID))
129						}
130						a.Pos = a.Pos.WithNotStmt()
131					}
132					vchange = true
133					for a.Uses == 0 {
134						b := a.Args[0]
135						a.reset(OpInvalid)
136						a = b
137					}
138				}
139				if vchange && debug > 1 {
140					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
141				}
142
143				// apply rewrite function
144				if rv(v) {
145					vchange = true
146					// If value changed to a poor choice for a statement boundary, move the boundary
147					if v.Pos.IsStmt() == src.PosIsStmt {
148						if k := nextGoodStatementIndex(v, j, b); k != j {
149							v.Pos = v.Pos.WithNotStmt()
150							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
151						}
152					}
153				}
154
155				change = change || vchange
156				if vchange && debug > 1 {
157					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
158				}
159			}
160		}
161		if !change && !deadChange {
162			break
163		}
164		iters++
165		if (iters > itersLimit || debug >= 2) && change {
166			// We've done a suspiciously large number of rewrites (or we're in debug mode).
167			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
168			// and the maximum value encountered during make.bash is 12.
169			// Start checking for cycles. (This is too expensive to do routinely.)
170			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
171			if states == nil {
172				states = make(map[string]bool)
173			}
174			h := f.rewriteHash()
175			if _, ok := states[h]; ok {
176				// We've found a cycle.
177				// To diagnose it, set debug to 2 and start again,
178				// so that we'll print all rules applied until we complete another cycle.
179				// If debug is already >= 2, we've already done that, so it's time to crash.
180				if debug < 2 {
181					debug = 2
182					states = make(map[string]bool)
183				} else {
184					f.Fatalf("rewrite cycle detected")
185				}
186			}
187			states[h] = true
188		}
189	}
190	// remove clobbered values
191	for _, b := range f.Blocks {
192		j := 0
193		for i, v := range b.Values {
194			vl := v.Pos
195			if v.Op == OpInvalid {
196				if v.Pos.IsStmt() == src.PosIsStmt {
197					pendingLines.set(vl, int32(b.ID))
198				}
199				f.freeValue(v)
200				continue
201			}
202			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
203				pendingLines.remove(vl)
204				v.Pos = v.Pos.WithIsStmt()
205			}
206			if i != j {
207				b.Values[j] = v
208			}
209			j++
210		}
211		if pendingLines.get(b.Pos) == int32(b.ID) {
212			b.Pos = b.Pos.WithIsStmt()
213			pendingLines.remove(b.Pos)
214		}
215		b.truncateValues(j)
216	}
217}
218
219// Common functions called from rewriting rules
220
221func is64BitFloat(t *types.Type) bool {
222	return t.Size() == 8 && t.IsFloat()
223}
224
225func is32BitFloat(t *types.Type) bool {
226	return t.Size() == 4 && t.IsFloat()
227}
228
229func is64BitInt(t *types.Type) bool {
230	return t.Size() == 8 && t.IsInteger()
231}
232
233func is32BitInt(t *types.Type) bool {
234	return t.Size() == 4 && t.IsInteger()
235}
236
237func is16BitInt(t *types.Type) bool {
238	return t.Size() == 2 && t.IsInteger()
239}
240
241func is8BitInt(t *types.Type) bool {
242	return t.Size() == 1 && t.IsInteger()
243}
244
245func isPtr(t *types.Type) bool {
246	return t.IsPtrShaped()
247}
248
249// mergeSym merges two symbolic offsets. There is no real merging of
250// offsets, we just pick the non-nil one.
251func mergeSym(x, y Sym) Sym {
252	if x == nil {
253		return y
254	}
255	if y == nil {
256		return x
257	}
258	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
259}
260
261func canMergeSym(x, y Sym) bool {
262	return x == nil || y == nil
263}
264
265// canMergeLoadClobber reports whether the load can be merged into target without
266// invalidating the schedule.
267// It also checks that the other non-load argument x is something we
268// are ok with clobbering.
269func canMergeLoadClobber(target, load, x *Value) bool {
270	// The register containing x is going to get clobbered.
271	// Don't merge if we still need the value of x.
272	// We don't have liveness information here, but we can
273	// approximate x dying with:
274	//  1) target is x's only use.
275	//  2) target is not in a deeper loop than x.
276	if x.Uses != 1 {
277		return false
278	}
279	loopnest := x.Block.Func.loopnest()
280	loopnest.calculateDepths()
281	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
282		return false
283	}
284	return canMergeLoad(target, load)
285}
286
287// canMergeLoad reports whether the load can be merged into target without
288// invalidating the schedule.
289func canMergeLoad(target, load *Value) bool {
290	if target.Block.ID != load.Block.ID {
291		// If the load is in a different block do not merge it.
292		return false
293	}
294
295	// We can't merge the load into the target if the load
296	// has more than one use.
297	if load.Uses != 1 {
298		return false
299	}
300
301	mem := load.MemoryArg()
302
303	// We need the load's memory arg to still be alive at target. That
304	// can't be the case if one of target's args depends on a memory
305	// state that is a successor of load's memory arg.
306	//
307	// For example, it would be invalid to merge load into target in
308	// the following situation because newmem has killed oldmem
309	// before target is reached:
310	//     load = read ... oldmem
311	//   newmem = write ... oldmem
312	//     arg0 = read ... newmem
313	//   target = add arg0 load
314	//
315	// If the argument comes from a different block then we can exclude
316	// it immediately because it must dominate load (which is in the
317	// same block as target).
318	var args []*Value
319	for _, a := range target.Args {
320		if a != load && a.Block.ID == target.Block.ID {
321			args = append(args, a)
322		}
323	}
324
325	// memPreds contains memory states known to be predecessors of load's
326	// memory state. It is lazily initialized.
327	var memPreds map[*Value]bool
328	for i := 0; len(args) > 0; i++ {
329		const limit = 100
330		if i >= limit {
331			// Give up if we have done a lot of iterations.
332			return false
333		}
334		v := args[len(args)-1]
335		args = args[:len(args)-1]
336		if target.Block.ID != v.Block.ID {
337			// Since target and load are in the same block
338			// we can stop searching when we leave the block.
339			continue
340		}
341		if v.Op == OpPhi {
342			// A Phi implies we have reached the top of the block.
343			// The memory phi, if it exists, is always
344			// the first logical store in the block.
345			continue
346		}
347		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
348			// We could handle this situation however it is likely
349			// to be very rare.
350			return false
351		}
352		if v.Op.SymEffect()&SymAddr != 0 {
353			// This case prevents an operation that calculates the
354			// address of a local variable from being forced to schedule
355			// before its corresponding VarDef.
356			// See issue 28445.
357			//   v1 = LOAD ...
358			//   v2 = VARDEF
359			//   v3 = LEAQ
360			//   v4 = CMPQ v1 v3
361			// We don't want to combine the CMPQ with the load, because
362			// that would force the CMPQ to schedule before the VARDEF, which
363			// in turn requires the LEAQ to schedule before the VARDEF.
364			return false
365		}
366		if v.Type.IsMemory() {
367			if memPreds == nil {
368				// Initialise a map containing memory states
369				// known to be predecessors of load's memory
370				// state.
371				memPreds = make(map[*Value]bool)
372				m := mem
373				const limit = 50
374				for i := 0; i < limit; i++ {
375					if m.Op == OpPhi {
376						// The memory phi, if it exists, is always
377						// the first logical store in the block.
378						break
379					}
380					if m.Block.ID != target.Block.ID {
381						break
382					}
383					if !m.Type.IsMemory() {
384						break
385					}
386					memPreds[m] = true
387					if len(m.Args) == 0 {
388						break
389					}
390					m = m.MemoryArg()
391				}
392			}
393
394			// We can merge if v is a predecessor of mem.
395			//
396			// For example, we can merge load into target in the
397			// following scenario:
398			//      x = read ... v
399			//    mem = write ... v
400			//   load = read ... mem
401			// target = add x load
402			if memPreds[v] {
403				continue
404			}
405			return false
406		}
407		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
408			// If v takes mem as an input then we know mem
409			// is valid at this point.
410			continue
411		}
412		for _, a := range v.Args {
413			if target.Block.ID == a.Block.ID {
414				args = append(args, a)
415			}
416		}
417	}
418
419	return true
420}
421
422// isSameCall reports whether sym is the same as the given named symbol.
423func isSameCall(sym interface{}, name string) bool {
424	fn := sym.(*AuxCall).Fn
425	return fn != nil && fn.String() == name
426}
427
428// canLoadUnaligned reports if the architecture supports unaligned load operations.
429func canLoadUnaligned(c *Config) bool {
430	return c.ctxt.Arch.Alignment == 1
431}
432
433// nlzX returns the number of leading zeros.
434func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
435func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
436func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
437func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
438
439// ntzX returns the number of trailing zeros.
440func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
441func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
442func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
443func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
444
445func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
446func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
447func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
448func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
449func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
450
451// nto returns the number of trailing ones.
452func nto(x int64) int64 {
453	return int64(ntz64(^x))
454}
455
456// logX returns logarithm of n base 2.
457// n must be a positive power of 2 (isPowerOfTwoX returns true).
458func log8(n int8) int64 {
459	return int64(bits.Len8(uint8(n))) - 1
460}
461func log16(n int16) int64 {
462	return int64(bits.Len16(uint16(n))) - 1
463}
464func log32(n int32) int64 {
465	return int64(bits.Len32(uint32(n))) - 1
466}
467func log64(n int64) int64 {
468	return int64(bits.Len64(uint64(n))) - 1
469}
470
471// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
472// Rounds down.
473func log2uint32(n int64) int64 {
474	return int64(bits.Len32(uint32(n))) - 1
475}
476
477// isPowerOfTwoX functions report whether n is a power of 2.
478func isPowerOfTwo8(n int8) bool {
479	return n > 0 && n&(n-1) == 0
480}
481func isPowerOfTwo16(n int16) bool {
482	return n > 0 && n&(n-1) == 0
483}
484func isPowerOfTwo32(n int32) bool {
485	return n > 0 && n&(n-1) == 0
486}
487func isPowerOfTwo64(n int64) bool {
488	return n > 0 && n&(n-1) == 0
489}
490
491// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
492func isUint64PowerOfTwo(in int64) bool {
493	n := uint64(in)
494	return n > 0 && n&(n-1) == 0
495}
496
497// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
498func isUint32PowerOfTwo(in int64) bool {
499	n := uint64(uint32(in))
500	return n > 0 && n&(n-1) == 0
501}
502
503// is32Bit reports whether n can be represented as a signed 32 bit integer.
504func is32Bit(n int64) bool {
505	return n == int64(int32(n))
506}
507
508// is16Bit reports whether n can be represented as a signed 16 bit integer.
509func is16Bit(n int64) bool {
510	return n == int64(int16(n))
511}
512
513// is8Bit reports whether n can be represented as a signed 8 bit integer.
514func is8Bit(n int64) bool {
515	return n == int64(int8(n))
516}
517
518// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
519func isU8Bit(n int64) bool {
520	return n == int64(uint8(n))
521}
522
523// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
524func isU12Bit(n int64) bool {
525	return 0 <= n && n < (1<<12)
526}
527
528// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
529func isU16Bit(n int64) bool {
530	return n == int64(uint16(n))
531}
532
533// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
534func isU32Bit(n int64) bool {
535	return n == int64(uint32(n))
536}
537
538// is20Bit reports whether n can be represented as a signed 20 bit integer.
539func is20Bit(n int64) bool {
540	return -(1<<19) <= n && n < (1<<19)
541}
542
543// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
544func b2i(b bool) int64 {
545	if b {
546		return 1
547	}
548	return 0
549}
550
551// b2i32 translates a boolean value to 0 or 1.
552func b2i32(b bool) int32 {
553	if b {
554		return 1
555	}
556	return 0
557}
558
559// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
560// A shift is bounded if it is shifting by less than the width of the shifted value.
561func shiftIsBounded(v *Value) bool {
562	return v.AuxInt != 0
563}
564
565// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
566// generated code as much as possible.
567func canonLessThan(x, y *Value) bool {
568	if x.Op != y.Op {
569		return x.Op < y.Op
570	}
571	if !x.Pos.SameFileAndLine(y.Pos) {
572		return x.Pos.Before(y.Pos)
573	}
574	return x.ID < y.ID
575}
576
577// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
578// of the mantissa. It will panic if the truncation results in lost information.
579func truncate64Fto32F(f float64) float32 {
580	if !isExactFloat32(f) {
581		panic("truncate64Fto32F: truncation is not exact")
582	}
583	if !math.IsNaN(f) {
584		return float32(f)
585	}
586	// NaN bit patterns aren't necessarily preserved across conversion
587	// instructions so we need to do the conversion manually.
588	b := math.Float64bits(f)
589	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
590	//          | sign                  | exponent   | mantissa       |
591	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
592	return math.Float32frombits(r)
593}
594
595// extend32Fto64F converts a float32 value to a float64 value preserving the bit
596// pattern of the mantissa.
597func extend32Fto64F(f float32) float64 {
598	if !math.IsNaN(float64(f)) {
599		return float64(f)
600	}
601	// NaN bit patterns aren't necessarily preserved across conversion
602	// instructions so we need to do the conversion manually.
603	b := uint64(math.Float32bits(f))
604	//   | sign                  | exponent      | mantissa                    |
605	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
606	return math.Float64frombits(r)
607}
608
609// DivisionNeedsFixUp reports whether the division needs fix-up code.
610func DivisionNeedsFixUp(v *Value) bool {
611	return v.AuxInt == 0
612}
613
614// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
615func auxFrom64F(f float64) int64 {
616	if f != f {
617		panic("can't encode a NaN in AuxInt field")
618	}
619	return int64(math.Float64bits(f))
620}
621
622// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
623func auxFrom32F(f float32) int64 {
624	if f != f {
625		panic("can't encode a NaN in AuxInt field")
626	}
627	return int64(math.Float64bits(extend32Fto64F(f)))
628}
629
630// auxTo32F decodes a float32 from the AuxInt value provided.
631func auxTo32F(i int64) float32 {
632	return truncate64Fto32F(math.Float64frombits(uint64(i)))
633}
634
635// auxTo64F decodes a float64 from the AuxInt value provided.
636func auxTo64F(i int64) float64 {
637	return math.Float64frombits(uint64(i))
638}
639
640func auxIntToBool(i int64) bool {
641	if i == 0 {
642		return false
643	}
644	return true
645}
646func auxIntToInt8(i int64) int8 {
647	return int8(i)
648}
649func auxIntToInt16(i int64) int16 {
650	return int16(i)
651}
652func auxIntToInt32(i int64) int32 {
653	return int32(i)
654}
655func auxIntToInt64(i int64) int64 {
656	return i
657}
658func auxIntToUint8(i int64) uint8 {
659	return uint8(i)
660}
661func auxIntToFloat32(i int64) float32 {
662	return float32(math.Float64frombits(uint64(i)))
663}
664func auxIntToFloat64(i int64) float64 {
665	return math.Float64frombits(uint64(i))
666}
667func auxIntToValAndOff(i int64) ValAndOff {
668	return ValAndOff(i)
669}
670func auxIntToArm64BitField(i int64) arm64BitField {
671	return arm64BitField(i)
672}
673func auxIntToInt128(x int64) int128 {
674	if x != 0 {
675		panic("nonzero int128 not allowed")
676	}
677	return 0
678}
679func auxIntToFlagConstant(x int64) flagConstant {
680	return flagConstant(x)
681}
682
683func auxIntToOp(cc int64) Op {
684	return Op(cc)
685}
686
687func boolToAuxInt(b bool) int64 {
688	if b {
689		return 1
690	}
691	return 0
692}
693func int8ToAuxInt(i int8) int64 {
694	return int64(i)
695}
696func int16ToAuxInt(i int16) int64 {
697	return int64(i)
698}
699func int32ToAuxInt(i int32) int64 {
700	return int64(i)
701}
702func int64ToAuxInt(i int64) int64 {
703	return int64(i)
704}
705func uint8ToAuxInt(i uint8) int64 {
706	return int64(int8(i))
707}
708func float32ToAuxInt(f float32) int64 {
709	return int64(math.Float64bits(float64(f)))
710}
711func float64ToAuxInt(f float64) int64 {
712	return int64(math.Float64bits(f))
713}
714func valAndOffToAuxInt(v ValAndOff) int64 {
715	return int64(v)
716}
717func arm64BitFieldToAuxInt(v arm64BitField) int64 {
718	return int64(v)
719}
720func int128ToAuxInt(x int128) int64 {
721	if x != 0 {
722		panic("nonzero int128 not allowed")
723	}
724	return 0
725}
726func flagConstantToAuxInt(x flagConstant) int64 {
727	return int64(x)
728}
729
730func opToAuxInt(o Op) int64 {
731	return int64(o)
732}
733
734// Aux is an interface to hold miscellaneous data in Blocks and Values.
735type Aux interface {
736	CanBeAnSSAAux()
737}
738
739// for now only used to mark moves that need to avoid clobbering flags
740type auxMark bool
741
742func (auxMark) CanBeAnSSAAux() {}
743
744var AuxMark auxMark
745
746// stringAux wraps string values for use in Aux.
747type stringAux string
748
749func (stringAux) CanBeAnSSAAux() {}
750
751func auxToString(i Aux) string {
752	return string(i.(stringAux))
753}
754func auxToSym(i Aux) Sym {
755	// TODO: kind of a hack - allows nil interface through
756	s, _ := i.(Sym)
757	return s
758}
759func auxToType(i Aux) *types.Type {
760	return i.(*types.Type)
761}
762func auxToCall(i Aux) *AuxCall {
763	return i.(*AuxCall)
764}
765func auxToS390xCCMask(i Aux) s390x.CCMask {
766	return i.(s390x.CCMask)
767}
768func auxToS390xRotateParams(i Aux) s390x.RotateParams {
769	return i.(s390x.RotateParams)
770}
771
772func StringToAux(s string) Aux {
773	return stringAux(s)
774}
775func symToAux(s Sym) Aux {
776	return s
777}
778func callToAux(s *AuxCall) Aux {
779	return s
780}
781func typeToAux(t *types.Type) Aux {
782	return t
783}
784func s390xCCMaskToAux(c s390x.CCMask) Aux {
785	return c
786}
787func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
788	return r
789}
790
791// uaddOvf reports whether unsigned a+b would overflow.
792func uaddOvf(a, b int64) bool {
793	return uint64(a)+uint64(b) < uint64(a)
794}
795
796// loadLSymOffset simulates reading a word at an offset into a
797// read-only symbol's runtime memory. If it would read a pointer to
798// another symbol, that symbol is returned. Otherwise, it returns nil.
799func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
800	if lsym.Type != objabi.SRODATA {
801		return nil
802	}
803
804	for _, r := range lsym.R {
805		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
806			return r.Sym
807		}
808	}
809
810	return nil
811}
812
813func devirtLECall(v *Value, sym *obj.LSym) *Value {
814	v.Op = OpStaticLECall
815	auxcall := v.Aux.(*AuxCall)
816	auxcall.Fn = sym
817	// Remove first arg
818	v.Args[0].Uses--
819	copy(v.Args[0:], v.Args[1:])
820	v.Args[len(v.Args)-1] = nil // aid GC
821	v.Args = v.Args[:len(v.Args)-1]
822	if f := v.Block.Func; f.pass.debug > 0 {
823		f.Warnl(v.Pos, "de-virtualizing call")
824	}
825	return v
826}
827
828// isSamePtr reports whether p1 and p2 point to the same address.
829func isSamePtr(p1, p2 *Value) bool {
830	if p1 == p2 {
831		return true
832	}
833	if p1.Op != p2.Op {
834		return false
835	}
836	switch p1.Op {
837	case OpOffPtr:
838		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
839	case OpAddr, OpLocalAddr:
840		return p1.Aux == p2.Aux
841	case OpAddPtr:
842		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
843	}
844	return false
845}
846
847func isStackPtr(v *Value) bool {
848	for v.Op == OpOffPtr || v.Op == OpAddPtr {
849		v = v.Args[0]
850	}
851	return v.Op == OpSP || v.Op == OpLocalAddr
852}
853
854// disjoint reports whether the memory region specified by [p1:p1+n1)
855// does not overlap with [p2:p2+n2).
856// A return value of false does not imply the regions overlap.
857func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
858	if n1 == 0 || n2 == 0 {
859		return true
860	}
861	if p1 == p2 {
862		return false
863	}
864	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
865		base, offset = ptr, 0
866		for base.Op == OpOffPtr {
867			offset += base.AuxInt
868			base = base.Args[0]
869		}
870		if opcodeTable[base.Op].nilCheck {
871			base = base.Args[0]
872		}
873		return base, offset
874	}
875	p1, off1 := baseAndOffset(p1)
876	p2, off2 := baseAndOffset(p2)
877	if isSamePtr(p1, p2) {
878		return !overlap(off1, n1, off2, n2)
879	}
880	// p1 and p2 are not the same, so if they are both OpAddrs then
881	// they point to different variables.
882	// If one pointer is on the stack and the other is an argument
883	// then they can't overlap.
884	switch p1.Op {
885	case OpAddr, OpLocalAddr:
886		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
887			return true
888		}
889		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
890	case OpArg, OpArgIntReg:
891		if p2.Op == OpSP || p2.Op == OpLocalAddr {
892			return true
893		}
894	case OpSP:
895		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
896	}
897	return false
898}
899
900// moveSize returns the number of bytes an aligned MOV instruction moves.
901func moveSize(align int64, c *Config) int64 {
902	switch {
903	case align%8 == 0 && c.PtrSize == 8:
904		return 8
905	case align%4 == 0:
906		return 4
907	case align%2 == 0:
908		return 2
909	}
910	return 1
911}
912
913// mergePoint finds a block among a's blocks which dominates b and is itself
914// dominated by all of a's blocks. Returns nil if it can't find one.
915// Might return nil even if one does exist.
916func mergePoint(b *Block, a ...*Value) *Block {
917	// Walk backward from b looking for one of the a's blocks.
918
919	// Max distance
920	d := 100
921
922	for d > 0 {
923		for _, x := range a {
924			if b == x.Block {
925				goto found
926			}
927		}
928		if len(b.Preds) > 1 {
929			// Don't know which way to go back. Abort.
930			return nil
931		}
932		b = b.Preds[0].b
933		d--
934	}
935	return nil // too far away
936found:
937	// At this point, r is the first value in a that we find by walking backwards.
938	// if we return anything, r will be it.
939	r := b
940
941	// Keep going, counting the other a's that we find. They must all dominate r.
942	na := 0
943	for d > 0 {
944		for _, x := range a {
945			if b == x.Block {
946				na++
947			}
948		}
949		if na == len(a) {
950			// Found all of a in a backwards walk. We can return r.
951			return r
952		}
953		if len(b.Preds) > 1 {
954			return nil
955		}
956		b = b.Preds[0].b
957		d--
958
959	}
960	return nil // too far away
961}
962
963// clobber invalidates values. Returns true.
964// clobber is used by rewrite rules to:
965//
966//	A) make sure the values are really dead and never used again.
967//	B) decrement use counts of the values' args.
968func clobber(vv ...*Value) bool {
969	for _, v := range vv {
970		v.reset(OpInvalid)
971		// Note: leave v.Block intact.  The Block field is used after clobber.
972	}
973	return true
974}
975
976// clobberIfDead resets v when use count is 1. Returns true.
977// clobberIfDead is used by rewrite rules to decrement
978// use counts of v's args when v is dead and never used.
979func clobberIfDead(v *Value) bool {
980	if v.Uses == 1 {
981		v.reset(OpInvalid)
982	}
983	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
984	return true
985}
986
987// noteRule is an easy way to track if a rule is matched when writing
988// new ones.  Make the rule of interest also conditional on
989//
990//	noteRule("note to self: rule of interest matched")
991//
992// and that message will print when the rule matches.
993func noteRule(s string) bool {
994	fmt.Println(s)
995	return true
996}
997
998// countRule increments Func.ruleMatches[key].
999// If Func.ruleMatches is non-nil at the end
1000// of compilation, it will be printed to stdout.
1001// This is intended to make it easier to find which functions
1002// which contain lots of rules matches when developing new rules.
1003func countRule(v *Value, key string) bool {
1004	f := v.Block.Func
1005	if f.ruleMatches == nil {
1006		f.ruleMatches = make(map[string]int)
1007	}
1008	f.ruleMatches[key]++
1009	return true
1010}
1011
1012// warnRule generates compiler debug output with string s when
1013// v is not in autogenerated code, cond is true and the rule has fired.
1014func warnRule(cond bool, v *Value, s string) bool {
1015	if pos := v.Pos; pos.Line() > 1 && cond {
1016		v.Block.Func.Warnl(pos, s)
1017	}
1018	return true
1019}
1020
1021// for a pseudo-op like (LessThan x), extract x.
1022func flagArg(v *Value) *Value {
1023	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
1024		return nil
1025	}
1026	return v.Args[0]
1027}
1028
1029// arm64Negate finds the complement to an ARM64 condition code,
1030// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
1031//
1032// For floating point, it's more subtle because NaN is unordered. We do
1033// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
1034func arm64Negate(op Op) Op {
1035	switch op {
1036	case OpARM64LessThan:
1037		return OpARM64GreaterEqual
1038	case OpARM64LessThanU:
1039		return OpARM64GreaterEqualU
1040	case OpARM64GreaterThan:
1041		return OpARM64LessEqual
1042	case OpARM64GreaterThanU:
1043		return OpARM64LessEqualU
1044	case OpARM64LessEqual:
1045		return OpARM64GreaterThan
1046	case OpARM64LessEqualU:
1047		return OpARM64GreaterThanU
1048	case OpARM64GreaterEqual:
1049		return OpARM64LessThan
1050	case OpARM64GreaterEqualU:
1051		return OpARM64LessThanU
1052	case OpARM64Equal:
1053		return OpARM64NotEqual
1054	case OpARM64NotEqual:
1055		return OpARM64Equal
1056	case OpARM64LessThanF:
1057		return OpARM64NotLessThanF
1058	case OpARM64NotLessThanF:
1059		return OpARM64LessThanF
1060	case OpARM64LessEqualF:
1061		return OpARM64NotLessEqualF
1062	case OpARM64NotLessEqualF:
1063		return OpARM64LessEqualF
1064	case OpARM64GreaterThanF:
1065		return OpARM64NotGreaterThanF
1066	case OpARM64NotGreaterThanF:
1067		return OpARM64GreaterThanF
1068	case OpARM64GreaterEqualF:
1069		return OpARM64NotGreaterEqualF
1070	case OpARM64NotGreaterEqualF:
1071		return OpARM64GreaterEqualF
1072	default:
1073		panic("unreachable")
1074	}
1075}
1076
1077// arm64Invert evaluates (InvertFlags op), which
1078// is the same as altering the condition codes such
1079// that the same result would be produced if the arguments
1080// to the flag-generating instruction were reversed, e.g.
1081// (InvertFlags (CMP x y)) -> (CMP y x)
1082func arm64Invert(op Op) Op {
1083	switch op {
1084	case OpARM64LessThan:
1085		return OpARM64GreaterThan
1086	case OpARM64LessThanU:
1087		return OpARM64GreaterThanU
1088	case OpARM64GreaterThan:
1089		return OpARM64LessThan
1090	case OpARM64GreaterThanU:
1091		return OpARM64LessThanU
1092	case OpARM64LessEqual:
1093		return OpARM64GreaterEqual
1094	case OpARM64LessEqualU:
1095		return OpARM64GreaterEqualU
1096	case OpARM64GreaterEqual:
1097		return OpARM64LessEqual
1098	case OpARM64GreaterEqualU:
1099		return OpARM64LessEqualU
1100	case OpARM64Equal, OpARM64NotEqual:
1101		return op
1102	case OpARM64LessThanF:
1103		return OpARM64GreaterThanF
1104	case OpARM64GreaterThanF:
1105		return OpARM64LessThanF
1106	case OpARM64LessEqualF:
1107		return OpARM64GreaterEqualF
1108	case OpARM64GreaterEqualF:
1109		return OpARM64LessEqualF
1110	case OpARM64NotLessThanF:
1111		return OpARM64NotGreaterThanF
1112	case OpARM64NotGreaterThanF:
1113		return OpARM64NotLessThanF
1114	case OpARM64NotLessEqualF:
1115		return OpARM64NotGreaterEqualF
1116	case OpARM64NotGreaterEqualF:
1117		return OpARM64NotLessEqualF
1118	default:
1119		panic("unreachable")
1120	}
1121}
1122
1123// evaluate an ARM64 op against a flags value
1124// that is potentially constant; return 1 for true,
1125// -1 for false, and 0 for not constant.
1126func ccARM64Eval(op Op, flags *Value) int {
1127	fop := flags.Op
1128	if fop == OpARM64InvertFlags {
1129		return -ccARM64Eval(op, flags.Args[0])
1130	}
1131	if fop != OpARM64FlagConstant {
1132		return 0
1133	}
1134	fc := flagConstant(flags.AuxInt)
1135	b2i := func(b bool) int {
1136		if b {
1137			return 1
1138		}
1139		return -1
1140	}
1141	switch op {
1142	case OpARM64Equal:
1143		return b2i(fc.eq())
1144	case OpARM64NotEqual:
1145		return b2i(fc.ne())
1146	case OpARM64LessThan:
1147		return b2i(fc.lt())
1148	case OpARM64LessThanU:
1149		return b2i(fc.ult())
1150	case OpARM64GreaterThan:
1151		return b2i(fc.gt())
1152	case OpARM64GreaterThanU:
1153		return b2i(fc.ugt())
1154	case OpARM64LessEqual:
1155		return b2i(fc.le())
1156	case OpARM64LessEqualU:
1157		return b2i(fc.ule())
1158	case OpARM64GreaterEqual:
1159		return b2i(fc.ge())
1160	case OpARM64GreaterEqualU:
1161		return b2i(fc.uge())
1162	}
1163	return 0
1164}
1165
1166// logRule logs the use of the rule s. This will only be enabled if
1167// rewrite rules were generated with the -log option, see _gen/rulegen.go.
1168func logRule(s string) {
1169	if ruleFile == nil {
1170		// Open a log file to write log to. We open in append
1171		// mode because all.bash runs the compiler lots of times,
1172		// and we want the concatenation of all of those logs.
1173		// This means, of course, that users need to rm the old log
1174		// to get fresh data.
1175		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
1176		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
1177			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
1178		if err != nil {
1179			panic(err)
1180		}
1181		ruleFile = w
1182	}
1183	_, err := fmt.Fprintln(ruleFile, s)
1184	if err != nil {
1185		panic(err)
1186	}
1187}
1188
1189var ruleFile io.Writer
1190
1191func min(x, y int64) int64 {
1192	if x < y {
1193		return x
1194	}
1195	return y
1196}
1197func max(x, y int64) int64 {
1198	if x > y {
1199		return x
1200	}
1201	return y
1202}
1203
1204func isConstZero(v *Value) bool {
1205	switch v.Op {
1206	case OpConstNil:
1207		return true
1208	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
1209		return v.AuxInt == 0
1210	}
1211	return false
1212}
1213
1214// reciprocalExact64 reports whether 1/c is exactly representable.
1215func reciprocalExact64(c float64) bool {
1216	b := math.Float64bits(c)
1217	man := b & (1<<52 - 1)
1218	if man != 0 {
1219		return false // not a power of 2, denormal, or NaN
1220	}
1221	exp := b >> 52 & (1<<11 - 1)
1222	// exponent bias is 0x3ff.  So taking the reciprocal of a number
1223	// changes the exponent to 0x7fe-exp.
1224	switch exp {
1225	case 0:
1226		return false // ±0
1227	case 0x7ff:
1228		return false // ±inf
1229	case 0x7fe:
1230		return false // exponent is not representable
1231	default:
1232		return true
1233	}
1234}
1235
1236// reciprocalExact32 reports whether 1/c is exactly representable.
1237func reciprocalExact32(c float32) bool {
1238	b := math.Float32bits(c)
1239	man := b & (1<<23 - 1)
1240	if man != 0 {
1241		return false // not a power of 2, denormal, or NaN
1242	}
1243	exp := b >> 23 & (1<<8 - 1)
1244	// exponent bias is 0x7f.  So taking the reciprocal of a number
1245	// changes the exponent to 0xfe-exp.
1246	switch exp {
1247	case 0:
1248		return false // ±0
1249	case 0xff:
1250		return false // ±inf
1251	case 0xfe:
1252		return false // exponent is not representable
1253	default:
1254		return true
1255	}
1256}
1257
1258// check if an immediate can be directly encoded into an ARM's instruction.
1259func isARMImmRot(v uint32) bool {
1260	for i := 0; i < 16; i++ {
1261		if v&^0xff == 0 {
1262			return true
1263		}
1264		v = v<<2 | v>>30
1265	}
1266
1267	return false
1268}
1269
1270// overlap reports whether the ranges given by the given offset and
1271// size pairs overlap.
1272func overlap(offset1, size1, offset2, size2 int64) bool {
1273	if offset1 >= offset2 && offset2+size2 > offset1 {
1274		return true
1275	}
1276	if offset2 >= offset1 && offset1+size1 > offset2 {
1277		return true
1278	}
1279	return false
1280}
1281
1282func areAdjacentOffsets(off1, off2, size int64) bool {
1283	return off1+size == off2 || off1 == off2+size
1284}
1285
1286// check if value zeroes out upper 32-bit of 64-bit register.
1287// depth limits recursion depth. In AMD64.rules 3 is used as limit,
1288// because it catches same amount of cases as 4.
1289func zeroUpper32Bits(x *Value, depth int) bool {
1290	if x.Type.IsSigned() && x.Type.Size() < 8 {
1291		// If the value is signed, it might get re-sign-extended
1292		// during spill and restore. See issue 68227.
1293		return false
1294	}
1295	switch x.Op {
1296	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
1297		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
1298		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
1299		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
1300		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
1301		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
1302		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
1303		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
1304		OpAMD64SHLL, OpAMD64SHLLconst:
1305		return true
1306	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
1307		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
1308		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
1309		return true
1310	case OpArg: // note: but not ArgIntReg
1311		// amd64 always loads args from the stack unsigned.
1312		// most other architectures load them sign/zero extended based on the type.
1313		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
1314	case OpPhi, OpSelect0, OpSelect1:
1315		// Phis can use each-other as an arguments, instead of tracking visited values,
1316		// just limit recursion depth.
1317		if depth <= 0 {
1318			return false
1319		}
1320		for i := range x.Args {
1321			if !zeroUpper32Bits(x.Args[i], depth-1) {
1322				return false
1323			}
1324		}
1325		return true
1326
1327	}
1328	return false
1329}
1330
1331// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1332func zeroUpper48Bits(x *Value, depth int) bool {
1333	if x.Type.IsSigned() && x.Type.Size() < 8 {
1334		return false
1335	}
1336	switch x.Op {
1337	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
1338		return true
1339	case OpArg: // note: but not ArgIntReg
1340		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
1341	case OpPhi, OpSelect0, OpSelect1:
1342		// Phis can use each-other as an arguments, instead of tracking visited values,
1343		// just limit recursion depth.
1344		if depth <= 0 {
1345			return false
1346		}
1347		for i := range x.Args {
1348			if !zeroUpper48Bits(x.Args[i], depth-1) {
1349				return false
1350			}
1351		}
1352		return true
1353
1354	}
1355	return false
1356}
1357
1358// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1359func zeroUpper56Bits(x *Value, depth int) bool {
1360	if x.Type.IsSigned() && x.Type.Size() < 8 {
1361		return false
1362	}
1363	switch x.Op {
1364	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
1365		return true
1366	case OpArg: // note: but not ArgIntReg
1367		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
1368	case OpPhi, OpSelect0, OpSelect1:
1369		// Phis can use each-other as an arguments, instead of tracking visited values,
1370		// just limit recursion depth.
1371		if depth <= 0 {
1372			return false
1373		}
1374		for i := range x.Args {
1375			if !zeroUpper56Bits(x.Args[i], depth-1) {
1376				return false
1377			}
1378		}
1379		return true
1380
1381	}
1382	return false
1383}
1384
1385func isInlinableMemclr(c *Config, sz int64) bool {
1386	if sz < 0 {
1387		return false
1388	}
1389	// TODO: expand this check to allow other architectures
1390	// see CL 454255 and issue 56997
1391	switch c.arch {
1392	case "amd64", "arm64":
1393		return true
1394	case "ppc64le", "ppc64":
1395		return sz < 512
1396	}
1397	return false
1398}
1399
1400// isInlinableMemmove reports whether the given arch performs a Move of the given size
1401// faster than memmove. It will only return true if replacing the memmove with a Move is
1402// safe, either because Move will do all of its loads before any of its stores, or
1403// because the arguments are known to be disjoint.
1404// This is used as a check for replacing memmove with Move ops.
1405func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1406	// It is always safe to convert memmove into Move when its arguments are disjoint.
1407	// Move ops may or may not be faster for large sizes depending on how the platform
1408	// lowers them, so we only perform this optimization on platforms that we know to
1409	// have fast Move ops.
1410	switch c.arch {
1411	case "amd64":
1412		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
1413	case "386", "arm64":
1414		return sz <= 8
1415	case "s390x", "ppc64", "ppc64le":
1416		return sz <= 8 || disjoint(dst, sz, src, sz)
1417	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
1418		return sz <= 4
1419	}
1420	return false
1421}
1422func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
1423	return isInlinableMemmove(dst, src, sz, c)
1424}
1425
1426// logLargeCopy logs the occurrence of a large copy.
1427// The best place to do this is in the rewrite rules where the size of the move is easy to find.
1428// "Large" is arbitrarily chosen to be 128 bytes; this may change.
1429func logLargeCopy(v *Value, s int64) bool {
1430	if s < 128 {
1431		return true
1432	}
1433	if logopt.Enabled() {
1434		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
1435	}
1436	return true
1437}
1438func LogLargeCopy(funcName string, pos src.XPos, s int64) {
1439	if s < 128 {
1440		return
1441	}
1442	if logopt.Enabled() {
1443		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
1444	}
1445}
1446
1447// hasSmallRotate reports whether the architecture has rotate instructions
1448// for sizes < 32-bit.  This is used to decide whether to promote some rotations.
1449func hasSmallRotate(c *Config) bool {
1450	switch c.arch {
1451	case "amd64", "386":
1452		return true
1453	default:
1454		return false
1455	}
1456}
1457
1458func supportsPPC64PCRel() bool {
1459	// PCRel is currently supported for >= power10, linux only
1460	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
1461	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
1462}
1463
1464func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
1465	if sh < 0 || sh >= sz {
1466		panic("PPC64 shift arg sh out of range")
1467	}
1468	if mb < 0 || mb >= sz {
1469		panic("PPC64 shift arg mb out of range")
1470	}
1471	if me < 0 || me >= sz {
1472		panic("PPC64 shift arg me out of range")
1473	}
1474	return int32(sh<<16 | mb<<8 | me)
1475}
1476
1477func GetPPC64Shiftsh(auxint int64) int64 {
1478	return int64(int8(auxint >> 16))
1479}
1480
1481func GetPPC64Shiftmb(auxint int64) int64 {
1482	return int64(int8(auxint >> 8))
1483}
1484
1485func GetPPC64Shiftme(auxint int64) int64 {
1486	return int64(int8(auxint))
1487}
1488
1489// Test if this value can encoded as a mask for a rlwinm like
1490// operation.  Masks can also extend from the msb and wrap to
1491// the lsb too.  That is, the valid masks are 32 bit strings
1492// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
1493func isPPC64WordRotateMask(v64 int64) bool {
1494	// Isolate rightmost 1 (if none 0) and add.
1495	v := uint32(v64)
1496	vp := (v & -v) + v
1497	// Likewise, for the wrapping case.
1498	vn := ^v
1499	vpn := (vn & -vn) + vn
1500	return (v&vp == 0 || vn&vpn == 0) && v != 0
1501}
1502
1503// Compress mask and shift into single value of the form
1504// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
1505// be used to regenerate the input mask.
1506func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
1507	var mb, me, mbn, men int
1508
1509	// Determine boundaries and then decode them
1510	if mask == 0 || ^mask == 0 || rotate >= nbits {
1511		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
1512	} else if nbits == 32 {
1513		mb = bits.LeadingZeros32(uint32(mask))
1514		me = 32 - bits.TrailingZeros32(uint32(mask))
1515		mbn = bits.LeadingZeros32(^uint32(mask))
1516		men = 32 - bits.TrailingZeros32(^uint32(mask))
1517	} else {
1518		mb = bits.LeadingZeros64(uint64(mask))
1519		me = 64 - bits.TrailingZeros64(uint64(mask))
1520		mbn = bits.LeadingZeros64(^uint64(mask))
1521		men = 64 - bits.TrailingZeros64(^uint64(mask))
1522	}
1523	// Check for a wrapping mask (e.g bits at 0 and 63)
1524	if mb == 0 && me == int(nbits) {
1525		// swap the inverted values
1526		mb, me = men, mbn
1527	}
1528
1529	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
1530}
1531
1532// Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
1533// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
1534// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
1535// operations can be combined. This functions assumes the two opcodes can
1536// be merged, and returns an encoded rotate+mask value of the combined RLDICL.
1537func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
1538	mb := s
1539	r := 64 - s
1540	// A larger mb is a smaller mask.
1541	if (encoded>>8)&0xFF < mb {
1542		encoded = (encoded &^ 0xFF00) | mb<<8
1543	}
1544	// The rotate is expected to be 0.
1545	if (encoded & 0xFF0000) != 0 {
1546		panic("non-zero rotate")
1547	}
1548	return encoded | r<<16
1549}
1550
1551// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
1552// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
1553func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
1554	auxint := uint64(sauxint)
1555	rotate = int64((auxint >> 16) & 0xFF)
1556	mb = int64((auxint >> 8) & 0xFF)
1557	me = int64((auxint >> 0) & 0xFF)
1558	nbits := int64((auxint >> 24) & 0xFF)
1559	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
1560	if mb > me {
1561		mask = ^mask
1562	}
1563	if nbits == 32 {
1564		mask = uint64(uint32(mask))
1565	}
1566
1567	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
1568	// is inclusive.
1569	me = (me - 1) & (nbits - 1)
1570	return
1571}
1572
1573// This verifies that the mask is a set of
1574// consecutive bits including the least
1575// significant bit.
1576func isPPC64ValidShiftMask(v int64) bool {
1577	if (v != 0) && ((v+1)&v) == 0 {
1578		return true
1579	}
1580	return false
1581}
1582
1583func getPPC64ShiftMaskLength(v int64) int64 {
1584	return int64(bits.Len64(uint64(v)))
1585}
1586
1587// Decompose a shift right into an equivalent rotate/mask,
1588// and return mask & m.
1589func mergePPC64RShiftMask(m, s, nbits int64) int64 {
1590	smask := uint64((1<<uint(nbits))-1) >> uint(s)
1591	return m & int64(smask)
1592}
1593
1594// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
1595func mergePPC64AndSrwi(m, s int64) int64 {
1596	mask := mergePPC64RShiftMask(m, s, 32)
1597	if !isPPC64WordRotateMask(mask) {
1598		return 0
1599	}
1600	return encodePPC64RotateMask((32-s)&31, mask, 32)
1601}
1602
1603// Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
1604// Return the encoded RLWINM constant, or 0 if they cannot be merged.
1605func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
1606	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
1607	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
1608	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1609
1610	// Rewrite mask to apply after the final left shift.
1611	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1612
1613	r_1 := 32 - srw
1614	r_2 := GetPPC64Shiftsh(sld)
1615	r_3 := (r_1 + r_2) & 31 // This can wrap.
1616
1617	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1618		return 0
1619	}
1620	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
1621}
1622
1623// Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
1624// Return the encoded RLWINM constant, or 0 if they cannot be merged.
1625func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
1626	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
1627	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
1628	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1629
1630	// Rewrite mask to apply after the final left shift.
1631	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
1632
1633	r_1 := 64 - srd
1634	r_2 := GetPPC64Shiftsh(sld)
1635	r_3 := (r_1 + r_2) & 63 // This can wrap.
1636
1637	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
1638		return 0
1639	}
1640	// This combine only works when selecting and shifting the lower 32 bits.
1641	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
1642	if v1&mask_3 != 0 {
1643		return 0
1644	}
1645	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
1646}
1647
1648// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
1649// the encoded RLWINM constant, or 0 if they cannot be merged.
1650func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
1651	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
1652	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
1653	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
1654
1655	// combine the masks, and adjust for the final left shift.
1656	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
1657	r_2 := GetPPC64Shiftsh(int64(sld))
1658	r_3 := (r_1 + r_2) & 31 // This can wrap.
1659
1660	// Verify the result is still a valid bitmask of <= 32 bits.
1661	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
1662		return 0
1663	}
1664	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1665}
1666
1667// Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
1668// or 0 if they cannot be merged.
1669func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
1670	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
1671	mask_out := (mask_rlw & uint64(mask))
1672
1673	// Verify the result is still a valid bitmask of <= 32 bits.
1674	if !isPPC64WordRotateMask(int64(mask_out)) {
1675		return 0
1676	}
1677	return encodePPC64RotateMask(r, int64(mask_out), 32)
1678}
1679
1680// Test if RLWINM opcode rlw clears the upper 32 bits of the
1681// result. Return rlw if it does, 0 otherwise.
1682func mergePPC64MovwzregRlwinm(rlw int64) int64 {
1683	_, mb, me, _ := DecodePPC64RotateMask(rlw)
1684	if mb > me {
1685		return 0
1686	}
1687	return rlw
1688}
1689
1690// Test if AND feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
1691// or 0 if they cannot be merged.
1692func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
1693	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
1694
1695	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
1696	r_mask := bits.RotateLeft32(mask, int(r))
1697
1698	mask_out := (mask_rlw & uint64(r_mask))
1699
1700	// Verify the result is still a valid bitmask of <= 32 bits.
1701	if !isPPC64WordRotateMask(int64(mask_out)) {
1702		return 0
1703	}
1704	return encodePPC64RotateMask(r, int64(mask_out), 32)
1705}
1706
1707// Test if RLWINM feeding into SRDconst can be merged. Return the encoded RLIWNM constant,
1708// or 0 if they cannot be merged.
1709func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
1710	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
1711	if mb > me || mb < sldi {
1712		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
1713		// Likewise, if mb is less than the shift amount, it cannot be merged.
1714		return 0
1715	}
1716	// combine the masks, and adjust for the final left shift.
1717	mask_3 := mask_1 << sldi
1718	r_3 := (r_1 + sldi) & 31 // This can wrap.
1719
1720	// Verify the result is still a valid bitmask of <= 32 bits.
1721	if uint64(uint32(mask_3)) != mask_3 {
1722		return 0
1723	}
1724	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
1725}
1726
1727// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
1728// or return 0 if they cannot be combined.
1729func mergePPC64SldiSrw(sld, srw int64) int64 {
1730	if sld > srw || srw >= 32 {
1731		return 0
1732	}
1733	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
1734	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
1735	mask := (mask_r & mask_l) << uint(sld)
1736	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
1737}
1738
1739// Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
1740// to (Select0 (opCC x y)) without having to explicitly fixup every user
1741// of op.
1742//
1743// E.g consider the case:
1744// a = (ADD x y)
1745// b = (CMPconst [0] a)
1746// c = (OR a z)
1747//
1748// A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
1749// would produce:
1750// a  = (ADD x y)
1751// a' = (ADDCC x y)
1752// a” = (Select0 a')
1753// b  = (CMPconst [0] a”)
1754// c  = (OR a z)
1755//
1756// which makes it impossible to rewrite the second user. Instead the result
1757// of this conversion is:
1758// a' = (ADDCC x y)
1759// a  = (Select0 a')
1760// b  = (CMPconst [0] a)
1761// c  = (OR a z)
1762//
1763// Which makes it trivial to rewrite b using a lowering rule.
1764func convertPPC64OpToOpCC(op *Value) *Value {
1765	ccOpMap := map[Op]Op{
1766		OpPPC64ADD:      OpPPC64ADDCC,
1767		OpPPC64ADDconst: OpPPC64ADDCCconst,
1768		OpPPC64AND:      OpPPC64ANDCC,
1769		OpPPC64ANDconst: OpPPC64ANDCCconst,
1770		OpPPC64ANDN:     OpPPC64ANDNCC,
1771		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
1772		OpPPC64OR:       OpPPC64ORCC,
1773		OpPPC64RLDICL:   OpPPC64RLDICLCC,
1774		OpPPC64SUB:      OpPPC64SUBCC,
1775		OpPPC64NEG:      OpPPC64NEGCC,
1776		OpPPC64NOR:      OpPPC64NORCC,
1777		OpPPC64XOR:      OpPPC64XORCC,
1778	}
1779	b := op.Block
1780	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
1781	opCC.AddArgs(op.Args...)
1782	op.reset(OpSelect0)
1783	op.AddArgs(opCC)
1784	return op
1785}
1786
1787// Try converting a RLDICL to ANDCC. If successful, return the mask otherwise 0.
1788func convertPPC64RldiclAndccconst(sauxint int64) int64 {
1789	r, _, _, mask := DecodePPC64RotateMask(sauxint)
1790	if r != 0 || mask&0xFFFF != mask {
1791		return 0
1792	}
1793	return int64(mask)
1794}
1795
1796// Convenience function to rotate a 32 bit constant value by another constant.
1797func rotateLeft32(v, rotate int64) int64 {
1798	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
1799}
1800
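// rotateRight64 rotates a 64-bit constant value right by another constant.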
1801func rotateRight64(v, rotate int64) int64 {
1802	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
1803}
1804
1805// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
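// For example (illustrative), armBFAuxInt(8, 16) encodes lsb=8 and width=16
// as 16 | 8<<8 == 0x810.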
1806func armBFAuxInt(lsb, width int64) arm64BitField {
1807	if lsb < 0 || lsb > 63 {
1808		panic("ARM(64) bit field lsb constant out of range")
1809	}
1810	if width < 1 || lsb+width > 64 {
1811		panic("ARM(64) bit field width constant out of range")
1812	}
1813	return arm64BitField(width | lsb<<8)
1814}
1815
1816// returns the lsb part of the auxInt field of arm64 bitfield ops.
1817func (bfc arm64BitField) getARM64BFlsb() int64 {
1818	return int64(uint64(bfc) >> 8)
1819}
1820
1821// returns the width part of the auxInt field of arm64 bitfield ops.
1822func (bfc arm64BitField) getARM64BFwidth() int64 {
1823	return int64(bfc) & 0xff
1824}
1825
1826// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
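// For example (illustrative), isARM64BFMask(8, 0xff0, 4) is true: 0xff0>>4 is 0xff,
// a contiguous run of ones starting at bit 0, and its width 8 plus lsb 8 still fits
// within 64 bits.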
1827func isARM64BFMask(lsb, mask, rshift int64) bool {
1828	shiftedMask := int64(uint64(mask) >> uint64(rshift))
1829	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
1830}
1831
1832// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
1833func arm64BFWidth(mask, rshift int64) int64 {
1834	shiftedMask := int64(uint64(mask) >> uint64(rshift))
1835	if shiftedMask == 0 {
1836		panic("ARM64 BF mask is zero")
1837	}
1838	return nto(shiftedMask)
1839}
1840
1841// sizeof returns the size of t in bytes.
1842// It will panic if t is not a *types.Type.
1843func sizeof(t interface{}) int64 {
1844	return t.(*types.Type).Size()
1845}
1846
1847// registerizable reports whether t is a primitive type that fits in
1848// a register. It assumes float64 values will always fit into registers
1849// even if that isn't strictly true.
1850func registerizable(b *Block, typ *types.Type) bool {
1851	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
1852		return true
1853	}
1854	if typ.IsInteger() {
1855		return typ.Size() <= b.Func.Config.RegSize
1856	}
1857	return false
1858}
1859
// needRaceCleanup reports whether this call to racefuncenter/racefuncexit can be removed.
1861func needRaceCleanup(sym *AuxCall, v *Value) bool {
1862	f := v.Block.Func
1863	if !f.Config.Race {
1864		return false
1865	}
1866	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
1867		return false
1868	}
1869	for _, b := range f.Blocks {
1870		for _, v := range b.Values {
1871			switch v.Op {
1872			case OpStaticCall, OpStaticLECall:
				// While checking for racefuncenter we will also encounter racefuncexit
				// (and vice versa); those are fine, as are calls to the panic* functions.
1875				s := v.Aux.(*AuxCall).Fn.String()
1876				switch s {
1877				case "runtime.racefuncenter", "runtime.racefuncexit",
1878					"runtime.panicdivide", "runtime.panicwrap",
1879					"runtime.panicshift":
1880					continue
1881				}
				// Any other call means we need to keep racefunc*
				// for accurate stack traces.
1884				return false
1885			case OpPanicBounds, OpPanicExtend:
1886				// Note: these are panic generators that are ok (like the static calls above).
1887			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
1888				// We must keep the race functions if there are any other call types.
1889				return false
1890			}
1891		}
1892	}
1893	if isSameCall(sym, "runtime.racefuncenter") {
1894		// TODO REGISTER ABI this needs to be cleaned up.
1895		// If we're removing racefuncenter, remove its argument as well.
1896		if v.Args[0].Op != OpStore {
1897			if v.Op == OpStaticLECall {
1898				// there is no store, yet.
1899				return true
1900			}
1901			return false
1902		}
1903		mem := v.Args[0].Args[2]
1904		v.Args[0].reset(OpCopy)
1905		v.Args[0].AddArg(mem)
1906	}
1907	return true
1908}
1909
1910// symIsRO reports whether sym is a read-only global.
1911func symIsRO(sym interface{}) bool {
1912	lsym := sym.(*obj.LSym)
1913	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
1914}
1915
1916// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
1917func symIsROZero(sym Sym) bool {
1918	lsym := sym.(*obj.LSym)
1919	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
1920		return false
1921	}
1922	for _, b := range lsym.P {
1923		if b != 0 {
1924			return false
1925		}
1926	}
1927	return true
1928}
1929
1930// isFixed32 returns true if the int32 at offset off in symbol sym
1931// is known and constant.
1932func isFixed32(c *Config, sym Sym, off int64) bool {
1933	return isFixed(c, sym, off, 4)
1934}
1935
1936// isFixed returns true if the range [off,off+size] of the symbol sym
1937// is known and constant.
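//
// For example (illustrative), for a type descriptor symbol (one whose Extra is an
// *obj.TypeInfo), the 4-byte hash field lives at offset 2*c.PtrSize, so
// isFixed(c, sym, 2*c.PtrSize, 4) reports true.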
1938func isFixed(c *Config, sym Sym, off, size int64) bool {
1939	lsym := sym.(*obj.LSym)
1940	if lsym.Extra == nil {
1941		return false
1942	}
1943	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1944		if off == 2*c.PtrSize && size == 4 {
1945			return true // type hash field
1946		}
1947	}
1948	return false
1949}
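
// fixed32 returns the known constant int32 at offset off in symbol sym.
// It must only be called when isFixed32(c, sym, off) reports true.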
1950func fixed32(c *Config, sym Sym, off int64) int32 {
1951	lsym := sym.(*obj.LSym)
1952	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
1953		if off == 2*c.PtrSize {
1954			return int32(types.TypeHash(ti.Type.(*types.Type)))
1955		}
1956	}
1957	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
1958	return 0
1959}
1960
// isFixedSym reports whether the contents of sym at offset off
// are known and are the constant address of another symbol.
1963func isFixedSym(sym Sym, off int64) bool {
1964	lsym := sym.(*obj.LSym)
1965	switch {
1966	case lsym.Type == objabi.SRODATA:
1967		// itabs, dictionaries
1968	default:
1969		return false
1970	}
1971	for _, r := range lsym.R {
1972		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
1973			return true
1974		}
1975	}
1976	return false
1977}
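
// fixedSym returns the symbol whose constant address is stored at offset off of sym.
// It must only be called when isFixedSym(sym, off) reports true.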
1978func fixedSym(f *Func, sym Sym, off int64) Sym {
1979	lsym := sym.(*obj.LSym)
1980	for _, r := range lsym.R {
1981		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
1982			if strings.HasPrefix(r.Sym.Name, "type:") {
1983				// In case we're loading a type out of a dictionary, we need to record
1984				// that the containing function might put that type in an interface.
1985				// That information is currently recorded in relocations in the dictionary,
1986				// but if we perform this load at compile time then the dictionary
1987				// might be dead.
1988				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
1989			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
1990				// Same, but if we're using an itab we need to record that the
1991				// itab._type might be put in an interface.
1992				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
1993			}
1994			return r.Sym
1995		}
1996	}
1997	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
1998	return nil
1999}
2000
2001// read8 reads one byte from the read-only global sym at offset off.
2002func read8(sym interface{}, off int64) uint8 {
2003	lsym := sym.(*obj.LSym)
2004	if off >= int64(len(lsym.P)) || off < 0 {
2005		// Invalid index into the global sym.
2006		// This can happen in dead code, so we don't want to panic.
2007		// Just return any value, it will eventually get ignored.
2008		// See issue 29215.
2009		return 0
2010	}
2011	return lsym.P[off]
2012}
2013
2014// read16 reads two bytes from the read-only global sym at offset off.
2015func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
2016	lsym := sym.(*obj.LSym)
2017	// lsym.P is written lazily.
2018	// Bytes requested after the end of lsym.P are 0.
2019	var src []byte
2020	if 0 <= off && off < int64(len(lsym.P)) {
2021		src = lsym.P[off:]
2022	}
2023	buf := make([]byte, 2)
2024	copy(buf, src)
2025	return byteorder.Uint16(buf)
2026}
2027
2028// read32 reads four bytes from the read-only global sym at offset off.
2029func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
2030	lsym := sym.(*obj.LSym)
2031	var src []byte
2032	if 0 <= off && off < int64(len(lsym.P)) {
2033		src = lsym.P[off:]
2034	}
2035	buf := make([]byte, 4)
2036	copy(buf, src)
2037	return byteorder.Uint32(buf)
2038}
2039
2040// read64 reads eight bytes from the read-only global sym at offset off.
2041func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
2042	lsym := sym.(*obj.LSym)
2043	var src []byte
2044	if 0 <= off && off < int64(len(lsym.P)) {
2045		src = lsym.P[off:]
2046	}
2047	buf := make([]byte, 8)
2048	copy(buf, src)
2049	return byteorder.Uint64(buf)
2050}
2051
// sequentialAddresses reports whether it can prove that x + n == y.
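//
// For example (illustrative), if x is (ADDQ a b) and y is (LEAQ1 [8] a b),
// then y == x + 8.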
2053func sequentialAddresses(x, y *Value, n int64) bool {
2054	if x == y && n == 0 {
2055		return true
2056	}
2057	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
2058		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
2059			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
2060		return true
2061	}
2062	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
2063		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
2064			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
2065		return true
2066	}
2067	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
2068		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
2069			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
2070		return true
2071	}
2072	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
2073		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
2074			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
2075		return true
2076	}
2077	return false
2078}
2079
2080// flagConstant represents the result of a compile-time comparison.
2081// The sense of these flags does not necessarily represent the hardware's notion
2082// of a flags register - these are just a compile-time construct.
2083// We happen to match the semantics to those of arm/arm64.
2084// Note that these semantics differ from x86: the carry flag has the opposite
2085// sense on a subtraction!
2086//
2087//	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
2088//	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
2089//	 (because it does x + ^y + C).
2090//
2091// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
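//
// For example, the unsigned computation 2 - 3 borrows: under the arm-like semantics
// used here C is 0 (see subFlags64 below), while x86 would set its carry flag to 1.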
2092type flagConstant uint8
2093
2094// N reports whether the result of an operation is negative (high bit set).
2095func (fc flagConstant) N() bool {
2096	return fc&1 != 0
2097}
2098
2099// Z reports whether the result of an operation is 0.
2100func (fc flagConstant) Z() bool {
2101	return fc&2 != 0
2102}
2103
2104// C reports whether an unsigned add overflowed (carry), or an
2105// unsigned subtract did not underflow (borrow).
2106func (fc flagConstant) C() bool {
2107	return fc&4 != 0
2108}
2109
2110// V reports whether a signed operation overflowed or underflowed.
2111func (fc flagConstant) V() bool {
2112	return fc&8 != 0
2113}
2114
2115func (fc flagConstant) eq() bool {
2116	return fc.Z()
2117}
2118func (fc flagConstant) ne() bool {
2119	return !fc.Z()
2120}
2121func (fc flagConstant) lt() bool {
2122	return fc.N() != fc.V()
2123}
2124func (fc flagConstant) le() bool {
2125	return fc.Z() || fc.lt()
2126}
2127func (fc flagConstant) gt() bool {
2128	return !fc.Z() && fc.ge()
2129}
2130func (fc flagConstant) ge() bool {
2131	return fc.N() == fc.V()
2132}
2133func (fc flagConstant) ult() bool {
2134	return !fc.C()
2135}
2136func (fc flagConstant) ule() bool {
2137	return fc.Z() || fc.ult()
2138}
2139func (fc flagConstant) ugt() bool {
2140	return !fc.Z() && fc.uge()
2141}
2142func (fc flagConstant) uge() bool {
2143	return fc.C()
2144}
2145
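// The *Noov ("no overflow") variants are like the signed comparisons above, but
// additionally require that the V (overflow) flag is clear; they report false
// whenever V is set.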
2146func (fc flagConstant) ltNoov() bool {
2147	return fc.lt() && !fc.V()
2148}
2149func (fc flagConstant) leNoov() bool {
2150	return fc.le() && !fc.V()
2151}
2152func (fc flagConstant) gtNoov() bool {
2153	return fc.gt() && !fc.V()
2154}
2155func (fc flagConstant) geNoov() bool {
2156	return fc.ge() && !fc.V()
2157}
2158
2159func (fc flagConstant) String() string {
2160	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
2161}
2162
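// flagConstantBuilder collects individual flag bits and packs them into a
// flagConstant (N in bit 0, Z in bit 1, C in bit 2, V in bit 3).
//
// For example (illustrative):
//
//	fc := flagConstantBuilder{Z: true, C: true}.encode()
//	// fc.eq() and fc.uge() are both true.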
2163type flagConstantBuilder struct {
2164	N bool
2165	Z bool
2166	C bool
2167	V bool
2168}
2169
2170func (fcs flagConstantBuilder) encode() flagConstant {
2171	var fc flagConstant
2172	if fcs.N {
2173		fc |= 1
2174	}
2175	if fcs.Z {
2176		fc |= 2
2177	}
2178	if fcs.C {
2179		fc |= 4
2180	}
2181	if fcs.V {
2182		fc |= 8
2183	}
2184	return fc
2185}
2186
2187// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
2188//  - the results of the C flag are different
2189//  - the results of the V flag when y==minint are different
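//
// For example, addFlags64(0, 0) has C clear while subFlags64(0, -0) == subFlags64(0, 0)
// has C set; and when y == math.MinInt64, -y == y, so the V results can disagree too.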
2190
2191// addFlags64 returns the flags that would be set from computing x+y.
2192func addFlags64(x, y int64) flagConstant {
2193	var fcb flagConstantBuilder
2194	fcb.Z = x+y == 0
2195	fcb.N = x+y < 0
2196	fcb.C = uint64(x+y) < uint64(x)
2197	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
2198	return fcb.encode()
2199}
2200
2201// subFlags64 returns the flags that would be set from computing x-y.
2202func subFlags64(x, y int64) flagConstant {
2203	var fcb flagConstantBuilder
2204	fcb.Z = x-y == 0
2205	fcb.N = x-y < 0
2206	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
2207	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2208	return fcb.encode()
2209}
2210
2211// addFlags32 returns the flags that would be set from computing x+y.
2212func addFlags32(x, y int32) flagConstant {
2213	var fcb flagConstantBuilder
2214	fcb.Z = x+y == 0
2215	fcb.N = x+y < 0
2216	fcb.C = uint32(x+y) < uint32(x)
2217	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
2218	return fcb.encode()
2219}
2220
2221// subFlags32 returns the flags that would be set from computing x-y.
2222func subFlags32(x, y int32) flagConstant {
2223	var fcb flagConstantBuilder
2224	fcb.Z = x-y == 0
2225	fcb.N = x-y < 0
2226	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
2227	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
2228	return fcb.encode()
2229}
2230
2231// logicFlags64 returns flags set to the sign/zeroness of x.
2232// C and V are set to false.
2233func logicFlags64(x int64) flagConstant {
2234	var fcb flagConstantBuilder
2235	fcb.Z = x == 0
2236	fcb.N = x < 0
2237	return fcb.encode()
2238}
2239
2240// logicFlags32 returns flags set to the sign/zeroness of x.
2241// C and V are set to false.
2242func logicFlags32(x int32) flagConstant {
2243	var fcb flagConstantBuilder
2244	fcb.Z = x == 0
2245	fcb.N = x < 0
2246	return fcb.encode()
2247}
2248
2249func makeJumpTableSym(b *Block) *obj.LSym {
2250	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
2251	// The jump table symbol is accessed only from the function symbol.
2252	s.Set(obj.AttrStatic, true)
2253	return s
2254}
2255
2256// canRotate reports whether the architecture supports
2257// rotates of integer registers with the given number of bits.
2258func canRotate(c *Config, bits int64) bool {
2259	if bits > c.PtrSize*8 {
2260		// Don't rewrite to rotates bigger than the machine word.
2261		return false
2262	}
2263	switch c.arch {
2264	case "386", "amd64", "arm64", "riscv64":
2265		return true
2266	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
2267		return bits >= 32
2268	default:
2269		return false
2270	}
2271}
2272
2273// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
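//
// For example (illustrative), 0x00ff00ff00ff00ff repeats with period 16 and each unit
// is a contiguous run of ones, so it is encodable; 0x101 is not, since neither it nor
// its complement is a single run of ones.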
2274func isARM64bitcon(x uint64) bool {
2275	if x == 1<<64-1 || x == 0 {
2276		return false
2277	}
2278	// determine the period and sign-extend a unit to 64 bits
2279	switch {
2280	case x != x>>32|x<<32:
2281		// period is 64
2282		// nothing to do
2283	case x != x>>16|x<<48:
2284		// period is 32
2285		x = uint64(int64(int32(x)))
2286	case x != x>>8|x<<56:
2287		// period is 16
2288		x = uint64(int64(int16(x)))
2289	case x != x>>4|x<<60:
2290		// period is 8
2291		x = uint64(int64(int8(x)))
2292	default:
2293		// period is 4 or 2, always true
2294		// 0001, 0010, 0100, 1000 -- 0001 rotate
2295		// 0011, 0110, 1100, 1001 -- 0011 rotate
2296		// 0111, 1011, 1101, 1110 -- 0111 rotate
2297		// 0101, 1010             -- 01   rotate, repeat
2298		return true
2299	}
2300	return sequenceOfOnes(x) || sequenceOfOnes(^x)
2301}
2302
2303// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
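// For example (illustrative), 0x70 (0b1110000) qualifies: adding its lowest set bit
// 0x10 gives 0x80, a power of two. 0x101 does not: 0x101 + 0x001 == 0x102, which is
// not a power of two.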
2304func sequenceOfOnes(x uint64) bool {
2305	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
2306	y += x
2307	return (y-1)&y == 0
2308}
2309
2310// isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
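// For example (illustrative), 0xfff and 0xfff000 are encodable (the latter as 0xfff
// shifted left by 12), while 0x1001 is not.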
2311func isARM64addcon(v int64) bool {
	// uimm12, or uimm12 shifted left by 12?
2313	if v < 0 {
2314		return false
2315	}
2316	if (v & 0xFFF) == 0 {
2317		v >>= 12
2318	}
2319	return v <= 0xFFF
2320}
2321
2322// setPos sets the position of v to pos, then returns true.
2323// Useful for setting the result of a rewrite's position to
2324// something other than the default.
2325func setPos(v *Value, pos src.XPos) bool {
2326	v.Pos = pos
2327	return true
2328}
2329