// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/reflectdata"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/s390x"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"encoding/binary"
	"fmt"
	"internal/buildcfg"
	"io"
	"math"
	"math/bits"
	"os"
	"path/filepath"
	"strings"
)

type deadValueChoice bool

const (
	leaveDeadValues  deadValueChoice = false
	removeDeadValues                 = true
)

// deadcode indicates whether rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
	// repeat rewrites until we find no more rewrites
	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
	pendingLines.clear()
	debug := f.pass.debug
	if debug > 1 {
		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
	}
	// if the number of rewrite iterations reaches itersLimit we will
	// at that point turn on cycle detection. Instead of a fixed limit,
	// size the limit according to func size to allow for cases such
	// as the one in issue #66773.
	itersLimit := f.NumBlocks()
	if itersLimit < 20 {
		itersLimit = 20
	}
	var iters int
	var states map[string]bool
	for {
		change := false
		deadChange := false
		for _, b := range f.Blocks {
			var b0 *Block
			if debug > 1 {
				b0 = new(Block)
				*b0 = *b
				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
			}
			for i, c := range b.ControlValues() {
				for c.Op == OpCopy {
					c = c.Args[0]
					b.ReplaceControl(i, c)
				}
			}
			if rb(b) {
				change = true
				if debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
				}
			}
			for j, v := range b.Values {
				var v0 *Value
				if debug > 1 {
					v0 = new(Value)
					*v0 = *v
					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
				}
				if v.Uses == 0 && v.removeable() {
					if v.Op != OpInvalid && deadcode == removeDeadValues {
						// Reset any values that are now unused, so that we decrement
						// the use count of all of their arguments.
						// Not quite a deadcode pass, because it does not handle cycles.
						// But it should help Uses==1 rules to fire.
						v.reset(OpInvalid)
						deadChange = true
					}
					// No point rewriting values which aren't used.
					continue
				}

				vchange := phielimValue(v)
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that a x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					aa := copySource(a)
					v.SetArg(i, aa)
					// If a, a copy, has a line boundary indicator, attempt to find a new value
					// to hold it. The first candidate is the value that will replace a (aa),
					// if it shares the same block and line and is eligible.
					// The second option is v, which has a as an input. Because aa is earlier in
					// the data flow, it is the better choice.
					if a.Pos.IsStmt() == src.PosIsStmt {
						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
							aa.Pos = aa.Pos.WithIsStmt()
						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
							v.Pos = v.Pos.WithIsStmt()
						} else {
							// Record the lost line and look for a new home after all rewrites are complete.
							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
							// line to appear in more than one block, but only one block is stored, so if both end
							// up here, then one will be lost.
							pendingLines.set(a.Pos, int32(a.Block.ID))
						}
						a.Pos = a.Pos.WithNotStmt()
					}
					vchange = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}

				// apply rewrite function
				if rv(v) {
					vchange = true
					// If value changed to a poor choice for a statement boundary, move the boundary
					if v.Pos.IsStmt() == src.PosIsStmt {
						if k := nextGoodStatementIndex(v, j, b); k != j {
							v.Pos = v.Pos.WithNotStmt()
							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
						}
					}
				}

				change = change || vchange
				if vchange && debug > 1 {
					fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
				}
			}
		}
		if !change && !deadChange {
			break
		}
		iters++
		if (iters > itersLimit || debug >= 2) && change {
			// We've done a suspiciously large number of rewrites (or we're in debug mode).
			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
			// and the maximum value encountered during make.bash is 12.
			// Start checking for cycles. (This is too expensive to do routinely.)
			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
			if states == nil {
				states = make(map[string]bool)
			}
			h := f.rewriteHash()
			if _, ok := states[h]; ok {
				// We've found a cycle.
				// To diagnose it, set debug to 2 and start again,
				// so that we'll print all rules applied until we complete another cycle.
				// If debug is already >= 2, we've already done that, so it's time to crash.
				if debug < 2 {
					debug = 2
					states = make(map[string]bool)
				} else {
					f.Fatalf("rewrite cycle detected")
				}
			}
			states[h] = true
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			vl := v.Pos
			if v.Op == OpInvalid {
				if v.Pos.IsStmt() == src.PosIsStmt {
					pendingLines.set(vl, int32(b.ID))
				}
				f.freeValue(v)
				continue
			}
			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
				pendingLines.remove(vl)
				v.Pos = v.Pos.WithIsStmt()
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if pendingLines.get(b.Pos) == int32(b.ID) {
			b.Pos = b.Pos.WithIsStmt()
			pendingLines.remove(b.Pos)
		}
		b.truncateValues(j)
	}
}

// Common functions called from rewriting rules

func is64BitFloat(t *types.Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t *types.Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t *types.Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t *types.Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t *types.Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t *types.Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t *types.Type) bool {
	return t.IsPtrShaped()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets, we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}

func canMergeSym(x, y Sym) bool {
	return x == nil || y == nil
}

// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}
	return canMergeLoad(target, load)
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			// The memory phi, if it exists, is always
			// the first logical store in the block.
			continue
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation however it is likely
			// to be very rare.
			return false
		}
		if v.Op.SymEffect()&SymAddr != 0 {
			// This case prevents an operation that calculates the
			// address of a local variable from being forced to schedule
			// before its corresponding VarDef.
			// See issue 28445.
			//   v1 = LOAD ...
			//   v2 = VARDEF
			//   v3 = LEAQ
			//   v4 = CMPQ v1 v3
			// We don't want to combine the CMPQ with the load, because
			// that would force the CMPQ to schedule before the VARDEF, which
			// in turn requires the LEAQ to schedule before the VARDEF.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						// The memory phi, if it exists, is always
						// the first logical store in the block.
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//        x = read ... v
			//      mem = write ... v
			//     load = read ... mem
			//   target = add x load
			if memPreds[v] {
				continue
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isSameCall reports whether sym is the same as the given named symbol.
func isSameCall(sym interface{}, name string) bool {
	fn := sym.(*AuxCall).Fn
	return fn != nil && fn.String() == name
}

// canLoadUnaligned reports if the architecture supports unaligned load operations.
func canLoadUnaligned(c *Config) bool {
	return c.ctxt.Arch.Alignment == 1
}

// nlzX returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }

// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }

func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return int64(ntz64(^x))
}

// logX returns logarithm of n base 2.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
	return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
	return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}

// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
	return int64(bits.Len32(uint32(n))) - 1
}

// isPowerOfTwoX functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
	return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
	n := uint64(in)
	return n > 0 && n&(n-1) == 0
}

// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
	n := uint64(uint32(in))
	return n > 0 && n&(n-1) == 0
}

// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
	return n == int64(int8(n))
}

// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
	return n == int64(uint8(n))
}

// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
	return 0 <= n && n < (1<<12)
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}
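
// The following is an illustrative sketch, not used by the compiler: a few
// concrete values for the helpers above, to make the intended semantics
// concrete. The function name is hypothetical and exists only for this example.
// ntz64(8) == 3 and log64(8) == 3 because 8 == 1<<3, isPowerOfTwo64(8) is true,
// and is32Bit(1<<31) is false (it overflows int32) while isU32Bit(1<<31) is true.
func exampleRewriteBitPredicates() []bool {
	return []bool{
		ntz64(8) == 3,     // 8 has three trailing zero bits
		log64(8) == 3,     // log base 2 of a power of two
		isPowerOfTwo64(8), // true
		!is32Bit(1 << 31), // 1<<31 does not fit in a signed 32 bit integer
		isU32Bit(1 << 31), // but it does fit in an unsigned one
	}
}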
// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1 << 19) <= n && n < (1<<19)
}

// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
	return v.AuxInt != 0
}

// canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
// generated code as much as possible.
func canonLessThan(x, y *Value) bool {
	if x.Op != y.Op {
		return x.Op < y.Op
	}
	if !x.Pos.SameFileAndLine(y.Pos) {
		return x.Pos.Before(y.Pos)
	}
	return x.ID < y.ID
}

// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
	if !isExactFloat32(f) {
		panic("truncate64Fto32F: truncation is not exact")
	}
	if !math.IsNaN(f) {
		return float32(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := math.Float64bits(f)
	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
	//          | sign                  | exponent   | mantissa       |
	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
	return math.Float32frombits(r)
}

// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
	if !math.IsNaN(float64(f)) {
		return float64(f)
	}
	// NaN bit patterns aren't necessarily preserved across conversion
	// instructions so we need to do the conversion manually.
	b := uint64(math.Float32bits(f))
	//   | sign                  | exponent      | mantissa                    |
	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
	return math.Float64frombits(r)
}

// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
	return v.AuxInt == 0
}

// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(f))
}

// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
	if f != f {
		panic("can't encode a NaN in AuxInt field")
	}
	return int64(math.Float64bits(extend32Fto64F(f)))
}

// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
	return truncate64Fto32F(math.Float64frombits(uint64(i)))
}
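
// Illustrative sketch, not used by the compiler: non-NaN float32 constants
// round-trip through an AuxInt via auxFrom32F/auxTo32F, which store the bit
// pattern of the value widened to float64. The function name is hypothetical
// and exists only for this example.
func exampleAuxFloat32RoundTrip() bool {
	const c = float32(1.5)
	return auxTo32F(auxFrom32F(c)) == c // true: 1.5 is exactly representable
}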
// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
	return math.Float64frombits(uint64(i))
}

func auxIntToBool(i int64) bool {
	if i == 0 {
		return false
	}
	return true
}
func auxIntToInt8(i int64) int8 {
	return int8(i)
}
func auxIntToInt16(i int64) int16 {
	return int16(i)
}
func auxIntToInt32(i int64) int32 {
	return int32(i)
}
func auxIntToInt64(i int64) int64 {
	return i
}
func auxIntToUint8(i int64) uint8 {
	return uint8(i)
}
func auxIntToFloat32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
}
func auxIntToFloat64(i int64) float64 {
	return math.Float64frombits(uint64(i))
}
func auxIntToValAndOff(i int64) ValAndOff {
	return ValAndOff(i)
}
func auxIntToArm64BitField(i int64) arm64BitField {
	return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
	return flagConstant(x)
}

func auxIntToOp(cc int64) Op {
	return Op(cc)
}

func boolToAuxInt(b bool) int64 {
	if b {
		return 1
	}
	return 0
}
func int8ToAuxInt(i int8) int64 {
	return int64(i)
}
func int16ToAuxInt(i int16) int64 {
	return int64(i)
}
func int32ToAuxInt(i int32) int64 {
	return int64(i)
}
func int64ToAuxInt(i int64) int64 {
	return int64(i)
}
func uint8ToAuxInt(i uint8) int64 {
	return int64(int8(i))
}
func float32ToAuxInt(f float32) int64 {
	return int64(math.Float64bits(float64(f)))
}
func float64ToAuxInt(f float64) int64 {
	return int64(math.Float64bits(f))
}
func valAndOffToAuxInt(v ValAndOff) int64 {
	return int64(v)
}
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
	return int64(v)
}
func int128ToAuxInt(x int128) int64 {
	if x != 0 {
		panic("nonzero int128 not allowed")
	}
	return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
	return int64(x)
}

func opToAuxInt(o Op) int64 {
	return int64(o)
}

// Aux is an interface to hold miscellaneous data in Blocks and Values.
type Aux interface {
	CanBeAnSSAAux()
}

// for now only used to mark moves that need to avoid clobbering flags
type auxMark bool

func (auxMark) CanBeAnSSAAux() {}

var AuxMark auxMark

// stringAux wraps string values for use in Aux.
type stringAux string

func (stringAux) CanBeAnSSAAux() {}

func auxToString(i Aux) string {
	return string(i.(stringAux))
}
func auxToSym(i Aux) Sym {
	// TODO: kind of a hack - allows nil interface through
	s, _ := i.(Sym)
	return s
}
func auxToType(i Aux) *types.Type {
	return i.(*types.Type)
}
func auxToCall(i Aux) *AuxCall {
	return i.(*AuxCall)
}
func auxToS390xCCMask(i Aux) s390x.CCMask {
	return i.(s390x.CCMask)
}
func auxToS390xRotateParams(i Aux) s390x.RotateParams {
	return i.(s390x.RotateParams)
}

func StringToAux(s string) Aux {
	return stringAux(s)
}
func symToAux(s Sym) Aux {
	return s
}
func callToAux(s *AuxCall) Aux {
	return s
}
func typeToAux(t *types.Type) Aux {
	return t
}
func s390xCCMaskToAux(c s390x.CCMask) Aux {
	return c
}
func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
	return r
}
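
// Illustrative sketch, not used by the compiler: small integers are stored in
// AuxInt sign extended, so the unsigned encoder and decoder above undo each
// other even though the intermediate int64 looks negative. The function name
// is hypothetical and exists only for this example.
func exampleUint8AuxIntRoundTrip() bool {
	return uint8ToAuxInt(0xff) == -1 && auxIntToUint8(-1) == 0xff
}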
// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// loadLSymOffset simulates reading a word at an offset into a
// read-only symbol's runtime memory. If it would read a pointer to
// another symbol, that symbol is returned. Otherwise, it returns nil.
func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
	if lsym.Type != objabi.SRODATA {
		return nil
	}

	for _, r := range lsym.R {
		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
			return r.Sym
		}
	}

	return nil
}

func devirtLECall(v *Value, sym *obj.LSym) *Value {
	v.Op = OpStaticLECall
	auxcall := v.Aux.(*AuxCall)
	auxcall.Fn = sym
	// Remove first arg
	v.Args[0].Uses--
	copy(v.Args[0:], v.Args[1:])
	v.Args[len(v.Args)-1] = nil // aid GC
	v.Args = v.Args[:len(v.Args)-1]
	if f := v.Block.Func; f.pass.debug > 0 {
		f.Warnl(v.Pos, "de-virtualizing call")
	}
	return v
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr, OpLocalAddr:
		return p1.Aux == p2.Aux
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

func isStackPtr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr {
		v = v.Args[0]
	}
	return v.Op == OpSP || v.Op == OpLocalAddr
}

// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
	if n1 == 0 || n2 == 0 {
		return true
	}
	if p1 == p2 {
		return false
	}
	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
		base, offset = ptr, 0
		for base.Op == OpOffPtr {
			offset += base.AuxInt
			base = base.Args[0]
		}
		if opcodeTable[base.Op].nilCheck {
			base = base.Args[0]
		}
		return base, offset
	}
	p1, off1 := baseAndOffset(p1)
	p2, off2 := baseAndOffset(p2)
	if isSamePtr(p1, p2) {
		return !overlap(off1, n1, off2, n2)
	}
	// p1 and p2 are not the same, so if they are both OpAddrs then
	// they point to different variables.
	// If one pointer is on the stack and the other is an argument
	// then they can't overlap.
	switch p1.Op {
	case OpAddr, OpLocalAddr:
		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
			return true
		}
		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
	case OpArg, OpArgIntReg:
		if p2.Op == OpSP || p2.Op == OpLocalAddr {
			return true
		}
	case OpSP:
		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.PtrSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}

// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, r is the first value in a that we find by walking backwards.
	// if we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--

	}
	return nil // too far away
}

// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//
//	A) make sure the values are really dead and never used again.
//	B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
	for _, v := range vv {
		v.reset(OpInvalid)
		// Note: leave v.Block intact. The Block field is used after clobber.
	}
	return true
}

// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
	if v.Uses == 1 {
		v.reset(OpInvalid)
	}
	// Note: leave v.Block intact. The Block field is used after clobberIfDead.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//
//	noteRule("note to self: rule of interest matched")
//
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// contain lots of rule matches when developing new rules.
func countRule(v *Value, key string) bool {
	f := v.Block.Func
	if f.ruleMatches == nil {
		f.ruleMatches = make(map[string]int)
	}
	f.ruleMatches[key]++
	return true
}

// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
	if pos := v.Pos; pos.Line() > 1 && cond {
		v.Block.Func.Warnl(pos, s)
	}
	return true
}
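
// Illustrative note (hypothetical rule, not taken from any real .rules file):
// the debugging helpers above are meant to be chained onto a rule's condition
// with &&, for example
//
//	(ADDQconst [0] x) && noteRule("addqconst0 fired") => x
//
// so that the message prints (or the per-function counter increments, for
// countRule) every time the rewrite fires during development.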
// for a pseudo-op like (LessThan x), extract x.
func flagArg(v *Value) *Value {
	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
		return nil
	}
	return v.Args[0]
}

// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
//
// For floating point, it's more subtle because NaN is unordered. We do
// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
func arm64Negate(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterEqual
	case OpARM64LessThanU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterThan:
		return OpARM64LessEqual
	case OpARM64GreaterThanU:
		return OpARM64LessEqualU
	case OpARM64LessEqual:
		return OpARM64GreaterThan
	case OpARM64LessEqualU:
		return OpARM64GreaterThanU
	case OpARM64GreaterEqual:
		return OpARM64LessThan
	case OpARM64GreaterEqualU:
		return OpARM64LessThanU
	case OpARM64Equal:
		return OpARM64NotEqual
	case OpARM64NotEqual:
		return OpARM64Equal
	case OpARM64LessThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64NotLessEqualF
	case OpARM64NotLessEqualF:
		return OpARM64LessEqualF
	case OpARM64GreaterThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64GreaterEqualF
	default:
		panic("unreachable")
	}
}

// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
func arm64Invert(op Op) Op {
	switch op {
	case OpARM64LessThan:
		return OpARM64GreaterThan
	case OpARM64LessThanU:
		return OpARM64GreaterThanU
	case OpARM64GreaterThan:
		return OpARM64LessThan
	case OpARM64GreaterThanU:
		return OpARM64LessThanU
	case OpARM64LessEqual:
		return OpARM64GreaterEqual
	case OpARM64LessEqualU:
		return OpARM64GreaterEqualU
	case OpARM64GreaterEqual:
		return OpARM64LessEqual
	case OpARM64GreaterEqualU:
		return OpARM64LessEqualU
	case OpARM64Equal, OpARM64NotEqual:
		return op
	case OpARM64LessThanF:
		return OpARM64GreaterThanF
	case OpARM64GreaterThanF:
		return OpARM64LessThanF
	case OpARM64LessEqualF:
		return OpARM64GreaterEqualF
	case OpARM64GreaterEqualF:
		return OpARM64LessEqualF
	case OpARM64NotLessThanF:
		return OpARM64NotGreaterThanF
	case OpARM64NotGreaterThanF:
		return OpARM64NotLessThanF
	case OpARM64NotLessEqualF:
		return OpARM64NotGreaterEqualF
	case OpARM64NotGreaterEqualF:
		return OpARM64NotLessEqualF
	default:
		panic("unreachable")
	}
}
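
// Illustrative sketch, not used by the compiler: negation answers "which op
// tests the opposite outcome of the same comparison", while inversion answers
// "which op gives the same outcome if the comparison's operands are swapped".
// The function name is hypothetical and exists only for this example.
func exampleARM64NegateVsInvert() (negated, inverted bool) {
	negated = arm64Negate(OpARM64LessThan) == OpARM64GreaterEqual
	inverted = arm64Invert(OpARM64LessThan) == OpARM64GreaterThan
	return negated, inverted // both true
}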
// evaluate an ARM64 op against a flags value
// that is potentially constant; return 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
	fop := flags.Op
	if fop == OpARM64InvertFlags {
		return -ccARM64Eval(op, flags.Args[0])
	}
	if fop != OpARM64FlagConstant {
		return 0
	}
	fc := flagConstant(flags.AuxInt)
	b2i := func(b bool) int {
		if b {
			return 1
		}
		return -1
	}
	switch op {
	case OpARM64Equal:
		return b2i(fc.eq())
	case OpARM64NotEqual:
		return b2i(fc.ne())
	case OpARM64LessThan:
		return b2i(fc.lt())
	case OpARM64LessThanU:
		return b2i(fc.ult())
	case OpARM64GreaterThan:
		return b2i(fc.gt())
	case OpARM64GreaterThanU:
		return b2i(fc.ugt())
	case OpARM64LessEqual:
		return b2i(fc.le())
	case OpARM64LessEqualU:
		return b2i(fc.ule())
	case OpARM64GreaterEqual:
		return b2i(fc.ge())
	case OpARM64GreaterEqualU:
		return b2i(fc.uge())
	}
	return 0
}

// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see _gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintln(ruleFile, s)
	if err != nil {
		panic(err)
	}
}

var ruleFile io.Writer

func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}
func max(x, y int64) int64 {
	if x > y {
		return x
	}
	return y
}

func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0x7ff:
		return false // ±inf
	case 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}
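
// Illustrative sketch, not used by the compiler: reciprocalExact64(4) is true
// because 1/4 == 0.25 is exact, reciprocalExact64(3) is false because 3 is not
// a power of two, and reciprocalExact64(2^1023) is false because its reciprocal
// would be subnormal. The function name is hypothetical and exists only for
// this example.
func exampleReciprocalExact() (bool, bool, bool) {
	return reciprocalExact64(4), reciprocalExact64(3), reciprocalExact64(math.Ldexp(1, 1023))
}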
// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0xff:
		return false // ±inf
	case 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}

// check if an immediate can be directly encoded into an ARM instruction.
func isARMImmRot(v uint32) bool {
	for i := 0; i < 16; i++ {
		if v&^0xff == 0 {
			return true
		}
		v = v<<2 | v>>30
	}

	return false
}

// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}

func areAdjacentOffsets(off1, off2, size int64) bool {
	return off1+size == off2 || off1 == off2+size
}

// check if value zeroes out upper 32-bit of 64-bit register.
// depth limits recursion depth. In AMD64.rules 3 is used as limit,
// because it catches the same number of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
	if x.Type.IsSigned() && x.Type.Size() < 8 {
		// If the value is signed, it might get re-sign-extended
		// during spill and restore. See issue 68227.
		return false
	}
	switch x.Op {
	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
		OpAMD64SHLL, OpAMD64SHLLconst:
		return true
	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
		return true
	case OpArg: // note: but not ArgIntReg
		// amd64 always loads args from the stack unsigned.
		// most other architectures load them sign/zero extended based on the type.
		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper32Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true

	}
	return false
}

// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
func zeroUpper48Bits(x *Value, depth int) bool {
	if x.Type.IsSigned() && x.Type.Size() < 8 {
		return false
	}
	switch x.Op {
	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
		return true
	case OpArg: // note: but not ArgIntReg
		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper48Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true

	}
	return false
}

// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
func zeroUpper56Bits(x *Value, depth int) bool {
	if x.Type.IsSigned() && x.Type.Size() < 8 {
		return false
	}
	switch x.Op {
	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
		return true
	case OpArg: // note: but not ArgIntReg
		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
	case OpPhi, OpSelect0, OpSelect1:
		// Phis can use each other as arguments; instead of tracking visited values,
		// just limit recursion depth.
		if depth <= 0 {
			return false
		}
		for i := range x.Args {
			if !zeroUpper56Bits(x.Args[i], depth-1) {
				return false
			}
		}
		return true

	}
	return false
}

func isInlinableMemclr(c *Config, sz int64) bool {
	if sz < 0 {
		return false
	}
	// TODO: expand this check to allow other architectures
	// see CL 454255 and issue 56997
	switch c.arch {
	case "amd64", "arm64":
		return true
	case "ppc64le", "ppc64":
		return sz < 512
	}
	return false
}

// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move will do all of its loads before any of its stores, or
// because the arguments are known to be disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	// It is always safe to convert memmove into Move when its arguments are disjoint.
	// Move ops may or may not be faster for large sizes depending on how the platform
	// lowers them, so we only perform this optimization on platforms that we know to
	// have fast Move ops.
	switch c.arch {
	case "amd64":
		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
	case "386", "arm64":
		return sz <= 8
	case "s390x", "ppc64", "ppc64le":
		return sz <= 8 || disjoint(dst, sz, src, sz)
	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
		return sz <= 4
	}
	return false
}
func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
	return isInlinableMemmove(dst, src, sz, c)
}

// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
	if s < 128 {
		return true
	}
	if logopt.Enabled() {
		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
	}
	return true
}
func LogLargeCopy(funcName string, pos src.XPos, s int64) {
	if s < 128 {
		return
	}
	if logopt.Enabled() {
		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
	}
}

// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
	switch c.arch {
	case "amd64", "386":
		return true
	default:
		return false
	}
}

func supportsPPC64PCRel() bool {
	// PCRel is currently supported for >= power10, linux only
	// Internal and external linking supports this on ppc64le; internal linking on ppc64.
	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
}

func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
	if sh < 0 || sh >= sz {
		panic("PPC64 shift arg sh out of range")
	}
	if mb < 0 || mb >= sz {
		panic("PPC64 shift arg mb out of range")
	}
	if me < 0 || me >= sz {
		panic("PPC64 shift arg me out of range")
	}
	return int32(sh<<16 | mb<<8 | me)
}

func GetPPC64Shiftsh(auxint int64) int64 {
	return int64(int8(auxint >> 16))
}

func GetPPC64Shiftmb(auxint int64) int64 {
	return int64(int8(auxint >> 8))
}

func GetPPC64Shiftme(auxint int64) int64 {
	return int64(int8(auxint))
}

// Test if this value can be encoded as a mask for an rlwinm-like
// operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
	// Isolate rightmost 1 (if none 0) and add.
	v := uint32(v64)
	vp := (v & -v) + v
	// Likewise, for the wrapping case.
	vn := ^v
	vpn := (vn & -vn) + vn
	return (v&vp == 0 || vn&vpn == 0) && v != 0
}

// Compress mask and shift into single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
	var mb, me, mbn, men int

	// Determine boundaries and then decode them
	if mask == 0 || ^mask == 0 || rotate >= nbits {
		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
	} else if nbits == 32 {
		mb = bits.LeadingZeros32(uint32(mask))
		me = 32 - bits.TrailingZeros32(uint32(mask))
		mbn = bits.LeadingZeros32(^uint32(mask))
		men = 32 - bits.TrailingZeros32(^uint32(mask))
	} else {
		mb = bits.LeadingZeros64(uint64(mask))
		me = 64 - bits.TrailingZeros64(uint64(mask))
		mbn = bits.LeadingZeros64(^uint64(mask))
		men = 64 - bits.TrailingZeros64(^uint64(mask))
	}
	// Check for a wrapping mask (e.g bits at 0 and 63)
	if mb == 0 && me == int(nbits) {
		// swap the inverted values
		mb, me = men, mbn
	}

	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}

// Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
// SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
// RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
// operations can be combined. This function assumes the two opcodes can
// be merged, and returns an encoded rotate+mask value of the combined RLDICL.
func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
	mb := s
	r := 64 - s
	// A larger mb is a smaller mask.
	if (encoded>>8)&0xFF < mb {
		encoded = (encoded &^ 0xFF00) | mb<<8
	}
	// The rotate is expected to be 0.
	if (encoded & 0xFF0000) != 0 {
		panic("non-zero rotate")
	}
	return encoded | r<<16
}

// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
	auxint := uint64(sauxint)
	rotate = int64((auxint >> 16) & 0xFF)
	mb = int64((auxint >> 8) & 0xFF)
	me = int64((auxint >> 0) & 0xFF)
	nbits := int64((auxint >> 24) & 0xFF)
	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
	if mb > me {
		mask = ^mask
	}
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}

	// Fixup ME to match ISA definition. The second argument to MASK(..,me)
	// is inclusive.
	me = (me - 1) & (nbits - 1)
	return
}
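
// Illustrative sketch, not used by the compiler: the 32 bit mask 0x6 (bits 1
// and 2, counting from the least significant bit) with no rotation encodes
// with mb=29 and an exclusive me=31; DecodePPC64RotateMask recovers the same
// mask and converts me back to the ISA's inclusive form (30). The function
// name is hypothetical and exists only for this example.
func examplePPC64RotateMaskRoundTrip() bool {
	enc := encodePPC64RotateMask(0, 0x6, 32)
	r, mb, me, mask := DecodePPC64RotateMask(enc)
	return r == 0 && mb == 29 && me == 30 && mask == 0x6
}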
// This verifies that the mask is a set of
// consecutive bits including the least
// significant bit.
func isPPC64ValidShiftMask(v int64) bool {
	if (v != 0) && ((v+1)&v) == 0 {
		return true
	}
	return false
}

func getPPC64ShiftMaskLength(v int64) int64 {
	return int64(bits.Len64(uint64(v)))
}

// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	smask := uint64((1<<uint(nbits))-1) >> uint(s)
	return m & int64(smask)
}

// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
func mergePPC64AndSrwi(m, s int64) int64 {
	mask := mergePPC64RShiftMask(m, s, 32)
	if !isPPC64WordRotateMask(mask) {
		return 0
	}
	return encodePPC64RotateMask((32-s)&31, mask, 32)
}

// Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// Rewrite mask to apply after the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))

	r_1 := 32 - srw
	r_2 := GetPPC64Shiftsh(sld)
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
		return 0
	}
	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}

// Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// Rewrite mask to apply after the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))

	r_1 := 64 - srd
	r_2 := GetPPC64Shiftsh(sld)
	r_3 := (r_1 + r_2) & 63 // This can wrap.

	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
		return 0
	}
	// This combine only works when selecting and shifting the lower 32 bits.
	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
	if v1&mask_3 != 0 {
		return 0
	}
	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
}

// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
	// for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))

	// combine the masks, and adjust for the final left shift.
	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
	r_2 := GetPPC64Shiftsh(int64(sld))
	r_3 := (r_1 + r_2) & 31 // This can wrap.

	// Verify the result is still a valid bitmask of <= 32 bits.
	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
		return 0
	}
	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}

// Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
// or 0 if they cannot be merged.
func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
	mask_out := (mask_rlw & uint64(mask))

	// Verify the result is still a valid bitmask of <= 32 bits.
	if !isPPC64WordRotateMask(int64(mask_out)) {
		return 0
	}
	return encodePPC64RotateMask(r, int64(mask_out), 32)
}

// Test if RLWINM opcode rlw clears the upper 32 bits of the
// result. Return rlw if it does, 0 otherwise.
func mergePPC64MovwzregRlwinm(rlw int64) int64 {
	_, mb, me, _ := DecodePPC64RotateMask(rlw)
	if mb > me {
		return 0
	}
	return rlw
}

// Test if an AND feeding into an RLWINM can be merged. Return the encoded RLWINM constant,
// or 0 if they cannot be merged.
func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)

	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
	r_mask := bits.RotateLeft32(mask, int(r))

	mask_out := (mask_rlw & uint64(r_mask))

	// Verify the result is still a valid bitmask of <= 32 bits.
	if !isPPC64WordRotateMask(int64(mask_out)) {
		return 0
	}
	return encodePPC64RotateMask(r, int64(mask_out), 32)
}

// Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
// or 0 if they cannot be merged.
func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
	if mb > me || mb < sldi {
		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
		// Likewise, if mb is less than the shift amount, it cannot be merged.
		return 0
	}
	// combine the masks, and adjust for the final left shift.
	mask_3 := mask_1 << sldi
	r_3 := (r_1 + sldi) & 31 // This can wrap.

	// Verify the result is still a valid bitmask of <= 32 bits.
	if uint64(uint32(mask_3)) != mask_3 {
		return 0
	}
	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}

// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
	if sld > srw || srw >= 32 {
		return 0
	}
	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
	mask := (mask_r & mask_l) << uint(sld)
	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}

// Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
// to (Select0 (opCC x y)) without having to explicitly fixup every user
// of op.
//
// E.g consider the case:
// a = (ADD x y)
// b = (CMPconst [0] a)
// c = (OR a z)
//
// A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
// would produce:
// a  = (ADD x y)
// a' = (ADDCC x y)
// a” = (Select0 a')
// b  = (CMPconst [0] a”)
// c  = (OR a z)
//
// which makes it impossible to rewrite the second user. Instead the result
// of this conversion is:
// a' = (ADDCC x y)
// a  = (Select0 a')
// b  = (CMPconst [0] a)
// c  = (OR a z)
//
// Which makes it trivial to rewrite b using a lowering rule.
func convertPPC64OpToOpCC(op *Value) *Value {
	ccOpMap := map[Op]Op{
		OpPPC64ADD:      OpPPC64ADDCC,
		OpPPC64ADDconst: OpPPC64ADDCCconst,
		OpPPC64AND:      OpPPC64ANDCC,
		OpPPC64ANDconst: OpPPC64ANDCCconst,
		OpPPC64ANDN:     OpPPC64ANDNCC,
		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
		OpPPC64OR:       OpPPC64ORCC,
		OpPPC64RLDICL:   OpPPC64RLDICLCC,
		OpPPC64SUB:      OpPPC64SUBCC,
		OpPPC64NEG:      OpPPC64NEGCC,
		OpPPC64NOR:      OpPPC64NORCC,
		OpPPC64XOR:      OpPPC64XORCC,
	}
	b := op.Block
	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
	opCC.AddArgs(op.Args...)
	op.reset(OpSelect0)
	op.AddArgs(opCC)
	return op
}

// Try converting a RLDICL to ANDCC. If successful, return the mask otherwise 0.
func convertPPC64RldiclAndccconst(sauxint int64) int64 {
	r, _, _, mask := DecodePPC64RotateMask(sauxint)
	if r != 0 || mask&0xFFFF != mask {
		return 0
	}
	return int64(mask)
}

// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}

func rotateRight64(v, rotate int64) int64 {
	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
}

// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
	if lsb < 0 || lsb > 63 {
		panic("ARM(64) bit field lsb constant out of range")
	}
	if width < 1 || lsb+width > 64 {
		panic("ARM(64) bit field width constant out of range")
	}
	return arm64BitField(width | lsb<<8)
}

// returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
	return int64(uint64(bfc) >> 8)
}

// returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
	return int64(bfc) & 0xff
}
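
// Illustrative sketch, not used by the compiler: armBFAuxInt packs the lsb
// into bits 8 and above and the width into the low byte, so the two accessors
// recover the original pair. The function name is hypothetical and exists only
// for this example.
func exampleARM64BitFieldRoundTrip() bool {
	bfc := armBFAuxInt(8, 16)
	return bfc.getARM64BFlsb() == 8 && bfc.getARM64BFwidth() == 16
}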
// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// returns the bitfield width of mask >> rshift for arm64 bitfield ops.
func arm64BFWidth(mask, rshift int64) int64 {
	shiftedMask := int64(uint64(mask) >> uint64(rshift))
	if shiftedMask == 0 {
		panic("ARM64 BF mask is zero")
	}
	return nto(shiftedMask)
}

// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
	return t.(*types.Type).Size()
}

// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
		return true
	}
	if typ.IsInteger() {
		return typ.Size() <= b.Func.Config.RegSize
	}
	return false
}

// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
	f := v.Block.Func
	if !f.Config.Race {
		return false
	}
	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
		return false
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpStaticCall, OpStaticLECall:
				// A check for racefuncenter will encounter racefuncexit and vice versa.
				// Allow calls to panic*
				s := v.Aux.(*AuxCall).Fn.String()
				switch s {
				case "runtime.racefuncenter", "runtime.racefuncexit",
					"runtime.panicdivide", "runtime.panicwrap",
					"runtime.panicshift":
					continue
				}
				// If we encountered any call, we need to keep racefunc*,
				// for accurate stacktraces.
				return false
			case OpPanicBounds, OpPanicExtend:
				// Note: these are panic generators that are ok (like the static calls above).
			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
				// We must keep the race functions if there are any other call types.
				return false
			}
		}
	}
	if isSameCall(sym, "runtime.racefuncenter") {
		// TODO REGISTER ABI this needs to be cleaned up.
		// If we're removing racefuncenter, remove its argument as well.
		if v.Args[0].Op != OpStore {
			if v.Op == OpStaticLECall {
				// there is no store, yet.
				return true
			}
			return false
		}
		mem := v.Args[0].Args[2]
		v.Args[0].reset(OpCopy)
		v.Args[0].AddArg(mem)
	}
	return true
}

// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
	lsym := sym.(*obj.LSym)
	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}

// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
		return false
	}
	for _, b := range lsym.P {
		if b != 0 {
			return false
		}
	}
	return true
}

// isFixed32 returns true if the int32 at offset off in symbol sym
// is known and constant.
// isFixed32 returns true if the int32 at offset off in symbol sym
// is known and constant.
func isFixed32(c *Config, sym Sym, off int64) bool {
	return isFixed(c, sym, off, 4)
}

// isFixed returns true if the range [off,off+size] of the symbol sym
// is known and constant.
func isFixed(c *Config, sym Sym, off, size int64) bool {
	lsym := sym.(*obj.LSym)
	if lsym.Extra == nil {
		return false
	}
	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
		if off == 2*c.PtrSize && size == 4 {
			return true // type hash field
		}
	}
	return false
}

func fixed32(c *Config, sym Sym, off int64) int32 {
	lsym := sym.(*obj.LSym)
	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
		if off == 2*c.PtrSize {
			return int32(types.TypeHash(ti.Type.(*types.Type)))
		}
	}
	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
	return 0
}

// isFixedSym returns true if the contents of sym at the given offset
// are known and are the constant address of another symbol.
func isFixedSym(sym Sym, off int64) bool {
	lsym := sym.(*obj.LSym)
	switch {
	case lsym.Type == objabi.SRODATA:
		// itabs, dictionaries
	default:
		return false
	}
	for _, r := range lsym.R {
		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
			return true
		}
	}
	return false
}

func fixedSym(f *Func, sym Sym, off int64) Sym {
	lsym := sym.(*obj.LSym)
	for _, r := range lsym.R {
		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
			if strings.HasPrefix(r.Sym.Name, "type:") {
				// In case we're loading a type out of a dictionary, we need to record
				// that the containing function might put that type in an interface.
				// That information is currently recorded in relocations in the dictionary,
				// but if we perform this load at compile time then the dictionary
				// might be dead.
				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
				// Same, but if we're using an itab we need to record that the
				// itab._type might be put in an interface.
				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
			}
			return r.Sym
		}
	}
	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
	return nil
}

// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
	lsym := sym.(*obj.LSym)
	if off >= int64(len(lsym.P)) || off < 0 {
		// Invalid index into the global sym.
		// This can happen in dead code, so we don't want to panic.
		// Just return any value, it will eventually get ignored.
		// See issue 29215.
		return 0
	}
	return lsym.P[off]
}

// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
	lsym := sym.(*obj.LSym)
	// lsym.P is written lazily.
	// Bytes requested after the end of lsym.P are 0.
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src)
	return byteorder.Uint16(buf)
}
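
// A minimal sketch (illustrative only; real symbols come from the object
// writer, and this helper is not used by the compiler) of how the read-only
// symbol readers behave on a hand-constructed *obj.LSym. Bytes requested past
// len(lsym.P) read as zero.
func exampleReadOnlySym() (uint8, uint16) {
	lsym := &obj.LSym{Type: objabi.SRODATA, P: []byte{0x34, 0x12}}
	_ = symIsRO(lsym) // true: SRODATA with no relocations
	return read8(lsym, 0), read16(lsym, 0, binary.LittleEndian) // 0x34, 0x1234
}
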
// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 4)
	copy(buf, src)
	return byteorder.Uint32(buf)
}

// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
	lsym := sym.(*obj.LSym)
	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 8)
	copy(buf, src)
	return byteorder.Uint64(buf)
}

// sequentialAddresses reports true if it can prove that x + n == y.
func sequentialAddresses(x, y *Value, n int64) bool {
	if x == y && n == 0 {
		return true
	}
	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
		return true
	}
	return false
}

// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//
// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
// (because it does x + ^y + C).
//
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8

// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
	return fc&1 != 0
}

// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
	return fc&2 != 0
}

// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
	return fc&4 != 0
}

// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
	return fc&8 != 0
}
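
// Illustrative sketch (a hypothetical helper, not used by any rewrite rule)
// spelling out the bit layout assumed by the accessors above and by
// flagConstantBuilder.encode below: N is bit 0, Z bit 1, C bit 2, V bit 3.
func exampleFlagConstantLayout() (n, z, c, v bool) {
	fc := flagConstant(2 | 4) // Z and C set
	return fc.N(), fc.Z(), fc.C(), fc.V() // false, true, true, false
}
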
func (fc flagConstant) eq() bool {
	return fc.Z()
}
func (fc flagConstant) ne() bool {
	return !fc.Z()
}
func (fc flagConstant) lt() bool {
	return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
	return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
	return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
	return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
	return !fc.C()
}
func (fc flagConstant) ule() bool {
	return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
	return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
	return fc.C()
}

func (fc flagConstant) ltNoov() bool {
	return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
	return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
	return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
	return fc.ge() && !fc.V()
}

func (fc flagConstant) String() string {
	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}

type flagConstantBuilder struct {
	N bool
	Z bool
	C bool
	V bool
}

func (fcs flagConstantBuilder) encode() flagConstant {
	var fc flagConstant
	if fcs.N {
		fc |= 1
	}
	if fcs.Z {
		fc |= 2
	}
	if fcs.C {
		fc |= 4
	}
	if fcs.V {
		fc |= 8
	}
	return fc
}

// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
// - the results of the C flag are different
// - the results of the V flag when y==minint are different

// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint64(x+y) < uint64(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}

// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x+y == 0
	fcb.N = x+y < 0
	fcb.C = uint32(x+y) < uint32(x)
	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
	return fcb.encode()
}

// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x-y == 0
	fcb.N = x-y < 0
	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
	return fcb.encode()
}
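
// A small worked example (hypothetical, for exposition only) of the arm-style
// carry convention used by subFlags64/subFlags32: C is set exactly when the
// unsigned subtraction does not borrow, which is why uge() simply returns C().
func exampleSubFlags64Carry() (borrowCase, noBorrowCase bool) {
	return subFlags64(1, 2).C(), subFlags64(2, 1).C() // false (1-2 borrows), true (2-1 does not)
}
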
// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
	var fcb flagConstantBuilder
	fcb.Z = x == 0
	fcb.N = x < 0
	return fcb.encode()
}

func makeJumpTableSym(b *Block) *obj.LSym {
	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
	// The jump table symbol is accessed only from the function symbol.
	s.Set(obj.AttrStatic, true)
	return s
}

// canRotate reports whether the architecture supports
// rotates of integer registers with the given number of bits.
func canRotate(c *Config, bits int64) bool {
	if bits > c.PtrSize*8 {
		// Don't rewrite to rotates bigger than the machine word.
		return false
	}
	switch c.arch {
	case "386", "amd64", "arm64", "riscv64":
		return true
	case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
		return bits >= 32
	default:
		return false
	}
}

// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
func isARM64bitcon(x uint64) bool {
	if x == 1<<64-1 || x == 0 {
		return false
	}
	// determine the period and sign-extend a unit to 64 bits
	switch {
	case x != x>>32|x<<32:
		// period is 64
		// nothing to do
	case x != x>>16|x<<48:
		// period is 32
		x = uint64(int64(int32(x)))
	case x != x>>8|x<<56:
		// period is 16
		x = uint64(int64(int16(x)))
	case x != x>>4|x<<60:
		// period is 8
		x = uint64(int64(int8(x)))
	default:
		// period is 4 or 2, always true
		// 0001, 0010, 0100, 1000 -- 0001 rotate
		// 0011, 0110, 1100, 1001 -- 0011 rotate
		// 0111, 1011, 1101, 1110 -- 0111 rotate
		// 0101, 1010             -- 01 rotate, repeat
		return true
	}
	return sequenceOfOnes(x) || sequenceOfOnes(^x)
}

// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
func sequenceOfOnes(x uint64) bool {
	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
	y += x
	return (y-1)&y == 0
}

// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
func isARM64addcon(v int64) bool {
	/* uimm12 or uimm24? */
	if v < 0 {
		return false
	}
	if (v & 0xFFF) == 0 {
		v >>= 12
	}
	return v <= 0xFFF
}

// setPos sets the position of v to pos, then returns true.
// Useful for setting the result of a rewrite's position to
// something other than the default.
func setPos(v *Value, pos src.XPos) bool {
	v.Pos = pos
	return true
}
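
// Illustrative sketch (a hypothetical helper, not referenced elsewhere):
// isARM64addcon accepts an unsigned 12-bit immediate, optionally shifted left
// by 12, so 0xFFF and 0x123000 are encodable while 0x1001 is not.
func exampleARM64AddCon() (imm12, imm12shifted, unencodable bool) {
	return isARM64addcon(0xFFF), isARM64addcon(0x123000), isARM64addcon(0x1001) // true, true, false
}
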