1// Inferno utils/6l/span.c 2// https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c 3// 4// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. 5// Portions Copyright © 1995-1997 C H Forsyth ([email protected]) 6// Portions Copyright © 1997-1999 Vita Nuova Limited 7// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8// Portions Copyright © 2004,2006 Bruce Ellis 9// Portions Copyright © 2005-2007 C H Forsyth ([email protected]) 10// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others 11// Portions Copyright © 2009 The Go Authors. All rights reserved. 12// 13// Permission is hereby granted, free of charge, to any person obtaining a copy 14// of this software and associated documentation files (the "Software"), to deal 15// in the Software without restriction, including without limitation the rights 16// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17// copies of the Software, and to permit persons to whom the Software is 18// furnished to do so, subject to the following conditions: 19// 20// The above copyright notice and this permission notice shall be included in 21// all copies or substantial portions of the Software. 22// 23// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29// THE SOFTWARE. 30 31package x86 32 33import ( 34 "cmd/internal/obj" 35 "cmd/internal/objabi" 36 "cmd/internal/sys" 37 "encoding/binary" 38 "fmt" 39 "internal/buildcfg" 40 "log" 41 "strings" 42) 43 44var ( 45 plan9privates *obj.LSym 46) 47 48// Instruction layout. 

// Loop alignment constants:
// want to align loop entry to loopAlign-byte boundary,
// and willing to insert at most maxLoopPad bytes of NOP to do so.
// We define a loop entry as the target of a backward jump.
//
// gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
// and it aligns all jump targets, not just backward jump targets.
//
// As of 6/1/2012, the effect of setting maxLoopPad = 10 here
// is very slight but negative, so the alignment is disabled by
// setting maxLoopPad = 0. The code is here for reference and
// for future experiments.
const (
	loopAlign  = 16
	maxLoopPad = 0
)

// Bit flags that are used to express jump target properties.
// The values are distinct single bits (1, 2, 4), so they can be OR-ed together.
const (
	// branchBackwards marks targets that are located behind.
	// Used to express jumps to loop headers.
	branchBackwards = (1 << iota)
	// branchShort marks branches whose target is close,
	// with an offset in the -128..127 range.
	branchShort
	// branchLoopHead marks loop entry.
	// Used to insert padding for misaligned loops.
	branchLoopHead
)

// opBytes holds optab encoding bytes.
// Each ytab reserves fixed amount of bytes in this array.
//
// The size should be the minimal number of bytes that
// are enough to hold biggest optab op lines.
type opBytes [31]uint8

// Optab is one line of the optab instruction table.
//
// as is the instruction, ytab lists the operand combinations the
// instruction accepts, prefix is one of the P* constants and controls
// the prefix bytes to emit, and op holds the encoding bytes that the
// ytab lines index into (see the comment on optab).
type Optab struct {
	as     obj.As
	ytab   []ytab
	prefix uint8
	op     opBytes
}

// movtab is a table line type whose users are outside this part of the file.
// NOTE(review): ft, f3t and tt look like operand classes (Y* constants) for
// the from, third and to operands, and code/op like an encoding selector and
// its bytes — confirm against the table built from this type.
type movtab struct {
	as   obj.As
	ft   uint8
	f3t  uint8
	tt   uint8
	code uint8
	op   [4]uint8
}

// Operand classes ("Ytypes"), used in the argList of every ytab line.
//
// The values are assigned by iota, so the declaration order is significant:
// inserting or reordering entries renumbers every class after it.
// Ymax is the number of classes and is used to size ycover.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yu2 // $x, x fits in uint2
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
	Yxr           // X0..X15
	YxrEvex       // X0..X31
	Yxm
	YxmEvex       // YxrEvex+Ym
	Yxvm          // VSIB vector array; vm32x/vm64x
	YxvmEvex      // Yxvm which permits High-16 X register as index.
	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
	Yyr           // Y0..Y15
	YyrEvex       // Y0..Y31
	Yym
	YymEvex   // YyrEvex+Ym
	Yyvm      // VSIB vector array; vm32y/vm64y
	YyvmEvex  // Yyvm which permits High-16 Y register as index.
	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
	Yzr       // Z0..Z31
	Yzm       // Yzr+Ym
	Yzvm      // VSIB vector array; vm32z/vm64z
	Yk0       // K0
	Yknot0    // K1..K7; write mask
	Yk        // K0..K7; used for KOP
	Ykm       // Yk+Ym; used for KOP
	Ytls
	Ytextsize
	Yindir
	Ymax
)

// Encoding cases ("Ztypes"): the first field of every ytab line.
// The switch on yt.zcase in doasm implements the various Z cases.
//
// The values are assigned by iota, so the declaration order is significant.
// Zevex_first and Zevex_last bracket the EVEX cases.
// NOTE(review): presumably they are used for range checks — confirm in doasm.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Zlitr_m
	Zlit_m_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Z_m_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r // mmx1,mmx2/mem64,imm8
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zbyte

	Zvex_rm_v_r
	Zvex_rm_v_ro
	Zvex_r_v_rm
	Zvex_i_rm_vo
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zvex
	Zvex_rm_r_vo
	Zvex_i_r_rm
	Zvex_hr_rm_v_r

	Zevex_first
	Zevex_i_r_k_rm
	Zevex_i_r_rm
	Zevex_i_rm_k_r
	Zevex_i_rm_k_vo
	Zevex_i_rm_r
	Zevex_i_rm_v_k_r
	Zevex_i_rm_v_r
	Zevex_i_rm_vo
	Zevex_k_rmo
	Zevex_r_k_rm
	Zevex_r_v_k_rm
	Zevex_r_v_rm
	Zevex_rm_k_r
	Zevex_rm_v_k_r
	Zevex_rm_v_r
	Zevex_last

	Zmax
)

// Prefix selectors (P*) and REX bits (Rx*).
//
// A P* constant is stored in the prefix field of an optab line and controls
// the prefix bytes to emit; some of them also appear inside opBytes lists.
// Several values are purely symbolic, as noted on the individual entries.
const (
	Px   = 0
	Px1  = 1    // symbolic; exact value doesn't matter
	P32  = 0x32 // 32-bit only
	Pe   = 0x66 // operand escape
	Pm   = 0x0f // 2byte opcode escape
	Pq   = 0xff // both escapes: 66 0f
	Pb   = 0xfe // byte operands
	Pf2  = 0xf2 // xmm escape 1: f2 0f
	Pf3  = 0xf3 // xmm escape 2: f3 0f
	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
	Pq3  = 0x67 // xmm escape 3: 66 48 0f
	Pq4  = 0x68 // xmm escape 4: 66 0F 38
	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
	Pq5  = 0x6a // xmm escape 5: F3 0F 38
	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
	Pw   = 0x48 // Rex.w
	Pw8  = 0x90 // symbolic; exact value doesn't matter
	Py   = 0x80 // defaults to 64-bit mode
	Py1  = 0x81 // symbolic; exact value doesn't matter
	Py3  = 0x83 // symbolic; exact value doesn't matter
	Pavx = 0x84 // symbolic; exact value doesn't matter

	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
	Rxw     = 1 << 3 // =1, 64-bit operand size
	Rxr     = 1 << 2 // extend modrm reg
	Rxx     = 1 << 1 // extend sib index
	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
)

const (
	// Encoding for VEX prefix in tables.
	// The P, L, and W fields are chosen to match
	// their eventual locations in the VEX prefix bytes.

	// Using spare bit to make leading [E]VEX encoding byte different from
	// 0x0f even if all other VEX fields are 0.
	avxEscape = 1 << 6

	// P field - 2 bits
	vex66 = 1 << 0
	vexF3 = 2 << 0
	vexF2 = 3 << 0
	// L field - 1 bit
	// (vexLZ, vexLIG and vex128 all encode as 0.)
	vexLZ  = 0 << 2
	vexLIG = 0 << 2
	vex128 = 0 << 2
	vex256 = 1 << 2
	// W field - 1 bit
	// (vexWIG and vexW0 both encode as 0.)
	vexWIG = 0 << 7
	vexW0  = 0 << 7
	vexW1  = 1 << 7
	// M field - 5 bits, but mostly reserved; we can store up to 3
	vex0F   = 1 << 3
	vex0F38 = 2 << 3
	vex0F3A = 3 << 3
)

// ycover is a flattened Ymax-by-Ymax table: ycover[a*Ymax+b] is set when an
// operand of class a also counts as the more general class b.
// It is set up in instinit.
var ycover [Ymax * Ymax]uint8

// reg is a per-register table with one entry per register number below MAXREG.
// NOTE(review): it is filled in outside this part of the file — confirm what
// the stored value means before relying on it.
var reg [MAXREG]int

// regrex is a per-register table with MAXREG+1 entries.
// NOTE(review): presumably the REX bits (Rx*) needed for each register;
// it is filled in outside this part of the file — confirm.
var regrex [MAXREG + 1]int

// Operand tables ("ytabs").
//
// Every line has the form {Ztype, zoffset, argList{Ytypes...}}. For a given
// instruction the lines are scanned in order for one whose argList matches
// the operand classes; each line that is skipped advances the z pointer into
// the instruction's opBytes by its zoffset. The order of the lines and their
// zoffset values must therefore stay in step with the opBytes of every optab
// entry that uses the table.

var ynone = []ytab{
	{Zlit, 1, argList{}},
}

var ytext = []ytab{
	{Zpseudo, 0, argList{Ymb, Ytextsize}},
	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
}

// NOTE(review): the argument lists Yiauto, Yml, Yrf, Yxr appear twice below;
// only the final Yxr line has a non-zero zoffset. The repetition looks
// redundant, but confirm against the matching loop before removing anything.
var ynop = []ytab{
	{Zpseudo, 0, argList{}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 0, argList{Yxr}},
	{Zpseudo, 0, argList{Yiauto}},
	{Zpseudo, 0, argList{Yml}},
	{Zpseudo, 0, argList{Yrf}},
	{Zpseudo, 1, argList{Yxr}},
}

var yfuncdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Ym}},
}

var ypcdata = []ytab{
	{Zpseudo, 0, argList{Yi32, Yi32}},
}

var yxorb = []ytab{
	{Zib_, 1, argList{Yi32, Yal}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yaddl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yincl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Yml}},
}

var yincq = []ytab{
	{Zo_m, 2, argList{Yml}},
}

var ycmpb = []ytab{
	{Z_ib, 1, argList{Yal, Yi32}},
	{Zm_ibo, 2, argList{Ymb, Yi32}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var ycmpl = []ytab{
	{Zm_ibo, 2, argList{Yml, Yi8}},
	{Z_il, 1, argList{Yax, Yi32}},
	{Zm_ilo, 2, argList{Yml, Yi32}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yshb = []ytab{
	{Zo_m, 2, argList{Yi1, Ymb}},
	{Zibo_m, 2, argList{Yu8, Ymb}},
	{Zo_m, 2, argList{Ycx, Ymb}},
}

var yshl = []ytab{
	{Zo_m, 2, argList{Yi1, Yml}},
	{Zibo_m, 2, argList{Yu8, Yml}},
	{Zo_m, 2, argList{Ycl, Yml}},
	{Zo_m, 2, argList{Ycx, Yml}},
}

var ytestl = []ytab{
	{Zil_, 1, argList{Yi32, Yax}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ymovb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
	{Zib_rp, 1, argList{Yi32, Yrb}},
	{Zibo_m, 2, argList{Yi32, Ymb}},
}

var ybtl = []ytab{
	{Zibo_m, 2, argList{Yi8, Yml}},
	{Zr_m, 1, argList{Yrl, Yml}},
}

var ymovw = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var ymovl = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
	{Zil_rp, 1, argList{Yi32, Yrl}},
	{Zilo_m, 2, argList{Yi32, Yml}},
	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
	{Zaut_r, 2, argList{Yiauto, Yrl}},
}

var yret = []ytab{
	{Zo_iw, 1, argList{}},
	{Zo_iw, 1, argList{Yi32}},
}

var ymovq = []ytab{
	// valid in 32-bit mode
	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
}

var ymovbe = []ytab{
	{Zlitm_r, 3, argList{Ym, Yrl}},
	{Zlitr_m, 3, argList{Yrl, Ym}},
}

var ym_rl = []ytab{
	{Zm_r, 1, argList{Ym, Yrl}},
}

var yrl_m = []ytab{
	{Zr_m, 1, argList{Yrl, Ym}},
}

var ymb_rl = []ytab{
	{Zmb_r, 1, argList{Ymb, Yrl}},
}

var yml_rl = []ytab{
	{Zm_r, 1, argList{Yml, Yrl}},
}

var yrl_ml = []ytab{
	{Zr_m, 1, argList{Yrl, Yml}},
}

var yml_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
	{Zm_r, 1, argList{Ymb, Yrb}},
}

var yrb_mb = []ytab{
	{Zr_m, 1, argList{Yrb, Ymb}},
}

var yxchg = []ytab{
	{Z_rp, 1, argList{Yax, Yrl}},
	{Zrp_, 1, argList{Yrl, Yax}},
	{Zr_m, 1, argList{Yrl, Yml}},
	{Zm_r, 1, argList{Yml, Yrl}},
}

var ydivl = []ytab{
	{Zm_o, 2, argList{Yml}},
}

var ydivb = []ytab{
	{Zm_o, 2, argList{Ymb}},
}

var yimul = []ytab{
	{Zm_o, 2, argList{Yml}},
	{Zib_rr, 1, argList{Yi8, Yrl}},
	{Zil_rr, 1, argList{Yi32, Yrl}},
	{Zm_r, 2, argList{Yml, Yrl}},
}

var yimul3 = []ytab{
	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
}

var ybyte = []ytab{
	{Zbyte, 1, argList{Yi64}},
}

var yin = []ytab{
	{Zib_, 1, argList{Yi32}},
	{Zlit, 1, argList{}},
}

var yint = []ytab{
	{Zib_, 1, argList{Yi32}},
}

// The tables below follow the same line layout as the ones above:
// {Ztype, zoffset, argList{Ytypes...}}. Line order and zoffset values are
// significant because they select bytes from the opBytes of each optab entry.

var ypushl = []ytab{
	{Zrp_, 1, argList{Yrl}},
	{Zm_o, 2, argList{Ym}},
	{Zib_, 1, argList{Yi8}},
	{Zil_, 1, argList{Yi32}},
}

var ypopl = []ytab{
	{Z_rp, 1, argList{Yrl}},
	{Zo_m, 2, argList{Ym}},
}

var ywrfsbase = []ytab{
	{Zm_o, 2, argList{Yrl}},
}

var yrdrand = []ytab{
	{Zo_m, 2, argList{Yrl}},
}

var yclflush = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ybswap = []ytab{
	{Z_rp, 2, argList{Yrl}},
}

var yscond = []ytab{
	{Zo_m, 2, argList{Ymb}},
}

var yjcond = []ytab{
	{Zbr, 0, argList{Ybr}},
	{Zbr, 0, argList{Yi0, Ybr}},
	{Zbr, 1, argList{Yi1, Ybr}},
}

var yloop = []ytab{
	{Zloop, 1, argList{Ybr}},
}

var ycall = []ytab{
	{Zcallindreg, 0, argList{Yml}},
	{Zcallindreg, 2, argList{Yrx, Yrx}},
	{Zcallind, 2, argList{Yindir}},
	{Zcall, 0, argList{Ybr}},
	{Zcallcon, 1, argList{Yi32}},
}

var yduff = []ytab{
	{Zcallduff, 1, argList{Yi32}},
}

var yjmp = []ytab{
	{Zo_m64, 2, argList{Yml}},
	{Zjmp, 0, argList{Ybr}},
	{Zjmpcon, 1, argList{Yi32}},
}

var yfmvd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvdp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfmvf = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfmvx = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
}

var yfmvp = []ytab{
	{Zo_m, 2, argList{Yf0, Ym}},
}

var yfcmv = []ytab{
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var yfadd = []ytab{
	{Zm_o, 2, argList{Ym, Yf0}},
	{Zm_o, 2, argList{Yrf, Yf0}},
	{Zo_m, 2, argList{Yf0, Yrf}},
}

var yfxch = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}},
	{Zm_o, 2, argList{Yrf, Yf0}},
}

var ycompp = []ytab{
	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
}

var ystsw = []ytab{
	{Zo_m, 2, argList{Ym}},
	{Zlit, 1, argList{Yax}},
}

var ysvrs_mo = []ytab{
	{Zm_o, 2, argList{Ym}},
}

// unaryDst version of "ysvrs_mo".
var ysvrs_om = []ytab{
	{Zo_m, 2, argList{Ym}},
}

var ymm = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
}

var yxm = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var yxm_q4 = []ytab{
	{Zm_r, 1, argList{Yxm, Yxr}},
}

var yxcvm1 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Yxm, Ymr}},
}

var yxcvm2 = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zm_r_xm, 2, argList{Ymm, Yxr}},
}

var yxr = []ytab{
	{Zm_r_xm, 1, argList{Yxr, Yxr}},
}

var yxr_ml = []ytab{
	{Zr_m_xm, 1, argList{Yxr, Yml}},
}

var ymr = []ytab{
	{Zm_r, 1, argList{Ymr, Ymr}},
}

var ymr_ml = []ytab{
	{Zr_m_xm, 1, argList{Ymr, Yml}},
}

var yxcmpi = []ytab{
	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
}

var yxmov = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
	{Zr_m_xm, 1, argList{Yxr, Yxm}},
}

var yxcvfl = []ytab{
	{Zm_r_xm, 1, argList{Yxm, Yrl}},
}

var yxcvlf = []ytab{
	{Zm_r_xm, 1, argList{Yml, Yxr}},
}

var yxcvfq = []ytab{
	{Zm_r_xm, 2, argList{Yxm, Yrl}},
}

var yxcvqf = []ytab{
	{Zm_r_xm, 2, argList{Yml, Yxr}},
}

var yps = []ytab{
	{Zm_r_xm, 1, argList{Ymm, Ymr}},
	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
	{Zm_r_xm, 2, argList{Yxm, Yxr}},
	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
}

var yxrrl = []ytab{
	{Zm_r, 1, argList{Yxr, Yrl}},
}

var ymrxr = []ytab{
	{Zm_r, 1, argList{Ymr, Yxr}},
	{Zm_r_xm, 1, argList{Yxm, Yxr}},
}

var ymshuf = []ytab{
	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
}

var ymshufb = []ytab{
	{Zm2_r, 2, argList{Yxm, Yxr}},
}

// yxshuf should never have more than 1 entry,
// because some optab entries have opcode sequences that
// are longer than 2 bytes (zoffset=2 here),
// ROUNDPD and ROUNDPS and recently added BLENDPD,
// to name a few.
var yxshuf = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var yextrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
}

var yextr = []ytab{
	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
}

var yinsrw = []ytab{
	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
}

var yinsr = []ytab{
	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
}

var ypsdq = []ytab{
	{Zibo_m, 2, argList{Yi8, Yxr}},
}

var ymskb = []ytab{
	{Zm_r_xm, 2, argList{Yxr, Yrl}},
	{Zm_r_xm, 1, argList{Ymr, Yrl}},
}

var ycrc32l = []ytab{
	{Zlitm_r, 0, argList{Yml, Yrl}},
}

var ycrc32b = []ytab{
	{Zlitm_r, 0, argList{Ymb, Yrl}},
}

var yprefetch = []ytab{
	{Zm_o, 2, argList{Ym}},
}

var yaes = []ytab{
	{Zlitm_r, 2, argList{Yxm, Yxr}},
}

var yxbegin = []ytab{
	{Zjmp, 1, argList{Ybr}},
}

var yxabort = []ytab{
	{Zib_, 1, argList{Yu8}},
}

var ylddqu = []ytab{
	{Zm_r, 1, argList{Ym, Yxr}},
}

var ypalignr = []ytab{
	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
}

var ysha256rnds2 = []ytab{
	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
}

var yblendvpd = []ytab{
	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
}

var ymmxmm0f38 = []ytab{
	{Zlitm_r, 3, argList{Ymm, Ymr}},
	{Zlitm_r, 5, argList{Yxm, Yxr}},
}

var yextractps = []ytab{
	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
}

var ysha1rnds4 = []ytab{
	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
}

// You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
// ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab
// to find the entry with the given p.As and then looks through the ytable for
// that instruction (the second field in the optab struct) for a line whose
// argList matches the Ytypes of the instruction's operands. The
// function oclass computes the specific Ytype of an operand and then the set
// of more general Ytypes that it satisfies is implied by the ycover table, set
// up in instinit. For example, oclass distinguishes the constants 0 and 1
// from the more general 8-bit constants, but instinit says
//
//	ycover[Yi0*Ymax+Ys32] = 1
//	ycover[Yi1*Ymax+Ys32] = 1
//	ycover[Yi8*Ymax+Ys32] = 1
//
// which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
// if that's what an instruction can handle.
//
// In parallel with the scan through the ytable for the appropriate line, there
// is a z pointer that starts out pointing at the strange magic byte list in
// the Optab struct. With each step past a non-matching ytable line, z
// advances by the zoffset of that line (its 2nd entry). When a matching line
// is found, that z pointer has the extra data to use in laying down the
// instruction bytes. The actual bytes laid down are a function of the 1st
// entry in the line (that is, the Ztype) and the z bytes.
//
// For example, let's look at AADDL. The optab line says:
//
//	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
//
// and yaddl says
//
//	var yaddl = []ytab{
//		{Zibo_m, 2, argList{Yi8, Yml}},
//		{Zil_, 1, argList{Yi32, Yax}},
//		{Zilo_m, 2, argList{Yi32, Yml}},
//		{Zr_m, 1, argList{Yrl, Yml}},
//		{Zm_r, 1, argList{Yml, Yrl}},
//	}
//
// so there are 5 possible types of ADDL instruction that can be laid down, and
// possible states used to lay them down (Ztype and z pointer, assuming z
// points at opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}) are:
//
//	Yi8, Yml -> Zibo_m, z (0x83, 00)
//	Yi32, Yax -> Zil_, z+2 (0x05)
//	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
//	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
//	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
//
// The Pconstant in the optab line controls the prefix bytes to emit. That's
// relatively straightforward as this program goes.
//
// The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
// example, is an opcode byte (z[0]) then an asmando (which is some kind of
// encoded addressing mode for the Yml arg), and then a single immediate byte.
// Zilo_m is the same but a long (32-bit) immediate.
921var optab = 922// as, ytab, andproto, opcode 923[...]Optab{ 924 {obj.AXXX, nil, 0, opBytes{}}, 925 {AAAA, ynone, P32, opBytes{0x37}}, 926 {AAAD, ynone, P32, opBytes{0xd5, 0x0a}}, 927 {AAAM, ynone, P32, opBytes{0xd4, 0x0a}}, 928 {AAAS, ynone, P32, opBytes{0x3f}}, 929 {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}}, 930 {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 931 {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 932 {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}}, 933 {AADCXL, yml_rl, Pq4, opBytes{0xf6}}, 934 {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}}, 935 {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}}, 936 {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 937 {AADDPD, yxm, Pq, opBytes{0x58}}, 938 {AADDPS, yxm, Pm, opBytes{0x58}}, 939 {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 940 {AADDSD, yxm, Pf2, opBytes{0x58}}, 941 {AADDSS, yxm, Pf3, opBytes{0x58}}, 942 {AADDSUBPD, yxm, Pq, opBytes{0xd0}}, 943 {AADDSUBPS, yxm, Pf2, opBytes{0xd0}}, 944 {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}}, 945 {AADOXL, yml_rl, Pq5, opBytes{0xf6}}, 946 {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}}, 947 {AADJSP, nil, 0, opBytes{}}, 948 {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}}, 949 {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 950 {AANDNPD, yxm, Pq, opBytes{0x55}}, 951 {AANDNPS, yxm, Pm, opBytes{0x55}}, 952 {AANDPD, yxm, Pq, opBytes{0x54}}, 953 {AANDPS, yxm, Pm, opBytes{0x54}}, 954 {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 955 {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}}, 956 {AARPL, yrl_ml, P32, opBytes{0x63}}, 957 {ABOUNDL, yrl_m, P32, opBytes{0x62}}, 958 {ABOUNDW, yrl_m, Pe, opBytes{0x62}}, 959 {ABSFL, yml_rl, Pm, opBytes{0xbc}}, 960 {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}}, 961 {ABSFW, yml_rl, Pq, opBytes{0xbc}}, 962 {ABSRL, yml_rl, Pm, opBytes{0xbd}}, 963 {ABSRQ, yml_rl, Pw, 
opBytes{0x0f, 0xbd}}, 964 {ABSRW, yml_rl, Pq, opBytes{0xbd}}, 965 {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}}, 966 {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}}, 967 {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}}, 968 {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}}, 969 {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}}, 970 {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}}, 971 {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}}, 972 {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}}, 973 {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}}, 974 {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}}, 975 {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}}, 976 {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}}, 977 {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}}, 978 {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}}, 979 {ABYTE, ybyte, Px, opBytes{1}}, 980 {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}}, 981 {ACBW, ynone, Pe, opBytes{0x98}}, 982 {ACDQ, ynone, Px, opBytes{0x99}}, 983 {ACDQE, ynone, Pw, opBytes{0x98}}, 984 {ACLAC, ynone, Pm, opBytes{01, 0xca}}, 985 {ACLC, ynone, Px, opBytes{0xf8}}, 986 {ACLD, ynone, Px, opBytes{0xfc}}, 987 {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}}, 988 {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}}, 989 {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}}, 990 {ACLI, ynone, Px, opBytes{0xfa}}, 991 {ACLTS, ynone, Pm, opBytes{0x06}}, 992 {ACLWB, yclflush, Pq, opBytes{0xae, 06}}, 993 {ACMC, ynone, Px, opBytes{0xf5}}, 994 {ACMOVLCC, yml_rl, Pm, opBytes{0x43}}, 995 {ACMOVLCS, yml_rl, Pm, opBytes{0x42}}, 996 {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}}, 997 {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}}, 998 {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}}, 999 {ACMOVLHI, yml_rl, Pm, opBytes{0x47}}, 1000 {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}}, 1001 {ACMOVLLS, yml_rl, Pm, opBytes{0x46}}, 1002 {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}}, 1003 {ACMOVLMI, yml_rl, Pm, opBytes{0x48}}, 1004 {ACMOVLNE, yml_rl, Pm, opBytes{0x45}}, 1005 {ACMOVLOC, yml_rl, Pm, opBytes{0x41}}, 1006 {ACMOVLOS, yml_rl, Pm, opBytes{0x40}}, 1007 
{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}}, 1008 {ACMOVLPL, yml_rl, Pm, opBytes{0x49}}, 1009 {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}}, 1010 {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}}, 1011 {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}}, 1012 {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}}, 1013 {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}}, 1014 {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}}, 1015 {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}}, 1016 {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}}, 1017 {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}}, 1018 {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}}, 1019 {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}}, 1020 {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}}, 1021 {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}}, 1022 {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}}, 1023 {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}}, 1024 {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}}, 1025 {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}}, 1026 {ACMOVWCC, yml_rl, Pq, opBytes{0x43}}, 1027 {ACMOVWCS, yml_rl, Pq, opBytes{0x42}}, 1028 {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}}, 1029 {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}}, 1030 {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}}, 1031 {ACMOVWHI, yml_rl, Pq, opBytes{0x47}}, 1032 {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}}, 1033 {ACMOVWLS, yml_rl, Pq, opBytes{0x46}}, 1034 {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}}, 1035 {ACMOVWMI, yml_rl, Pq, opBytes{0x48}}, 1036 {ACMOVWNE, yml_rl, Pq, opBytes{0x45}}, 1037 {ACMOVWOC, yml_rl, Pq, opBytes{0x41}}, 1038 {ACMOVWOS, yml_rl, Pq, opBytes{0x40}}, 1039 {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}}, 1040 {ACMOVWPL, yml_rl, Pq, opBytes{0x49}}, 1041 {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}}, 1042 {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}}, 1043 {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1044 {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}}, 1045 {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}}, 1046 {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1047 {ACMPSB, ynone, Pb, opBytes{0xa6}}, 1048 {ACMPSD, yxcmpi, Px, 
opBytes{Pf2, 0xc2}}, 1049 {ACMPSL, ynone, Px, opBytes{0xa7}}, 1050 {ACMPSQ, ynone, Pw, opBytes{0xa7}}, 1051 {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}}, 1052 {ACMPSW, ynone, Pe, opBytes{0xa7}}, 1053 {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}}, 1054 {ACOMISD, yxm, Pe, opBytes{0x2f}}, 1055 {ACOMISS, yxm, Pm, opBytes{0x2f}}, 1056 {ACPUID, ynone, Pm, opBytes{0xa2}}, 1057 {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}}, 1058 {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}}, 1059 {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}}, 1060 {ACVTPD2PS, yxm, Pe, opBytes{0x5a}}, 1061 {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}}, 1062 {ACVTPS2PD, yxm, Pm, opBytes{0x5a}}, 1063 {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}}, 1064 {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}}, 1065 {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}}, 1066 {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}}, 1067 {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}}, 1068 {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}}, 1069 {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}}, 1070 {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}}, 1071 {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}}, 1072 {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}}, 1073 {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}}, 1074 {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}}, 1075 {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}}, 1076 {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}}, 1077 {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}}, 1078 {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}}, 1079 {ACWD, ynone, Pe, opBytes{0x99}}, 1080 {ACWDE, ynone, Px, opBytes{0x98}}, 1081 {ACQO, ynone, Pw, opBytes{0x99}}, 1082 {ADAA, ynone, P32, opBytes{0x27}}, 1083 {ADAS, ynone, P32, opBytes{0x2f}}, 1084 {ADECB, yscond, Pb, opBytes{0xfe, 01}}, 1085 {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}}, 1086 {ADECQ, yincq, Pw, opBytes{0xff, 01}}, 1087 {ADECW, yincq, Pe, opBytes{0xff, 01}}, 1088 {ADIVB, ydivb, Pb, opBytes{0xf6, 06}}, 1089 {ADIVL, ydivl, Px, opBytes{0xf7, 06}}, 1090 {ADIVPD, yxm, Pe, opBytes{0x5e}}, 
1091 {ADIVPS, yxm, Pm, opBytes{0x5e}}, 1092 {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}}, 1093 {ADIVSD, yxm, Pf2, opBytes{0x5e}}, 1094 {ADIVSS, yxm, Pf3, opBytes{0x5e}}, 1095 {ADIVW, ydivl, Pe, opBytes{0xf7, 06}}, 1096 {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}}, 1097 {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}}, 1098 {AEMMS, ynone, Pm, opBytes{0x77}}, 1099 {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}}, 1100 {AENTER, nil, 0, opBytes{}}, // botch 1101 {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}}, 1102 {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}}, 1103 {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}}, 1104 {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}}, 1105 {AHLT, ynone, Px, opBytes{0xf4}}, 1106 {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}}, 1107 {AIDIVL, ydivl, Px, opBytes{0xf7, 07}}, 1108 {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}}, 1109 {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}}, 1110 {AIMULB, ydivb, Pb, opBytes{0xf6, 05}}, 1111 {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1112 {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1113 {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}}, 1114 {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}}, 1115 {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}}, 1116 {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}}, 1117 {AINB, yin, Pb, opBytes{0xe4, 0xec}}, 1118 {AINW, yin, Pe, opBytes{0xe5, 0xed}}, 1119 {AINL, yin, Px, opBytes{0xe5, 0xed}}, 1120 {AINCB, yscond, Pb, opBytes{0xfe, 00}}, 1121 {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}}, 1122 {AINCQ, yincq, Pw, opBytes{0xff, 00}}, 1123 {AINCW, yincq, Pe, opBytes{0xff, 00}}, 1124 {AINSB, ynone, Pb, opBytes{0x6c}}, 1125 {AINSL, ynone, Px, opBytes{0x6d}}, 1126 {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}}, 1127 {AINSW, ynone, Pe, opBytes{0x6d}}, 1128 {AICEBP, ynone, Px, opBytes{0xf1}}, 1129 {AINT, yint, Px, opBytes{0xcd}}, 1130 {AINTO, ynone, P32, opBytes{0xce}}, 1131 {AIRETL, ynone, Px, opBytes{0xcf}}, 
1132 {AIRETQ, ynone, Pw, opBytes{0xcf}}, 1133 {AIRETW, ynone, Pe, opBytes{0xcf}}, 1134 {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}}, 1135 {AJCS, yjcond, Px, opBytes{0x72, 0x82}}, 1136 {AJCXZL, yloop, Px, opBytes{0xe3}}, 1137 {AJCXZW, yloop, Px, opBytes{0xe3}}, 1138 {AJCXZQ, yloop, Px, opBytes{0xe3}}, 1139 {AJEQ, yjcond, Px, opBytes{0x74, 0x84}}, 1140 {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}}, 1141 {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}}, 1142 {AJHI, yjcond, Px, opBytes{0x77, 0x87}}, 1143 {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}}, 1144 {AJLS, yjcond, Px, opBytes{0x76, 0x86}}, 1145 {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}}, 1146 {AJMI, yjcond, Px, opBytes{0x78, 0x88}}, 1147 {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}}, 1148 {AJNE, yjcond, Px, opBytes{0x75, 0x85}}, 1149 {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}}, 1150 {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}}, 1151 {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}}, 1152 {AJPL, yjcond, Px, opBytes{0x79, 0x89}}, 1153 {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}}, 1154 {AHADDPD, yxm, Pq, opBytes{0x7c}}, 1155 {AHADDPS, yxm, Pf2, opBytes{0x7c}}, 1156 {AHSUBPD, yxm, Pq, opBytes{0x7d}}, 1157 {AHSUBPS, yxm, Pf2, opBytes{0x7d}}, 1158 {ALAHF, ynone, Px, opBytes{0x9f}}, 1159 {ALARL, yml_rl, Pm, opBytes{0x02}}, 1160 {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}}, 1161 {ALARW, yml_rl, Pq, opBytes{0x02}}, 1162 {ALDDQU, ylddqu, Pf2, opBytes{0xf0}}, 1163 {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}}, 1164 {ALEAL, ym_rl, Px, opBytes{0x8d}}, 1165 {ALEAQ, ym_rl, Pw, opBytes{0x8d}}, 1166 {ALEAVEL, ynone, P32, opBytes{0xc9}}, 1167 {ALEAVEQ, ynone, Py, opBytes{0xc9}}, 1168 {ALEAVEW, ynone, Pe, opBytes{0xc9}}, 1169 {ALEAW, ym_rl, Pe, opBytes{0x8d}}, 1170 {ALOCK, ynone, Px, opBytes{0xf0}}, 1171 {ALODSB, ynone, Pb, opBytes{0xac}}, 1172 {ALODSL, ynone, Px, opBytes{0xad}}, 1173 {ALODSQ, ynone, Pw, opBytes{0xad}}, 1174 {ALODSW, ynone, Pe, opBytes{0xad}}, 1175 {ALONG, ybyte, Px, opBytes{4}}, 1176 {ALOOP, yloop, Px, opBytes{0xe2}}, 1177 {ALOOPEQ, yloop, Px, 
opBytes{0xe1}}, 1178 {ALOOPNE, yloop, Px, opBytes{0xe0}}, 1179 {ALTR, ydivl, Pm, opBytes{0x00, 03}}, 1180 {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}}, 1181 {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}}, 1182 {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}}, 1183 {ALSLL, yml_rl, Pm, opBytes{0x03}}, 1184 {ALSLW, yml_rl, Pq, opBytes{0x03}}, 1185 {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}}, 1186 {AMASKMOVOU, yxr, Pe, opBytes{0xf7}}, 1187 {AMASKMOVQ, ymr, Pm, opBytes{0xf7}}, 1188 {AMAXPD, yxm, Pe, opBytes{0x5f}}, 1189 {AMAXPS, yxm, Pm, opBytes{0x5f}}, 1190 {AMAXSD, yxm, Pf2, opBytes{0x5f}}, 1191 {AMAXSS, yxm, Pf3, opBytes{0x5f}}, 1192 {AMINPD, yxm, Pe, opBytes{0x5d}}, 1193 {AMINPS, yxm, Pm, opBytes{0x5d}}, 1194 {AMINSD, yxm, Pf2, opBytes{0x5d}}, 1195 {AMINSS, yxm, Pf3, opBytes{0x5d}}, 1196 {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}}, 1197 {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}}, 1198 {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}}, 1199 {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}}, 1200 {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}}, 1201 {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}}, 1202 {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}}, 1203 {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}}, 1204 {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}}, 1205 {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}}, 1206 {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}}, 1207 {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}}, 1208 {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}}, 1209 {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}}, 1210 {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}}, 1211 {AMOVHLPS, yxr, Pm, opBytes{0x12}}, 1212 {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}}, 1213 {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}}, 1214 {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1215 {AMOVLHPS, yxr, Pm, opBytes{0x16}}, 1216 {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}}, 1217 {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}}, 1218 {AMOVLQSX, yml_rl, Pw, opBytes{0x63}}, 1219 {AMOVLQZX, yml_rl, Px, opBytes{0x8b}}, 1220 {AMOVMSKPD, yxrrl, Pq, 
opBytes{0x50}}, 1221 {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}}, 1222 {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}}, 1223 {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}}, 1224 {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}}, 1225 {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}}, 1226 {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}}, 1227 {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}}, 1228 {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}}, 1229 {AMOVSB, ynone, Pb, opBytes{0xa4}}, 1230 {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}}, 1231 {AMOVSL, ynone, Px, opBytes{0xa5}}, 1232 {AMOVSQ, ynone, Pw, opBytes{0xa5}}, 1233 {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}}, 1234 {AMOVSW, ynone, Pe, opBytes{0xa5}}, 1235 {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}}, 1236 {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}}, 1237 {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}}, 1238 {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}}, 1239 {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}}, 1240 {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}}, 1241 {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}}, 1242 {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}}, 1243 {AMULB, ydivb, Pb, opBytes{0xf6, 04}}, 1244 {AMULL, ydivl, Px, opBytes{0xf7, 04}}, 1245 {AMULPD, yxm, Pe, opBytes{0x59}}, 1246 {AMULPS, yxm, Ym, opBytes{0x59}}, 1247 {AMULQ, ydivl, Pw, opBytes{0xf7, 04}}, 1248 {AMULSD, yxm, Pf2, opBytes{0x59}}, 1249 {AMULSS, yxm, Pf3, opBytes{0x59}}, 1250 {AMULW, ydivl, Pe, opBytes{0xf7, 04}}, 1251 {ANEGB, yscond, Pb, opBytes{0xf6, 03}}, 1252 {ANEGL, yscond, Px, opBytes{0xf7, 03}}, 1253 {ANEGQ, yscond, Pw, opBytes{0xf7, 03}}, 1254 {ANEGW, yscond, Pe, opBytes{0xf7, 03}}, 1255 {obj.ANOP, ynop, Px, opBytes{0, 0}}, 1256 {ANOTB, yscond, Pb, opBytes{0xf6, 02}}, 1257 {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here. 
1258 {ANOTQ, yscond, Pw, opBytes{0xf7, 02}}, 1259 {ANOTW, yscond, Pe, opBytes{0xf7, 02}}, 1260 {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}}, 1261 {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1262 {AORPD, yxm, Pq, opBytes{0x56}}, 1263 {AORPS, yxm, Pm, opBytes{0x56}}, 1264 {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1265 {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}}, 1266 {AOUTB, yin, Pb, opBytes{0xe6, 0xee}}, 1267 {AOUTL, yin, Px, opBytes{0xe7, 0xef}}, 1268 {AOUTW, yin, Pe, opBytes{0xe7, 0xef}}, 1269 {AOUTSB, ynone, Pb, opBytes{0x6e}}, 1270 {AOUTSL, ynone, Px, opBytes{0x6f}}, 1271 {AOUTSW, ynone, Pe, opBytes{0x6f}}, 1272 {APABSB, yxm_q4, Pq4, opBytes{0x1c}}, 1273 {APABSD, yxm_q4, Pq4, opBytes{0x1e}}, 1274 {APABSW, yxm_q4, Pq4, opBytes{0x1d}}, 1275 {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}}, 1276 {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}}, 1277 {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}}, 1278 {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}}, 1279 {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}}, 1280 {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}}, 1281 {APADDQ, yxm, Pe, opBytes{0xd4}}, 1282 {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}}, 1283 {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}}, 1284 {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}}, 1285 {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}}, 1286 {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}}, 1287 {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}}, 1288 {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}}, 1289 {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}}, 1290 {APAUSE, ynone, Px, opBytes{0xf3, 0x90}}, 1291 {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}}, 1292 {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}}, 1293 {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}}, 1294 {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}}, 1295 {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}}, 1296 {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}}, 1297 {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}}, 1298 {APCMPGTB, ymm, 
Py1, opBytes{0x64, Pe, 0x64}}, 1299 {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}}, 1300 {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}}, 1301 {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}}, 1302 {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}}, 1303 {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}}, 1304 {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}}, 1305 {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}}, 1306 {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}}, 1307 {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}}, 1308 {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}}, 1309 {APHADDSW, yxm_q4, Pq4, opBytes{0x03}}, 1310 {APHADDW, yxm_q4, Pq4, opBytes{0x01}}, 1311 {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}}, 1312 {APHSUBD, yxm_q4, Pq4, opBytes{0x06}}, 1313 {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}}, 1314 {APHSUBW, yxm_q4, Pq4, opBytes{0x05}}, 1315 {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}}, 1316 {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}}, 1317 {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}}, 1318 {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}}, 1319 {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}}, 1320 {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}}, 1321 {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}}, 1322 {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}}, 1323 {APMAXSW, yxm, Pe, opBytes{0xee}}, 1324 {APMAXUB, yxm, Pe, opBytes{0xde}}, 1325 {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}}, 1326 {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}}, 1327 {APMINSB, yxm_q4, Pq4, opBytes{0x38}}, 1328 {APMINSD, yxm_q4, Pq4, opBytes{0x39}}, 1329 {APMINSW, yxm, Pe, opBytes{0xea}}, 1330 {APMINUB, yxm, Pe, opBytes{0xda}}, 1331 {APMINUD, yxm_q4, Pq4, opBytes{0x3b}}, 1332 {APMINUW, yxm_q4, Pq4, opBytes{0x3a}}, 1333 {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}}, 1334 {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}}, 1335 {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}}, 1336 {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}}, 1337 {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}}, 1338 {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}}, 1339 {APMOVSXWQ, yxm_q4, Pq4, 
opBytes{0x24}}, 1340 {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}}, 1341 {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}}, 1342 {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}}, 1343 {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}}, 1344 {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}}, 1345 {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}}, 1346 {APMULDQ, yxm_q4, Pq4, opBytes{0x28}}, 1347 {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}}, 1348 {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}}, 1349 {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}}, 1350 {APMULLD, yxm_q4, Pq4, opBytes{0x40}}, 1351 {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}}, 1352 {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}}, 1353 {APOPAL, ynone, P32, opBytes{0x61}}, 1354 {APOPAW, ynone, Pe, opBytes{0x61}}, 1355 {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}}, 1356 {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}}, 1357 {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}}, 1358 {APOPFL, ynone, P32, opBytes{0x9d}}, 1359 {APOPFQ, ynone, Py, opBytes{0x9d}}, 1360 {APOPFW, ynone, Pe, opBytes{0x9d}}, 1361 {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}}, 1362 {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}}, 1363 {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}}, 1364 {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}}, 1365 {APSADBW, yxm, Pq, opBytes{0xf6}}, 1366 {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}}, 1367 {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}}, 1368 {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}}, 1369 {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}}, 1370 {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}}, 1371 {APSIGNB, yxm_q4, Pq4, opBytes{0x08}}, 1372 {APSIGND, yxm_q4, Pq4, opBytes{0x0a}}, 1373 {APSIGNW, yxm_q4, Pq4, opBytes{0x09}}, 1374 {APSLLO, ypsdq, Pq, opBytes{0x73, 07}}, 1375 {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}}, 1376 {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}}, 1377 {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}}, 1378 {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}}, 1379 {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 
04}}, 1380 {APSRLO, ypsdq, Pq, opBytes{0x73, 03}}, 1381 {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}}, 1382 {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}}, 1383 {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}}, 1384 {APSUBB, yxm, Pe, opBytes{0xf8}}, 1385 {APSUBL, yxm, Pe, opBytes{0xfa}}, 1386 {APSUBQ, yxm, Pe, opBytes{0xfb}}, 1387 {APSUBSB, yxm, Pe, opBytes{0xe8}}, 1388 {APSUBSW, yxm, Pe, opBytes{0xe9}}, 1389 {APSUBUSB, yxm, Pe, opBytes{0xd8}}, 1390 {APSUBUSW, yxm, Pe, opBytes{0xd9}}, 1391 {APSUBW, yxm, Pe, opBytes{0xf9}}, 1392 {APTEST, yxm_q4, Pq4, opBytes{0x17}}, 1393 {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}}, 1394 {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}}, 1395 {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}}, 1396 {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}}, 1397 {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}}, 1398 {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}}, 1399 {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}}, 1400 {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}}, 1401 {APUSHAL, ynone, P32, opBytes{0x60}}, 1402 {APUSHAW, ynone, Pe, opBytes{0x60}}, 1403 {APUSHFL, ynone, P32, opBytes{0x9c}}, 1404 {APUSHFQ, ynone, Py, opBytes{0x9c}}, 1405 {APUSHFW, ynone, Pe, opBytes{0x9c}}, 1406 {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1407 {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1408 {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}}, 1409 {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}}, 1410 {AQUAD, ybyte, Px, opBytes{8}}, 1411 {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}}, 1412 {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1413 {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1414 {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}}, 1415 {ARCPPS, yxm, Pm, opBytes{0x53}}, 1416 {ARCPSS, yxm, Pf3, opBytes{0x53}}, 1417 {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}}, 1418 {ARCRL, yshl, Px, opBytes{0xd1, 
03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1419 {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1420 {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}}, 1421 {AREP, ynone, Px, opBytes{0xf3}}, 1422 {AREPN, ynone, Px, opBytes{0xf2}}, 1423 {obj.ARET, ynone, Px, opBytes{0xc3}}, 1424 {ARETFW, yret, Pe, opBytes{0xcb, 0xca}}, 1425 {ARETFL, yret, Px, opBytes{0xcb, 0xca}}, 1426 {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}}, 1427 {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}}, 1428 {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1429 {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1430 {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}}, 1431 {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}}, 1432 {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1433 {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1434 {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}}, 1435 {ARSQRTPS, yxm, Pm, opBytes{0x52}}, 1436 {ARSQRTSS, yxm, Pf3, opBytes{0x52}}, 1437 {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL 1438 {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1439 {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1440 {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1441 {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1442 {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}}, 1443 {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1444 {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1445 {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}}, 1446 {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}}, 1447 {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1448 {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}}, 1449 {ASBBW, yaddl, Pe, opBytes{0x83, 03, 
0x1d, 0x81, 03, 0x19, 0x1b}}, 1450 {ASCASB, ynone, Pb, opBytes{0xae}}, 1451 {ASCASL, ynone, Px, opBytes{0xaf}}, 1452 {ASCASQ, ynone, Pw, opBytes{0xaf}}, 1453 {ASCASW, ynone, Pe, opBytes{0xaf}}, 1454 {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}}, 1455 {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}}, 1456 {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}}, 1457 {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}}, 1458 {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}}, 1459 {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}}, 1460 {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}}, 1461 {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}}, 1462 {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}}, 1463 {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}}, 1464 {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}}, 1465 {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}}, 1466 {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}}, 1467 {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}}, 1468 {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}}, 1469 {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}}, 1470 {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}}, 1471 {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1472 {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1473 {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}}, 1474 {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}}, 1475 {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1476 {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1477 {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}}, 1478 {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}}, 1479 {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}}, 1480 {ASQRTPD, yxm, Pe, opBytes{0x51}}, 1481 {ASQRTPS, yxm, Pm, opBytes{0x51}}, 1482 {ASQRTSD, yxm, Pf2, opBytes{0x51}}, 1483 {ASQRTSS, yxm, Pf3, opBytes{0x51}}, 1484 {ASTC, ynone, Px, opBytes{0xf9}}, 1485 {ASTD, ynone, Px, opBytes{0xfd}}, 1486 {ASTI, ynone, Px, opBytes{0xfb}}, 1487 {ASTMXCSR, ysvrs_om, Pm, 
opBytes{0xae, 03, 0xae, 03}}, 1488 {ASTOSB, ynone, Pb, opBytes{0xaa}}, 1489 {ASTOSL, ynone, Px, opBytes{0xab}}, 1490 {ASTOSQ, ynone, Pw, opBytes{0xab}}, 1491 {ASTOSW, ynone, Pe, opBytes{0xab}}, 1492 {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}}, 1493 {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1494 {ASUBPD, yxm, Pe, opBytes{0x5c}}, 1495 {ASUBPS, yxm, Pm, opBytes{0x5c}}, 1496 {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1497 {ASUBSD, yxm, Pf2, opBytes{0x5c}}, 1498 {ASUBSS, yxm, Pf3, opBytes{0x5c}}, 1499 {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}}, 1500 {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}}, 1501 {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall 1502 {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}}, 1503 {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1504 {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1505 {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}}, 1506 {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}}, 1507 {obj.ATEXT, ytext, Px, opBytes{}}, 1508 {AUCOMISD, yxm, Pe, opBytes{0x2e}}, 1509 {AUCOMISS, yxm, Pm, opBytes{0x2e}}, 1510 {AUNPCKHPD, yxm, Pe, opBytes{0x15}}, 1511 {AUNPCKHPS, yxm, Pm, opBytes{0x15}}, 1512 {AUNPCKLPD, yxm, Pe, opBytes{0x14}}, 1513 {AUNPCKLPS, yxm, Pm, opBytes{0x14}}, 1514 {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}}, 1515 {AVERR, ydivl, Pm, opBytes{0x00, 04}}, 1516 {AVERW, ydivl, Pm, opBytes{0x00, 05}}, 1517 {AWAIT, ynone, Px, opBytes{0x9b}}, 1518 {AWORD, ybyte, Px, opBytes{2}}, 1519 {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}}, 1520 {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}}, 1521 {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}}, 1522 {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}}, 1523 {AXLAT, ynone, Px, opBytes{0xd7}}, 1524 {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}}, 1525 {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1526 {AXORPD, yxm, Pe, opBytes{0x57}}, 1527 
{AXORPS, yxm, Pm, opBytes{0x57}}, 1528 {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1529 {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}}, 1530 {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}}, 1531 {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}}, 1532 {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}}, 1533 {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}}, 1534 {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}}, 1535 {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}}, 1536 {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}}, 1537 {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}}, 1538 {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}}, 1539 {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}}, 1540 {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}}, 1541 {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}}, 1542 {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}}, 1543 {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}}, 1544 {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}}, 1545 {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}}, 1546 {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}}, 1547 {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}}, 1548 {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}}, 1549 {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}}, 1550 {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}}, 1551 {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}}, 1552 {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}}, 1553 {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}}, 1554 {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}}, 1555 {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}}, 1556 {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}}, 1557 {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}}, 1558 {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch 1559 {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch 1560 {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}}, 1561 {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}}, 1562 {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}}, 1563 {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}}, 1564 {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}}, 1565 {AFCOML, yfmvx, Px, opBytes{0xda, 02}}, 1566 {AFCOMLP, yfmvx, Px, 
opBytes{0xda, 03}}, 1567 {AFCOMW, yfmvx, Px, opBytes{0xde, 02}}, 1568 {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}}, 1569 {AFUCOM, ycompp, Px, opBytes{0xdd, 04}}, 1570 {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}}, 1571 {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}}, 1572 {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}}, 1573 {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}}, 1574 {AFADDDP, ycompp, Px, opBytes{0xde, 00}}, 1575 {AFADDW, yfmvx, Px, opBytes{0xde, 00}}, 1576 {AFADDL, yfmvx, Px, opBytes{0xda, 00}}, 1577 {AFADDF, yfmvx, Px, opBytes{0xd8, 00}}, 1578 {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}}, 1579 {AFMULDP, ycompp, Px, opBytes{0xde, 01}}, 1580 {AFMULW, yfmvx, Px, opBytes{0xde, 01}}, 1581 {AFMULL, yfmvx, Px, opBytes{0xda, 01}}, 1582 {AFMULF, yfmvx, Px, opBytes{0xd8, 01}}, 1583 {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}}, 1584 {AFSUBDP, ycompp, Px, opBytes{0xde, 05}}, 1585 {AFSUBW, yfmvx, Px, opBytes{0xde, 04}}, 1586 {AFSUBL, yfmvx, Px, opBytes{0xda, 04}}, 1587 {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}}, 1588 {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}}, 1589 {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}}, 1590 {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}}, 1591 {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}}, 1592 {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}}, 1593 {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}}, 1594 {AFDIVDP, ycompp, Px, opBytes{0xde, 07}}, 1595 {AFDIVW, yfmvx, Px, opBytes{0xde, 06}}, 1596 {AFDIVL, yfmvx, Px, opBytes{0xda, 06}}, 1597 {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}}, 1598 {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}}, 1599 {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}}, 1600 {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}}, 1601 {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}}, 1602 {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}}, 1603 {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}}, 1604 {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}}, 1605 {AFFREE, nil, 0, opBytes{}}, 1606 {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}}, 1607 
{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}}, 1608 {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}}, 1609 {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}}, 1610 {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}}, 1611 {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}}, 1612 {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}}, 1613 {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}}, 1614 {AFABS, ynone, Px, opBytes{0xd9, 0xe1}}, 1615 {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}}, 1616 {AFBSTP, yclflush, Px, opBytes{0xdf, 06}}, 1617 {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}}, 1618 {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}}, 1619 {AFCOS, ynone, Px, opBytes{0xd9, 0xff}}, 1620 {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}}, 1621 {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}}, 1622 {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}}, 1623 {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}}, 1624 {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}}, 1625 {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}}, 1626 {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}}, 1627 {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}}, 1628 {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}}, 1629 {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}}, 1630 {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}}, 1631 {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}}, 1632 {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}}, 1633 {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}}, 1634 {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}}, 1635 {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}}, 1636 {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}}, 1637 {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}}, 1638 {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}}, 1639 {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}}, 1640 {AFTST, ynone, Px, opBytes{0xd9, 0xe4}}, 1641 {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}}, 1642 {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}}, 1643 {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}}, 1644 {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}}, 1645 {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}}, 1646 {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}}, 1647 {ACMPXCHGW, yrl_ml, Pe, 
opBytes{0x0f, 0xb1}}, 1648 {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}}, 1649 {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}}, 1650 {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}}, 1651 {AINVD, ynone, Pm, opBytes{0x08}}, 1652 {AINVLPG, ydivb, Pm, opBytes{0x01, 07}}, 1653 {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}}, 1654 {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}}, 1655 {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}}, 1656 {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}}, 1657 {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}}, 1658 {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}}, 1659 {ARDMSR, ynone, Pm, opBytes{0x32}}, 1660 {ARDPMC, ynone, Pm, opBytes{0x33}}, 1661 {ARDTSC, ynone, Pm, opBytes{0x31}}, 1662 {ARSM, ynone, Pm, opBytes{0xaa}}, 1663 {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}}, 1664 {ASYSRET, ynone, Pm, opBytes{0x07}}, 1665 {AWBINVD, ynone, Pm, opBytes{0x09}}, 1666 {AWRMSR, ynone, Pm, opBytes{0x30}}, 1667 {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}}, 1668 {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}}, 1669 {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}}, 1670 {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}}, 1671 {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}}, 1672 {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}}, 1673 {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1674 {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1675 {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}}, 1676 {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}}, 1677 {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}}, 1678 {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}}, 1679 {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}}, 1680 {AMOVQL, yrl_ml, Px, opBytes{0x89}}, 1681 {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}}, 1682 {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}}, 1683 {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}}, 1684 {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}}, 1685 {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}}, 1686 {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}}, 1687 {AAESKEYGENASSIST, 
yxshuf, Pq, opBytes{0x3a, 0xdf, 0}}, 1688 {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}}, 1689 {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}}, 1690 {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}}, 1691 {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}}, 1692 {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}}, 1693 {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}}, 1694 {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}}, 1695 {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}}, 1696 {AMOVDDUP, yxm, Pf2, opBytes{0x12}}, 1697 {AMOVSHDUP, yxm, Pf3, opBytes{0x16}}, 1698 {AMOVSLDUP, yxm, Pf3, opBytes{0x12}}, 1699 {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}}, 1700 {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}}, 1701 {AUD1, ynone, Pm, opBytes{0xb9, 0}}, 1702 {AUD2, ynone, Pm, opBytes{0x0b, 0}}, 1703 {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}}, 1704 {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}}, 1705 {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}}, 1706 {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}}, 1707 {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}}, 1708 {ALMSW, ydivl, Pm, opBytes{0x01, 06}}, 1709 {ALLDT, ydivl, Pm, opBytes{0x00, 02}}, 1710 {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}}, 1711 {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}}, 1712 {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1713 {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1714 {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}}, 1715 {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}}, 1716 {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}}, 1717 {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}}, 1718 {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}}, 1719 {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}}, 1720 {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}}, 1721 {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}}, 1722 {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}}, 1723 {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}}, 1724 {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}}, 1725 {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 
05}}, 1726 {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}}, 1727 {ASGDT, yclflush, Pm, opBytes{0x01, 00}}, 1728 {ASIDT, yclflush, Pm, opBytes{0x01, 01}}, 1729 {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}}, 1730 {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}}, 1731 {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}}, 1732 {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}}, 1733 {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}}, 1734 {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}}, 1735 {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}}, 1736 {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}}, 1737 {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}}, 1738 {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}}, 1739 {AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1740 {AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}}, 1741 {AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}}, 1742 {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}}, 1743 {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}}, 1744 {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}}, 1745 {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}}, 1746 {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}}, 1747 {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}}, 1748 {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}}, 1749 {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}}, 1750 {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}}, 1751 {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}}, 1752 {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}}, 1753 {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}}, 1754 {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}}, 1755 {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}}, 1756 {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}}, 1757 {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}}, 1758 {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}}, 1759 {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}}, 1760 {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}}, 1761 {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}}, 1762 {ARDGSBASEL, 
yrdrand, Pf3, opBytes{0xae, 01}}, 1763 {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}}, 1764 {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}}, 1765 {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}}, 1766 {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}}, 1767 {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}}, 1768 {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}}, 1769 {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}}, 1770 {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}}, 1771 {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}}, 1772 {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}}, 1773 {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}}, 1774 {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}}, 1775 {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}}, 1776 {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}}, 1777 {ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}}, 1778 1779 {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}}, 1780 {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}}, 1781 {AXACQUIRE, ynone, Px, opBytes{0xf2}}, 1782 {AXRELEASE, ynone, Px, opBytes{0xf3}}, 1783 {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}}, 1784 {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}}, 1785 {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}}, 1786 {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}}, 1787 {AXGETBV, ynone, Pm, opBytes{01, 0xd0}}, 1788 {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}}, 1789 {obj.APCDATA, ypcdata, Px, opBytes{0, 0}}, 1790 {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}}, 1791 {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}}, 1792 1793 {obj.AEND, nil, 0, opBytes{}}, 1794 {0, nil, 0, opBytes{}}, 1795} 1796 1797var opindex [(ALAST + 1) & obj.AMask]*Optab 1798 1799// useAbs reports whether s describes a symbol that must avoid pc-relative addressing. 1800// This happens on systems like Solaris that call .so functions instead of system calls. 1801// It does not seem to be necessary for any other systems. This is probably working 1802// around a Solaris-specific bug that should be fixed differently, but we don't know 1803// what that bug is. And this does fix it. 
1804func useAbs(ctxt *obj.Link, s *obj.LSym) bool { 1805 if ctxt.Headtype == objabi.Hsolaris { 1806 // All the Solaris dynamic imports from libc.so begin with "libc_". 1807 return strings.HasPrefix(s.Name, "libc_") 1808 } 1809 return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared 1810} 1811 1812// single-instruction no-ops of various lengths. 1813// constructed by hand and disassembled with gdb to verify. 1814// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion. 1815var nop = [][16]uint8{ 1816 {0x90}, 1817 {0x66, 0x90}, 1818 {0x0F, 0x1F, 0x00}, 1819 {0x0F, 0x1F, 0x40, 0x00}, 1820 {0x0F, 0x1F, 0x44, 0x00, 0x00}, 1821 {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, 1822 {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, 1823 {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1824 {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1825} 1826 1827// Native Client rejects the repeated 0x66 prefix. 1828// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, 1829func fillnop(p []byte, n int) { 1830 var m int 1831 1832 for n > 0 { 1833 m = n 1834 if m > len(nop) { 1835 m = len(nop) 1836 } 1837 copy(p[:m], nop[m-1][:m]) 1838 p = p[m:] 1839 n -= m 1840 } 1841} 1842 1843func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 { 1844 s.Grow(int64(c) + int64(pad)) 1845 fillnop(s.P[c:], int(pad)) 1846 return c + pad 1847} 1848 1849func spadjop(ctxt *obj.Link, l, q obj.As) obj.As { 1850 if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 { 1851 return l 1852 } 1853 return q 1854} 1855 1856// isJump returns whether p is a jump instruction. 1857// It is used to ensure that no standalone or macro-fused jump will straddle 1858// or end on a 32 byte boundary by inserting NOPs before the jumps. 
1859func isJump(p *obj.Prog) bool { 1860 return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL || 1861 p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO 1862} 1863 1864// lookForJCC returns the first real instruction starting from p, if that instruction is a conditional 1865// jump. Otherwise, nil is returned. 1866func lookForJCC(p *obj.Prog) *obj.Prog { 1867 // Skip any PCDATA, FUNCDATA or NOP instructions 1868 var q *obj.Prog 1869 for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link { 1870 } 1871 1872 if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL { 1873 return nil 1874 } 1875 1876 switch q.As { 1877 case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI, 1878 AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT: 1879 default: 1880 return nil 1881 } 1882 1883 return q 1884} 1885 1886// fusedJump determines whether p can be fused with a subsequent conditional jump instruction. 1887// If it can, we return true followed by the total size of the fused jump. If it can't, we return false. 1888// Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2. 1889func fusedJump(p *obj.Prog) (bool, uint8) { 1890 var fusedSize uint8 1891 1892 // The first instruction in a macro fused pair may be preceded by the LOCK prefix, 1893 // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we 1894 // need to be careful to insert any padding before the locks rather than directly after them. 
1895 1896 if p.As == AXRELEASE || p.As == AXACQUIRE { 1897 fusedSize += p.Isize 1898 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1899 } 1900 if p == nil { 1901 return false, 0 1902 } 1903 } 1904 if p.As == ALOCK { 1905 fusedSize += p.Isize 1906 for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link { 1907 } 1908 if p == nil { 1909 return false, 0 1910 } 1911 } 1912 cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW 1913 1914 cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ || 1915 p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp 1916 1917 testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW || 1918 p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW 1919 1920 incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW || 1921 p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW 1922 1923 if !cmpAddSub && !testAnd && !incDec { 1924 return false, 0 1925 } 1926 1927 if !incDec { 1928 var argOne obj.AddrType 1929 var argTwo obj.AddrType 1930 if cmp { 1931 argOne = p.From.Type 1932 argTwo = p.To.Type 1933 } else { 1934 argOne = p.To.Type 1935 argTwo = p.From.Type 1936 } 1937 if argOne == obj.TYPE_REG { 1938 if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM { 1939 return false, 0 1940 } 1941 } else if argOne == obj.TYPE_MEM { 1942 if argTwo != obj.TYPE_REG { 1943 return false, 0 1944 } 1945 } else { 1946 return false, 0 1947 } 1948 } 1949 1950 fusedSize += p.Isize 1951 jmp := lookForJCC(p) 1952 if jmp == nil { 1953 return false, 0 1954 } 1955 1956 fusedSize += jmp.Isize 1957 1958 if testAnd { 1959 return true, fusedSize 1960 } 1961 1962 if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI || 1963 jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC { 1964 return false, 0 1965 } 1966 1967 if cmpAddSub { 1968 return true, fusedSize 
1969 } 1970 1971 if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS { 1972 return false, 0 1973 } 1974 1975 return true, fusedSize 1976} 1977 1978type padJumpsCtx int32 1979 1980func makePjcCtx(ctxt *obj.Link) padJumpsCtx { 1981 // Disable jump padding on 32 bit builds by setting 1982 // padJumps to 0. 1983 if ctxt.Arch.Family == sys.I386 { 1984 return padJumpsCtx(0) 1985 } 1986 1987 // Disable jump padding for hand written assembly code. 1988 if ctxt.IsAsm { 1989 return padJumpsCtx(0) 1990 } 1991 1992 return padJumpsCtx(32) 1993} 1994 1995// padJump detects whether the instruction being assembled is a standalone or a macro-fused 1996// jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does 1997// not cross or end on a 32 byte boundary. 1998func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 { 1999 if pjc == 0 { 2000 return c 2001 } 2002 2003 var toPad int32 2004 fj, fjSize := fusedJump(p) 2005 mask := int32(pjc - 1) 2006 if fj { 2007 if (c&mask)+int32(fjSize) >= int32(pjc) { 2008 toPad = int32(pjc) - (c & mask) 2009 } 2010 } else if isJump(p) { 2011 if (c&mask)+int32(p.Isize) >= int32(pjc) { 2012 toPad = int32(pjc) - (c & mask) 2013 } 2014 } 2015 if toPad <= 0 { 2016 return c 2017 } 2018 2019 return noppad(ctxt, s, c, toPad) 2020} 2021 2022// reAssemble is called if an instruction's size changes during assembly. If 2023// it does and the instruction is a standalone or a macro-fused jump we need to 2024// reassemble. 2025func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool { 2026 if pjc == 0 { 2027 return false 2028 } 2029 2030 fj, _ := fusedJump(p) 2031 return fj || isJump(p) 2032} 2033 2034type nopPad struct { 2035 p *obj.Prog // Instruction before the pad 2036 n int32 // Size of the pad 2037} 2038 2039// requireAlignment ensures that the function alignment is at 2040// least as high as a, which should be a power of two 2041// and between 8 and 2048, inclusive. 
2042// 2043// the boolean result indicates whether the alignment meets those constraints 2044func requireAlignment(a int64, ctxt *obj.Link, cursym *obj.LSym) bool { 2045 if !((a&(a-1) == 0) && 8 <= a && a <= 2048) { 2046 ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a) 2047 return false 2048 } 2049 // By default function alignment is 32 bytes for amd64 2050 if cursym.Func().Align < int32(a) { 2051 cursym.Func().Align = int32(a) 2052 } 2053 return true 2054} 2055 2056func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { 2057 if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 { 2058 ctxt.Diag("-spectre=ret not supported on 386") 2059 ctxt.Retpoline = false // don't keep printing 2060 } 2061 2062 pjc := makePjcCtx(ctxt) 2063 2064 if s.P != nil { 2065 return 2066 } 2067 2068 if ycover[0] == 0 { 2069 ctxt.Diag("x86 tables not initialized, call x86.instinit first") 2070 } 2071 2072 for p := s.Func().Text; p != nil; p = p.Link { 2073 if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil { 2074 p.To.SetTarget(p) 2075 } 2076 if p.As == AADJSP { 2077 p.To.Type = obj.TYPE_REG 2078 p.To.Reg = REG_SP 2079 // Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive. 2080 // One exception: It is smaller to encode $-0x80 than $0x80. 2081 // For that case, flip the sign and the op: 2082 // Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'. 
2083 switch v := p.From.Offset; { 2084 case v == 0: 2085 p.As = obj.ANOP 2086 case v == 0x80 || (v < 0 && v != -0x80): 2087 p.As = spadjop(ctxt, AADDL, AADDQ) 2088 p.From.Offset *= -1 2089 default: 2090 p.As = spadjop(ctxt, ASUBL, ASUBQ) 2091 } 2092 } 2093 if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) { 2094 if p.To.Type != obj.TYPE_REG { 2095 ctxt.Diag("non-retpoline-compatible: %v", p) 2096 continue 2097 } 2098 p.To.Type = obj.TYPE_BRANCH 2099 p.To.Name = obj.NAME_EXTERN 2100 p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg))) 2101 p.To.Reg = 0 2102 p.To.Offset = 0 2103 } 2104 } 2105 2106 var count int64 // rough count of number of instructions 2107 for p := s.Func().Text; p != nil; p = p.Link { 2108 count++ 2109 p.Back = branchShort // use short branches first time through 2110 if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) { 2111 p.Back |= branchBackwards 2112 q.Back |= branchLoopHead 2113 } 2114 } 2115 s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction 2116 2117 var ab AsmBuf 2118 var n int 2119 var c int32 2120 errors := ctxt.Errors 2121 var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies) 2122 nrelocs0 := len(s.R) 2123 for { 2124 // This loop continues while there are reasons to re-assemble 2125 // whole block, like the presence of long forward jumps. 
2126 reAssemble := false 2127 for i := range s.R[nrelocs0:] { 2128 s.R[nrelocs0+i] = obj.Reloc{} 2129 } 2130 s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler 2131 s.P = s.P[:0] 2132 c = 0 2133 var pPrev *obj.Prog 2134 nops = nops[:0] 2135 for p := s.Func().Text; p != nil; p = p.Link { 2136 c0 := c 2137 c = pjc.padJump(ctxt, s, p, c) 2138 2139 if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX { 2140 v := obj.AlignmentPadding(c, p, ctxt, s) 2141 if v > 0 { 2142 s.Grow(int64(c) + int64(v)) 2143 fillnop(s.P[c:], int(v)) 2144 } 2145 p.Pc = int64(c) 2146 c += int32(v) 2147 pPrev = p 2148 continue 2149 2150 } 2151 2152 if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 { 2153 // pad with NOPs 2154 v := -c & (loopAlign - 1) 2155 2156 if v <= maxLoopPad { 2157 s.Grow(int64(c) + int64(v)) 2158 fillnop(s.P[c:], int(v)) 2159 c += v 2160 } 2161 } 2162 2163 p.Pc = int64(c) 2164 2165 // process forward jumps to p 2166 for q := p.Rel; q != nil; q = q.Forwd { 2167 v := int32(p.Pc - (q.Pc + int64(q.Isize))) 2168 if q.Back&branchShort != 0 { 2169 if v > 127 { 2170 reAssemble = true 2171 q.Back ^= branchShort 2172 } 2173 2174 if q.As == AJCXZL || q.As == AXBEGIN { 2175 s.P[q.Pc+2] = byte(v) 2176 } else { 2177 s.P[q.Pc+1] = byte(v) 2178 } 2179 } else { 2180 binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v)) 2181 } 2182 } 2183 2184 p.Rel = nil 2185 2186 p.Pc = int64(c) 2187 ab.asmins(ctxt, s, p) 2188 m := ab.Len() 2189 if int(p.Isize) != m { 2190 p.Isize = uint8(m) 2191 if pjc.reAssemble(p) { 2192 // We need to re-assemble here to check for jumps and fused jumps 2193 // that span or end on 32 byte boundaries. 2194 reAssemble = true 2195 } 2196 } 2197 2198 s.Grow(p.Pc + int64(m)) 2199 copy(s.P[p.Pc:], ab.Bytes()) 2200 // If there was padding, remember it. 
2201 if pPrev != nil && !ctxt.IsAsm && c > c0 { 2202 nops = append(nops, nopPad{p: pPrev, n: c - c0}) 2203 } 2204 c += int32(m) 2205 pPrev = p 2206 } 2207 2208 n++ 2209 if n > 1000 { 2210 ctxt.Diag("span must be looping") 2211 log.Fatalf("loop") 2212 } 2213 if !reAssemble { 2214 break 2215 } 2216 if ctxt.Errors > errors { 2217 return 2218 } 2219 } 2220 // splice padding nops into Progs 2221 for _, n := range nops { 2222 pp := n.p 2223 np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)} 2224 pp.Link = np 2225 } 2226 2227 s.Size = int64(c) 2228 2229 if false { /* debug['a'] > 1 */ 2230 fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0) 2231 var i int 2232 for i = 0; i < len(s.P); i++ { 2233 fmt.Printf(" %.2x", s.P[i]) 2234 if i%16 == 15 { 2235 fmt.Printf("\n %.6x", uint(i+1)) 2236 } 2237 } 2238 2239 if i%16 != 0 { 2240 fmt.Printf("\n") 2241 } 2242 2243 for i := 0; i < len(s.R); i++ { 2244 r := &s.R[i] 2245 fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add) 2246 } 2247 } 2248 2249 // Mark nonpreemptible instruction sequences. 2250 // The 2-instruction TLS access sequence 2251 // MOVQ TLS, BX 2252 // MOVQ 0(BX)(TLS*1), BX 2253 // is not async preemptible, as if it is preempted and resumed on 2254 // a different thread, the TLS address may become invalid. 2255 if !CanUse1InsnTLS(ctxt) { 2256 useTLS := func(p *obj.Prog) bool { 2257 // Only need to mark the second instruction, which has 2258 // REG_TLS as Index. (It is okay to interrupt and restart 2259 // the first instruction.) 2260 return p.From.Index == REG_TLS 2261 } 2262 obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil) 2263 } 2264 2265 // Now that we know byte offsets, we can generate jump table entries. 2266 // TODO: could this live in obj instead of obj/$ARCH? 
2267 for _, jt := range s.Func().JumpTables { 2268 for i, p := range jt.Targets { 2269 // The ith jumptable entry points to the p.Pc'th 2270 // byte in the function symbol s. 2271 jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc) 2272 } 2273 } 2274} 2275 2276func instinit(ctxt *obj.Link) { 2277 if ycover[0] != 0 { 2278 // Already initialized; stop now. 2279 // This happens in the cmd/asm tests, 2280 // each of which re-initializes the arch. 2281 return 2282 } 2283 2284 switch ctxt.Headtype { 2285 case objabi.Hplan9: 2286 plan9privates = ctxt.Lookup("_privates") 2287 } 2288 2289 for i := range avxOptab { 2290 c := avxOptab[i].as 2291 if opindex[c&obj.AMask] != nil { 2292 ctxt.Diag("phase error in avxOptab: %d (%v)", i, c) 2293 } 2294 opindex[c&obj.AMask] = &avxOptab[i] 2295 } 2296 for i := 1; optab[i].as != 0; i++ { 2297 c := optab[i].as 2298 if opindex[c&obj.AMask] != nil { 2299 ctxt.Diag("phase error in optab: %d (%v)", i, c) 2300 } 2301 opindex[c&obj.AMask] = &optab[i] 2302 } 2303 2304 for i := 0; i < Ymax; i++ { 2305 ycover[i*Ymax+i] = 1 2306 } 2307 2308 ycover[Yi0*Ymax+Yu2] = 1 2309 ycover[Yi1*Ymax+Yu2] = 1 2310 2311 ycover[Yi0*Ymax+Yi8] = 1 2312 ycover[Yi1*Ymax+Yi8] = 1 2313 ycover[Yu2*Ymax+Yi8] = 1 2314 ycover[Yu7*Ymax+Yi8] = 1 2315 2316 ycover[Yi0*Ymax+Yu7] = 1 2317 ycover[Yi1*Ymax+Yu7] = 1 2318 ycover[Yu2*Ymax+Yu7] = 1 2319 2320 ycover[Yi0*Ymax+Yu8] = 1 2321 ycover[Yi1*Ymax+Yu8] = 1 2322 ycover[Yu2*Ymax+Yu8] = 1 2323 ycover[Yu7*Ymax+Yu8] = 1 2324 2325 ycover[Yi0*Ymax+Ys32] = 1 2326 ycover[Yi1*Ymax+Ys32] = 1 2327 ycover[Yu2*Ymax+Ys32] = 1 2328 ycover[Yu7*Ymax+Ys32] = 1 2329 ycover[Yu8*Ymax+Ys32] = 1 2330 ycover[Yi8*Ymax+Ys32] = 1 2331 2332 ycover[Yi0*Ymax+Yi32] = 1 2333 ycover[Yi1*Ymax+Yi32] = 1 2334 ycover[Yu2*Ymax+Yi32] = 1 2335 ycover[Yu7*Ymax+Yi32] = 1 2336 ycover[Yu8*Ymax+Yi32] = 1 2337 ycover[Yi8*Ymax+Yi32] = 1 2338 ycover[Ys32*Ymax+Yi32] = 1 2339 2340 ycover[Yi0*Ymax+Yi64] = 1 2341 ycover[Yi1*Ymax+Yi64] = 1 2342 ycover[Yu7*Ymax+Yi64] = 1 2343 
ycover[Yu2*Ymax+Yi64] = 1 2344 ycover[Yu8*Ymax+Yi64] = 1 2345 ycover[Yi8*Ymax+Yi64] = 1 2346 ycover[Ys32*Ymax+Yi64] = 1 2347 ycover[Yi32*Ymax+Yi64] = 1 2348 2349 ycover[Yal*Ymax+Yrb] = 1 2350 ycover[Ycl*Ymax+Yrb] = 1 2351 ycover[Yax*Ymax+Yrb] = 1 2352 ycover[Ycx*Ymax+Yrb] = 1 2353 ycover[Yrx*Ymax+Yrb] = 1 2354 ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32 2355 2356 ycover[Ycl*Ymax+Ycx] = 1 2357 2358 ycover[Yax*Ymax+Yrx] = 1 2359 ycover[Ycx*Ymax+Yrx] = 1 2360 2361 ycover[Yax*Ymax+Yrl] = 1 2362 ycover[Ycx*Ymax+Yrl] = 1 2363 ycover[Yrx*Ymax+Yrl] = 1 2364 ycover[Yrl32*Ymax+Yrl] = 1 2365 2366 ycover[Yf0*Ymax+Yrf] = 1 2367 2368 ycover[Yal*Ymax+Ymb] = 1 2369 ycover[Ycl*Ymax+Ymb] = 1 2370 ycover[Yax*Ymax+Ymb] = 1 2371 ycover[Ycx*Ymax+Ymb] = 1 2372 ycover[Yrx*Ymax+Ymb] = 1 2373 ycover[Yrb*Ymax+Ymb] = 1 2374 ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32 2375 ycover[Ym*Ymax+Ymb] = 1 2376 2377 ycover[Yax*Ymax+Yml] = 1 2378 ycover[Ycx*Ymax+Yml] = 1 2379 ycover[Yrx*Ymax+Yml] = 1 2380 ycover[Yrl*Ymax+Yml] = 1 2381 ycover[Yrl32*Ymax+Yml] = 1 2382 ycover[Ym*Ymax+Yml] = 1 2383 2384 ycover[Yax*Ymax+Ymm] = 1 2385 ycover[Ycx*Ymax+Ymm] = 1 2386 ycover[Yrx*Ymax+Ymm] = 1 2387 ycover[Yrl*Ymax+Ymm] = 1 2388 ycover[Yrl32*Ymax+Ymm] = 1 2389 ycover[Ym*Ymax+Ymm] = 1 2390 ycover[Ymr*Ymax+Ymm] = 1 2391 2392 ycover[Yxr0*Ymax+Yxr] = 1 2393 2394 ycover[Ym*Ymax+Yxm] = 1 2395 ycover[Yxr0*Ymax+Yxm] = 1 2396 ycover[Yxr*Ymax+Yxm] = 1 2397 2398 ycover[Ym*Ymax+Yym] = 1 2399 ycover[Yyr*Ymax+Yym] = 1 2400 2401 ycover[Yxr0*Ymax+YxrEvex] = 1 2402 ycover[Yxr*Ymax+YxrEvex] = 1 2403 2404 ycover[Ym*Ymax+YxmEvex] = 1 2405 ycover[Yxr0*Ymax+YxmEvex] = 1 2406 ycover[Yxr*Ymax+YxmEvex] = 1 2407 ycover[YxrEvex*Ymax+YxmEvex] = 1 2408 2409 ycover[Yyr*Ymax+YyrEvex] = 1 2410 2411 ycover[Ym*Ymax+YymEvex] = 1 2412 ycover[Yyr*Ymax+YymEvex] = 1 2413 ycover[YyrEvex*Ymax+YymEvex] = 1 2414 2415 ycover[Ym*Ymax+Yzm] = 1 2416 ycover[Yzr*Ymax+Yzm] = 1 2417 2418 ycover[Yk0*Ymax+Yk] = 1 2419 ycover[Yknot0*Ymax+Yk] = 1 2420 2421 
ycover[Yk0*Ymax+Ykm] = 1 2422 ycover[Yknot0*Ymax+Ykm] = 1 2423 ycover[Yk*Ymax+Ykm] = 1 2424 ycover[Ym*Ymax+Ykm] = 1 2425 2426 ycover[Yxvm*Ymax+YxvmEvex] = 1 2427 2428 ycover[Yyvm*Ymax+YyvmEvex] = 1 2429 2430 for i := 0; i < MAXREG; i++ { 2431 reg[i] = -1 2432 if i >= REG_AL && i <= REG_R15B { 2433 reg[i] = (i - REG_AL) & 7 2434 if i >= REG_SPB && i <= REG_DIB { 2435 regrex[i] = 0x40 2436 } 2437 if i >= REG_R8B && i <= REG_R15B { 2438 regrex[i] = Rxr | Rxx | Rxb 2439 } 2440 } 2441 2442 if i >= REG_AH && i <= REG_BH { 2443 reg[i] = 4 + ((i - REG_AH) & 7) 2444 } 2445 if i >= REG_AX && i <= REG_R15 { 2446 reg[i] = (i - REG_AX) & 7 2447 if i >= REG_R8 { 2448 regrex[i] = Rxr | Rxx | Rxb 2449 } 2450 } 2451 2452 if i >= REG_F0 && i <= REG_F0+7 { 2453 reg[i] = (i - REG_F0) & 7 2454 } 2455 if i >= REG_M0 && i <= REG_M0+7 { 2456 reg[i] = (i - REG_M0) & 7 2457 } 2458 if i >= REG_K0 && i <= REG_K0+7 { 2459 reg[i] = (i - REG_K0) & 7 2460 } 2461 if i >= REG_X0 && i <= REG_X0+15 { 2462 reg[i] = (i - REG_X0) & 7 2463 if i >= REG_X0+8 { 2464 regrex[i] = Rxr | Rxx | Rxb 2465 } 2466 } 2467 if i >= REG_X16 && i <= REG_X16+15 { 2468 reg[i] = (i - REG_X16) & 7 2469 if i >= REG_X16+8 { 2470 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2471 } else { 2472 regrex[i] = RxrEvex 2473 } 2474 } 2475 if i >= REG_Y0 && i <= REG_Y0+15 { 2476 reg[i] = (i - REG_Y0) & 7 2477 if i >= REG_Y0+8 { 2478 regrex[i] = Rxr | Rxx | Rxb 2479 } 2480 } 2481 if i >= REG_Y16 && i <= REG_Y16+15 { 2482 reg[i] = (i - REG_Y16) & 7 2483 if i >= REG_Y16+8 { 2484 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2485 } else { 2486 regrex[i] = RxrEvex 2487 } 2488 } 2489 if i >= REG_Z0 && i <= REG_Z0+15 { 2490 reg[i] = (i - REG_Z0) & 7 2491 if i > REG_Z0+7 { 2492 regrex[i] = Rxr | Rxx | Rxb 2493 } 2494 } 2495 if i >= REG_Z16 && i <= REG_Z16+15 { 2496 reg[i] = (i - REG_Z16) & 7 2497 if i >= REG_Z16+8 { 2498 regrex[i] = Rxr | Rxx | Rxb | RxrEvex 2499 } else { 2500 regrex[i] = RxrEvex 2501 } 2502 } 2503 2504 if i >= REG_CR+8 && i <= REG_CR+15 { 
2505 regrex[i] = Rxr 2506 } 2507 } 2508} 2509 2510var isAndroid = buildcfg.GOOS == "android" 2511 2512func prefixof(ctxt *obj.Link, a *obj.Addr) int { 2513 if a.Reg < REG_CS && a.Index < REG_CS { // fast path 2514 return 0 2515 } 2516 if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE { 2517 switch a.Reg { 2518 case REG_CS: 2519 return 0x2e 2520 2521 case REG_DS: 2522 return 0x3e 2523 2524 case REG_ES: 2525 return 0x26 2526 2527 case REG_FS: 2528 return 0x64 2529 2530 case REG_GS: 2531 return 0x65 2532 2533 case REG_TLS: 2534 // NOTE: Systems listed here should be only systems that 2535 // support direct TLS references like 8(TLS) implemented as 2536 // direct references from FS or GS. Systems that require 2537 // the initial-exec model, where you load the TLS base into 2538 // a register and then index from that register, do not reach 2539 // this code and should not be listed. 2540 if ctxt.Arch.Family == sys.I386 { 2541 switch ctxt.Headtype { 2542 default: 2543 if isAndroid { 2544 return 0x65 // GS 2545 } 2546 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2547 2548 case objabi.Hdarwin, 2549 objabi.Hdragonfly, 2550 objabi.Hfreebsd, 2551 objabi.Hnetbsd, 2552 objabi.Hopenbsd: 2553 return 0x65 // GS 2554 } 2555 } 2556 2557 switch ctxt.Headtype { 2558 default: 2559 log.Fatalf("unknown TLS base register for %v", ctxt.Headtype) 2560 2561 case objabi.Hlinux: 2562 if isAndroid { 2563 return 0x64 // FS 2564 } 2565 2566 if ctxt.Flag_shared { 2567 log.Fatalf("unknown TLS base register for linux with -shared") 2568 } else { 2569 return 0x64 // FS 2570 } 2571 2572 case objabi.Hdragonfly, 2573 objabi.Hfreebsd, 2574 objabi.Hnetbsd, 2575 objabi.Hopenbsd, 2576 objabi.Hsolaris: 2577 return 0x64 // FS 2578 2579 case objabi.Hdarwin: 2580 return 0x65 // GS 2581 } 2582 } 2583 } 2584 2585 switch a.Index { 2586 case REG_CS: 2587 return 0x2e 2588 2589 case REG_DS: 2590 return 0x3e 2591 2592 case REG_ES: 2593 return 0x26 2594 2595 case REG_TLS: 2596 if ctxt.Flag_shared && 
ctxt.Headtype != objabi.Hwindows { 2597 // When building for inclusion into a shared library, an instruction of the form 2598 // MOV off(CX)(TLS*1), AX 2599 // becomes 2600 // mov %gs:off(%ecx), %eax // on i386 2601 // mov %fs:off(%rcx), %rax // on amd64 2602 // which assumes that the correct TLS offset has been loaded into CX (today 2603 // there is only one TLS variable -- g -- so this is OK). When not building for 2604 // a shared library the instruction it becomes 2605 // mov 0x0(%ecx), %eax // on i386 2606 // mov 0x0(%rcx), %rax // on amd64 2607 // and a R_TLS_LE relocation, and so does not require a prefix. 2608 if ctxt.Arch.Family == sys.I386 { 2609 return 0x65 // GS 2610 } 2611 return 0x64 // FS 2612 } 2613 2614 case REG_FS: 2615 return 0x64 2616 2617 case REG_GS: 2618 return 0x65 2619 } 2620 2621 return 0 2622} 2623 2624// oclassRegList returns multisource operand class for addr. 2625func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int { 2626 // TODO(quasilyte): when oclass register case is refactored into 2627 // lookup table, use it here to get register kind more easily. 2628 // Helper functions like regIsXmm should go away too (they will become redundant). 
2629 2630 regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 } 2631 regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 } 2632 regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 } 2633 2634 reg0, reg1 := decodeRegisterRange(addr.Offset) 2635 low := regIndex(int16(reg0)) 2636 high := regIndex(int16(reg1)) 2637 2638 if ctxt.Arch.Family == sys.I386 { 2639 if low >= 8 || high >= 8 { 2640 return Yxxx 2641 } 2642 } 2643 2644 switch high - low { 2645 case 3: 2646 switch { 2647 case regIsXmm(reg0) && regIsXmm(reg1): 2648 return YxrEvexMulti4 2649 case regIsYmm(reg0) && regIsYmm(reg1): 2650 return YyrEvexMulti4 2651 case regIsZmm(reg0) && regIsZmm(reg1): 2652 return YzrMulti4 2653 default: 2654 return Yxxx 2655 } 2656 default: 2657 return Yxxx 2658 } 2659} 2660 2661// oclassVMem returns V-mem (vector memory with VSIB) operand class. 2662// For addr that is not V-mem returns (Yxxx, false). 2663func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) { 2664 switch addr.Index { 2665 case REG_X0 + 0, 2666 REG_X0 + 1, 2667 REG_X0 + 2, 2668 REG_X0 + 3, 2669 REG_X0 + 4, 2670 REG_X0 + 5, 2671 REG_X0 + 6, 2672 REG_X0 + 7: 2673 return Yxvm, true 2674 case REG_X8 + 0, 2675 REG_X8 + 1, 2676 REG_X8 + 2, 2677 REG_X8 + 3, 2678 REG_X8 + 4, 2679 REG_X8 + 5, 2680 REG_X8 + 6, 2681 REG_X8 + 7: 2682 if ctxt.Arch.Family == sys.I386 { 2683 return Yxxx, true 2684 } 2685 return Yxvm, true 2686 case REG_X16 + 0, 2687 REG_X16 + 1, 2688 REG_X16 + 2, 2689 REG_X16 + 3, 2690 REG_X16 + 4, 2691 REG_X16 + 5, 2692 REG_X16 + 6, 2693 REG_X16 + 7, 2694 REG_X16 + 8, 2695 REG_X16 + 9, 2696 REG_X16 + 10, 2697 REG_X16 + 11, 2698 REG_X16 + 12, 2699 REG_X16 + 13, 2700 REG_X16 + 14, 2701 REG_X16 + 15: 2702 if ctxt.Arch.Family == sys.I386 { 2703 return Yxxx, true 2704 } 2705 return YxvmEvex, true 2706 2707 case REG_Y0 + 0, 2708 REG_Y0 + 1, 2709 REG_Y0 + 2, 2710 REG_Y0 + 3, 2711 REG_Y0 + 4, 2712 REG_Y0 + 5, 2713 REG_Y0 + 6, 2714 REG_Y0 + 7: 2715 return Yyvm, true 2716 case 
REG_Y8 + 0, 2717 REG_Y8 + 1, 2718 REG_Y8 + 2, 2719 REG_Y8 + 3, 2720 REG_Y8 + 4, 2721 REG_Y8 + 5, 2722 REG_Y8 + 6, 2723 REG_Y8 + 7: 2724 if ctxt.Arch.Family == sys.I386 { 2725 return Yxxx, true 2726 } 2727 return Yyvm, true 2728 case REG_Y16 + 0, 2729 REG_Y16 + 1, 2730 REG_Y16 + 2, 2731 REG_Y16 + 3, 2732 REG_Y16 + 4, 2733 REG_Y16 + 5, 2734 REG_Y16 + 6, 2735 REG_Y16 + 7, 2736 REG_Y16 + 8, 2737 REG_Y16 + 9, 2738 REG_Y16 + 10, 2739 REG_Y16 + 11, 2740 REG_Y16 + 12, 2741 REG_Y16 + 13, 2742 REG_Y16 + 14, 2743 REG_Y16 + 15: 2744 if ctxt.Arch.Family == sys.I386 { 2745 return Yxxx, true 2746 } 2747 return YyvmEvex, true 2748 2749 case REG_Z0 + 0, 2750 REG_Z0 + 1, 2751 REG_Z0 + 2, 2752 REG_Z0 + 3, 2753 REG_Z0 + 4, 2754 REG_Z0 + 5, 2755 REG_Z0 + 6, 2756 REG_Z0 + 7: 2757 return Yzvm, true 2758 case REG_Z8 + 0, 2759 REG_Z8 + 1, 2760 REG_Z8 + 2, 2761 REG_Z8 + 3, 2762 REG_Z8 + 4, 2763 REG_Z8 + 5, 2764 REG_Z8 + 6, 2765 REG_Z8 + 7, 2766 REG_Z8 + 8, 2767 REG_Z8 + 9, 2768 REG_Z8 + 10, 2769 REG_Z8 + 11, 2770 REG_Z8 + 12, 2771 REG_Z8 + 13, 2772 REG_Z8 + 14, 2773 REG_Z8 + 15, 2774 REG_Z8 + 16, 2775 REG_Z8 + 17, 2776 REG_Z8 + 18, 2777 REG_Z8 + 19, 2778 REG_Z8 + 20, 2779 REG_Z8 + 21, 2780 REG_Z8 + 22, 2781 REG_Z8 + 23: 2782 if ctxt.Arch.Family == sys.I386 { 2783 return Yxxx, true 2784 } 2785 return Yzvm, true 2786 } 2787 2788 return Yxxx, false 2789} 2790 2791func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int { 2792 switch a.Type { 2793 case obj.TYPE_REGLIST: 2794 return oclassRegList(ctxt, a) 2795 2796 case obj.TYPE_NONE: 2797 return Ynone 2798 2799 case obj.TYPE_BRANCH: 2800 return Ybr 2801 2802 case obj.TYPE_INDIR: 2803 if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 { 2804 return Yindir 2805 } 2806 return Yxxx 2807 2808 case obj.TYPE_MEM: 2809 // Pseudo registers have negative index, but SP is 2810 // not pseudo on x86, hence REG_SP check is not redundant. 
2811 if a.Index == REG_SP || a.Index < 0 { 2812 // Can't use FP/SB/PC/SP as the index register. 2813 return Yxxx 2814 } 2815 2816 if vmem, ok := oclassVMem(ctxt, a); ok { 2817 return vmem 2818 } 2819 2820 if ctxt.Arch.Family == sys.AMD64 { 2821 switch a.Name { 2822 case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF: 2823 // Global variables can't use index registers and their 2824 // base register is %rip (%rip is encoded as REG_NONE). 2825 if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 { 2826 return Yxxx 2827 } 2828 case obj.NAME_AUTO, obj.NAME_PARAM: 2829 // These names must have a base of SP. The old compiler 2830 // uses 0 for the base register. SSA uses REG_SP. 2831 if a.Reg != REG_SP && a.Reg != 0 { 2832 return Yxxx 2833 } 2834 case obj.NAME_NONE: 2835 // everything is ok 2836 default: 2837 // unknown name 2838 return Yxxx 2839 } 2840 } 2841 return Ym 2842 2843 case obj.TYPE_ADDR: 2844 switch a.Name { 2845 case obj.NAME_GOTREF: 2846 ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF") 2847 return Yxxx 2848 2849 case obj.NAME_EXTERN, 2850 obj.NAME_STATIC: 2851 if a.Sym != nil && useAbs(ctxt, a.Sym) { 2852 return Yi32 2853 } 2854 return Yiauto // use pc-relative addressing 2855 2856 case obj.NAME_AUTO, 2857 obj.NAME_PARAM: 2858 return Yiauto 2859 } 2860 2861 // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index 2862 // and got Yi32 in an earlier version of this code. 2863 // Keep doing that until we fix yduff etc. 
2864 if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") { 2865 return Yi32 2866 } 2867 2868 if a.Sym != nil || a.Name != obj.NAME_NONE { 2869 ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a)) 2870 } 2871 fallthrough 2872 2873 case obj.TYPE_CONST: 2874 if a.Sym != nil { 2875 ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a)) 2876 } 2877 2878 v := a.Offset 2879 if ctxt.Arch.Family == sys.I386 { 2880 v = int64(int32(v)) 2881 } 2882 switch { 2883 case v == 0: 2884 return Yi0 2885 case v == 1: 2886 return Yi1 2887 case v >= 0 && v <= 3: 2888 return Yu2 2889 case v >= 0 && v <= 127: 2890 return Yu7 2891 case v >= 0 && v <= 255: 2892 return Yu8 2893 case v >= -128 && v <= 127: 2894 return Yi8 2895 } 2896 if ctxt.Arch.Family == sys.I386 { 2897 return Yi32 2898 } 2899 l := int32(v) 2900 if int64(l) == v { 2901 return Ys32 // can sign extend 2902 } 2903 if v>>32 == 0 { 2904 return Yi32 // unsigned 2905 } 2906 return Yi64 2907 2908 case obj.TYPE_TEXTSIZE: 2909 return Ytextsize 2910 } 2911 2912 if a.Type != obj.TYPE_REG { 2913 ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a)) 2914 return Yxxx 2915 } 2916 2917 switch a.Reg { 2918 case REG_AL: 2919 return Yal 2920 2921 case REG_AX: 2922 return Yax 2923 2924 /* 2925 case REG_SPB: 2926 */ 2927 case REG_BPB, 2928 REG_SIB, 2929 REG_DIB, 2930 REG_R8B, 2931 REG_R9B, 2932 REG_R10B, 2933 REG_R11B, 2934 REG_R12B, 2935 REG_R13B, 2936 REG_R14B, 2937 REG_R15B: 2938 if ctxt.Arch.Family == sys.I386 { 2939 return Yxxx 2940 } 2941 fallthrough 2942 2943 case REG_DL, 2944 REG_BL, 2945 REG_AH, 2946 REG_CH, 2947 REG_DH, 2948 REG_BH: 2949 return Yrb 2950 2951 case REG_CL: 2952 return Ycl 2953 2954 case REG_CX: 2955 return Ycx 2956 2957 case REG_DX, REG_BX: 2958 return Yrx 2959 2960 case REG_R8, // not really Yrl 2961 REG_R9, 2962 REG_R10, 2963 REG_R11, 2964 REG_R12, 2965 REG_R13, 2966 REG_R14, 2967 REG_R15: 2968 if ctxt.Arch.Family == sys.I386 { 2969 return Yxxx 2970 } 2971 fallthrough 2972 2973 case REG_SP, 
REG_BP, REG_SI, REG_DI: 2974 if ctxt.Arch.Family == sys.I386 { 2975 return Yrl32 2976 } 2977 return Yrl 2978 2979 case REG_F0 + 0: 2980 return Yf0 2981 2982 case REG_F0 + 1, 2983 REG_F0 + 2, 2984 REG_F0 + 3, 2985 REG_F0 + 4, 2986 REG_F0 + 5, 2987 REG_F0 + 6, 2988 REG_F0 + 7: 2989 return Yrf 2990 2991 case REG_M0 + 0, 2992 REG_M0 + 1, 2993 REG_M0 + 2, 2994 REG_M0 + 3, 2995 REG_M0 + 4, 2996 REG_M0 + 5, 2997 REG_M0 + 6, 2998 REG_M0 + 7: 2999 return Ymr 3000 3001 case REG_X0: 3002 return Yxr0 3003 3004 case REG_X0 + 1, 3005 REG_X0 + 2, 3006 REG_X0 + 3, 3007 REG_X0 + 4, 3008 REG_X0 + 5, 3009 REG_X0 + 6, 3010 REG_X0 + 7, 3011 REG_X0 + 8, 3012 REG_X0 + 9, 3013 REG_X0 + 10, 3014 REG_X0 + 11, 3015 REG_X0 + 12, 3016 REG_X0 + 13, 3017 REG_X0 + 14, 3018 REG_X0 + 15: 3019 return Yxr 3020 3021 case REG_X0 + 16, 3022 REG_X0 + 17, 3023 REG_X0 + 18, 3024 REG_X0 + 19, 3025 REG_X0 + 20, 3026 REG_X0 + 21, 3027 REG_X0 + 22, 3028 REG_X0 + 23, 3029 REG_X0 + 24, 3030 REG_X0 + 25, 3031 REG_X0 + 26, 3032 REG_X0 + 27, 3033 REG_X0 + 28, 3034 REG_X0 + 29, 3035 REG_X0 + 30, 3036 REG_X0 + 31: 3037 return YxrEvex 3038 3039 case REG_Y0 + 0, 3040 REG_Y0 + 1, 3041 REG_Y0 + 2, 3042 REG_Y0 + 3, 3043 REG_Y0 + 4, 3044 REG_Y0 + 5, 3045 REG_Y0 + 6, 3046 REG_Y0 + 7, 3047 REG_Y0 + 8, 3048 REG_Y0 + 9, 3049 REG_Y0 + 10, 3050 REG_Y0 + 11, 3051 REG_Y0 + 12, 3052 REG_Y0 + 13, 3053 REG_Y0 + 14, 3054 REG_Y0 + 15: 3055 return Yyr 3056 3057 case REG_Y0 + 16, 3058 REG_Y0 + 17, 3059 REG_Y0 + 18, 3060 REG_Y0 + 19, 3061 REG_Y0 + 20, 3062 REG_Y0 + 21, 3063 REG_Y0 + 22, 3064 REG_Y0 + 23, 3065 REG_Y0 + 24, 3066 REG_Y0 + 25, 3067 REG_Y0 + 26, 3068 REG_Y0 + 27, 3069 REG_Y0 + 28, 3070 REG_Y0 + 29, 3071 REG_Y0 + 30, 3072 REG_Y0 + 31: 3073 return YyrEvex 3074 3075 case REG_Z0 + 0, 3076 REG_Z0 + 1, 3077 REG_Z0 + 2, 3078 REG_Z0 + 3, 3079 REG_Z0 + 4, 3080 REG_Z0 + 5, 3081 REG_Z0 + 6, 3082 REG_Z0 + 7: 3083 return Yzr 3084 3085 case REG_Z0 + 8, 3086 REG_Z0 + 9, 3087 REG_Z0 + 10, 3088 REG_Z0 + 11, 3089 REG_Z0 + 12, 3090 REG_Z0 + 13, 
3091 REG_Z0 + 14, 3092 REG_Z0 + 15, 3093 REG_Z0 + 16, 3094 REG_Z0 + 17, 3095 REG_Z0 + 18, 3096 REG_Z0 + 19, 3097 REG_Z0 + 20, 3098 REG_Z0 + 21, 3099 REG_Z0 + 22, 3100 REG_Z0 + 23, 3101 REG_Z0 + 24, 3102 REG_Z0 + 25, 3103 REG_Z0 + 26, 3104 REG_Z0 + 27, 3105 REG_Z0 + 28, 3106 REG_Z0 + 29, 3107 REG_Z0 + 30, 3108 REG_Z0 + 31: 3109 if ctxt.Arch.Family == sys.I386 { 3110 return Yxxx 3111 } 3112 return Yzr 3113 3114 case REG_K0: 3115 return Yk0 3116 3117 case REG_K0 + 1, 3118 REG_K0 + 2, 3119 REG_K0 + 3, 3120 REG_K0 + 4, 3121 REG_K0 + 5, 3122 REG_K0 + 6, 3123 REG_K0 + 7: 3124 return Yknot0 3125 3126 case REG_CS: 3127 return Ycs 3128 case REG_SS: 3129 return Yss 3130 case REG_DS: 3131 return Yds 3132 case REG_ES: 3133 return Yes 3134 case REG_FS: 3135 return Yfs 3136 case REG_GS: 3137 return Ygs 3138 case REG_TLS: 3139 return Ytls 3140 3141 case REG_GDTR: 3142 return Ygdtr 3143 case REG_IDTR: 3144 return Yidtr 3145 case REG_LDTR: 3146 return Yldtr 3147 case REG_MSW: 3148 return Ymsw 3149 case REG_TASK: 3150 return Ytask 3151 3152 case REG_CR + 0: 3153 return Ycr0 3154 case REG_CR + 1: 3155 return Ycr1 3156 case REG_CR + 2: 3157 return Ycr2 3158 case REG_CR + 3: 3159 return Ycr3 3160 case REG_CR + 4: 3161 return Ycr4 3162 case REG_CR + 5: 3163 return Ycr5 3164 case REG_CR + 6: 3165 return Ycr6 3166 case REG_CR + 7: 3167 return Ycr7 3168 case REG_CR + 8: 3169 return Ycr8 3170 3171 case REG_DR + 0: 3172 return Ydr0 3173 case REG_DR + 1: 3174 return Ydr1 3175 case REG_DR + 2: 3176 return Ydr2 3177 case REG_DR + 3: 3178 return Ydr3 3179 case REG_DR + 4: 3180 return Ydr4 3181 case REG_DR + 5: 3182 return Ydr5 3183 case REG_DR + 6: 3184 return Ydr6 3185 case REG_DR + 7: 3186 return Ydr7 3187 3188 case REG_TR + 0: 3189 return Ytr0 3190 case REG_TR + 1: 3191 return Ytr1 3192 case REG_TR + 2: 3193 return Ytr2 3194 case REG_TR + 3: 3195 return Ytr3 3196 case REG_TR + 4: 3197 return Ytr4 3198 case REG_TR + 5: 3199 return Ytr5 3200 case REG_TR + 6: 3201 return Ytr6 3202 case REG_TR + 
7:
		return Ytr7
	}

	return Yxxx
}

// AsmBuf accumulates the bytes of one variable-length x86 instruction
// while it is being encoded, and carries the per-instruction assembly state.
type AsmBuf struct {
	buf      [100]byte // storage for the bytes emitted so far
	off      int       // number of bytes of buf currently in use
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}

// Put1 writes x at the current end of the buffer and advances by one byte.
func (ab *AsmBuf) Put1(x byte) {
	end := ab.off
	ab.buf[end] = x
	ab.off = end + 1
}

// Put2 writes x then y at the current end of the buffer.
func (ab *AsmBuf) Put2(x, y byte) {
	end := ab.off
	ab.buf[end] = x
	ab.buf[end+1] = y
	ab.off = end + 2
}

// Put3 writes x, y and z, in that order, at the current end of the buffer.
func (ab *AsmBuf) Put3(x, y, z byte) {
	end := ab.off
	ab.buf[end] = x
	ab.buf[end+1] = y
	ab.buf[end+2] = z
	ab.off = end + 3
}

// Put4 writes x, y, z and w, in that order, at the current end of the buffer.
func (ab *AsmBuf) Put4(x, y, z, w byte) {
	end := ab.off
	ab.buf[end] = x
	ab.buf[end+1] = y
	ab.buf[end+2] = z
	ab.buf[end+3] = w
	ab.off = end + 4
}

// PutInt16 appends v as two bytes, least significant byte first.
func (ab *AsmBuf) PutInt16(v int16) {
	for i := 0; i < 2; i++ {
		ab.buf[ab.off+i] = byte(v >> uint(8*i))
	}
	ab.off += 2
}

// PutInt32 appends v as four bytes, least significant byte first.
func (ab *AsmBuf) PutInt32(v int32) {
	for i := 0; i < 4; i++ {
		ab.buf[ab.off+i] = byte(v >> uint(8*i))
	}
	ab.off += 4
}

// PutInt64 writes v into the buffer using little-endian encoding.
3271func (ab *AsmBuf) PutInt64(v int64) { 3272 ab.buf[ab.off+0] = byte(v) 3273 ab.buf[ab.off+1] = byte(v >> 8) 3274 ab.buf[ab.off+2] = byte(v >> 16) 3275 ab.buf[ab.off+3] = byte(v >> 24) 3276 ab.buf[ab.off+4] = byte(v >> 32) 3277 ab.buf[ab.off+5] = byte(v >> 40) 3278 ab.buf[ab.off+6] = byte(v >> 48) 3279 ab.buf[ab.off+7] = byte(v >> 56) 3280 ab.off += 8 3281} 3282 3283// Put copies b into the buffer. 3284func (ab *AsmBuf) Put(b []byte) { 3285 copy(ab.buf[ab.off:], b) 3286 ab.off += len(b) 3287} 3288 3289// PutOpBytesLit writes zero terminated sequence of bytes from op, 3290// starting at specified offset (e.g. z counter value). 3291// Trailing 0 is not written. 3292// 3293// Intended to be used for literal Z cases. 3294// Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r). 3295func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) { 3296 for int(op[offset]) != 0 { 3297 ab.Put1(byte(op[offset])) 3298 offset++ 3299 } 3300} 3301 3302// Insert inserts b at offset i. 3303func (ab *AsmBuf) Insert(i int, b byte) { 3304 ab.off++ 3305 copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1]) 3306 ab.buf[i] = b 3307} 3308 3309// Last returns the byte at the end of the buffer. 3310func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] } 3311 3312// Len returns the length of the buffer. 3313func (ab *AsmBuf) Len() int { return ab.off } 3314 3315// Bytes returns the contents of the buffer. 3316func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] } 3317 3318// Reset empties the buffer. 3319func (ab *AsmBuf) Reset() { ab.off = 0 } 3320 3321// At returns the byte at offset i. 3322func (ab *AsmBuf) At(i int) byte { return ab.buf[i] } 3323 3324// asmidx emits SIB byte. 3325func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) { 3326 var i int 3327 3328 // X/Y index register is used in VSIB. 
switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: the index field is set to 4 and the scale
		// switch below is skipped, leaving the scale bits zero.
		i = 4 << 3
		goto bas

	// Registers in this group are rejected when assembling for 386.
	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		// The index register number occupies bits 3-5.
		i = reg[index] << 3
	}

	// The scale factor occupies bits 6-7: 1, 2, 4, 8 map to 0, 1, 2, 3.
	switch scale {
	default:
		goto bad

	case 1:
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	// The base register number occupies the low 3 bits.
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	// R8-R15 are rejected as base registers when assembling for 386.
	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem but still emit one (zero) byte in place of the SIB.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}

// relput4 appends the 32-bit value of a, as computed by vaddr, to the buffer.
// If vaddr reports a relocation, a copy of it is added to cursym with its
// offset set to the position (p.Pc plus the current buffer length) at which
// the value is written. A relocation size other than 4 is diagnosed.
func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
	var rel obj.Reloc

	v := vaddr(ctxt, p, a, &rel)
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad reloc")
		}
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(int32(v))
}

// vaddr returns the constant part of address a that the caller should encode
// directly.
//
// For symbol references (NAME_STATIC, NAME_GOTREF, NAME_EXTERN) and for
// addresses based on REG_TLS it returns 0 and, where a relocation applies,
// describes that relocation in *r with r.Off set to -1 for the caller to
// fill in. In every other case it returns a.Offset.
//
// A non-nil r is always reset to the zero Reloc first. If a needs a
// relocation and r is nil, vaddr reports a diagnostic and terminates
// via log.Fatalf.
func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
	if r != nil {
		*r = obj.Reloc{}
	}

	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		s := a.Sym
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		// All three relocation kinds are 4 bytes wide; only the type differs.
		if a.Name == obj.NAME_GOTREF {
			r.Siz = 4
			r.Type = objabi.R_GOTPCREL
		} else if useAbs(ctxt, s) {
			r.Siz = 4
			r.Type = objabi.R_ADDR
		} else {
			r.Siz = 4
			r.Type = objabi.R_PCREL
		}

		r.Off = -1 // caller must fill in
		r.Sym = s
		r.Add = a.Offset

		return 0
	}

	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
		if r == nil {
			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
			log.Fatalf("reloc")
		}

		// When this condition is false, r stays zeroed (Siz == 0), so the
		// caller sees no relocation.
		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
			r.Type = objabi.R_TLS_LE
			r.Siz = 4
			r.Off = -1 // caller must fill in
			r.Add = a.Offset
		}
		return 0
}

	return a.Offset
}

// asmandsz emits the addressing bytes for operand a: a ModR/M byte with r in
// bits 3-5, followed where needed by a SIB byte (via asmidx) and an 8-bit or
// 32-bit displacement. REX bits implied by the registers of a, together with
// the 0x40 and Rxr bits of rex, are merged into ab.rexflag. If the
// displacement needs a relocation, that relocation is added to cursym at the
// position of the 32-bit value.
//
// Operands that cannot be encoded are reported through ctxt.Diag.
// The m64 parameter is not used by this function.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Only the 0x40 and Rxr bits of the caller-supplied rex are kept.
	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the extra diagnostics only
		// narrow down why before the generic "bad address" report.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no displacement allowed.
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register (not TLS, not a segment
	// register): always encoded with r/m=4 and a SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
		// NOTE: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base: mod=0 followed by a 32-bit value.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement can be omitted (mod=0), except with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Use the one-byte displacement form (mod=1) when toDisp8 allows it
		// and no relocation is pending.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// Otherwise use the 32-bit displacement form (mod=2).
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a real index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No general-purpose base register (none, a segment register, or TLS):
	// the operand is a bare 32-bit value, with or without a SIB byte.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=0, r/m=5, followed by the 32-bit value.
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base are always encoded through asmidx with no index.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: plain ModR/M, no SIB.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
			ctxt.Headtype != objabi.Hwindows {
			// Move the offset into an R_TLS_LE relocation addend and
			// encode a zero displacement in its place.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Zero displacement can be omitted (mod=0), except with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit value, recording the pending relocation (if any) at
	// the position where that value is written.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}

// asmand encodes operand a via asmandsz, taking the reg-field value and the
// REX bits from register operand ra.
func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
}

// asmando encodes operand a via asmandsz, using o directly as the reg-field
// value and contributing no REX bits.
func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
}

// bytereg rewrites a register operand in the REG_AX..REG_R15 range to the
// register at the same position counted from REG_AL, and zeroes *t.
// NOTE(review): t presumably points at the cached operand class
// (p.Ft or p.Tt) so that it gets recomputed; confirm against callers.
func bytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
		a.Reg += REG_AL - REG_AX
		*t = 0
	}
}

// unbytereg is the inverse of bytereg: it rewrites a register operand in the
// REG_AL..REG_R15B range to the register at the same position counted from
// REG_AX, and zeroes *t.
func unbytereg(a *obj.Addr, t *uint8) {
	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
		a.Reg += REG_AX - REG_AL
		*t = 0
	}
}

// Encoding kinds used in the fifth column of ymovtab entries.
const (
	movLit uint8 = iota // Like Zlit
	movRegMem
	movMemReg
	movRegMem2op
	movMemReg2op
	movFullPtr // Load full pointer, trash heap (unsupported)
	movDoubleShift
	movTLSReg
)

// ymovtab lists special-case instruction forms. Each entry holds, in order:
// the instruction, three operand classes, one of the mov* encoding kinds
// declared above, and up to four encoding bytes.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss,
Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	// (the second byte of each entry differs per segment register)
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	// (the third byte of each entry is the control register number)
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	// (the third byte of each entry is the debug register number;
	// the AMOVL forms list only DR0, DR6 and DR7)
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	// (the first operand is the shift count: an 8-bit immediate, CL or CX;
	// the Q and W forms carry a leading Pw or Pe byte)
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	// All-zero entry. NOTE(review): presumably the end-of-table sentinel;
	// confirm against the code that scans ymovtab.
	{0, 0, 0, 0, 0, [4]uint8{}},
}

// isax reports whether operand a refers to the AX register, either as its
// register (AX, AL or AH) or as its index register.
func isax(a *obj.Addr) bool {
	switch a.Reg {
	case REG_AX, REG_AL, REG_AH:
		return true
	}

	return a.Index == REG_AX
}

// subreg replaces register from with register to wherever it appears as the
// register or index of p.From or p.To. For every operand it changes, the
// matching cached operand class (p.Ft or p.Tt) is reset to 0.
func subreg(p *obj.Prog, from int, to int) {
	if false { /* debug['Q'] */
		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
	}

	if int(p.From.Reg) == from {
		p.From.Reg = int16(to)
		p.Ft = 0
	}

	if int(p.To.Reg) == from {
		p.To.Reg = int16(to)
		p.Tt = 0
	}

	if int(p.From.Index) == from {
		p.From.Index = int16(to)
		p.Ft = 0
	}

	if int(p.To.Index) == from {
		p.To.Index = int16(to)
		p.Tt = 0
	}

	if false { /* debug['Q'] */
		fmt.Printf("%v\n", p)
	}
}

// mediaop emits the escape bytes and the opcode byte for op and returns the
// (possibly advanced) index z into o.op.
//
// When op is one of Pm, Pe, Pf2, Pf3 and osize is not 1, op is treated as a
// prefix: it is emitted (unless it is Pm itself), followed by Pm, and the
// opcode byte is then taken from the next entry of o.op. In all other cases
// Pm is emitted first unless it is already the last byte in the buffer.
func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
	switch op {
	case Pm, Pe, Pf2, Pf3:
		if osize != 1 {
			if op != Pm {
				ab.Put1(byte(op))
			}
			ab.Put1(Pm)
			z++
			op = int(o.op[z])
			break
		}
		fallthrough

	default:
		if ab.Len() == 0 || ab.Last() != Pm {
			ab.Put1(Pm)
		}
	}

	ab.Put1(byte(op))
	return z
}

// bpduff1 holds pre-encoded machine code; the trailing comments name the
// instructions the bytes encode.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}

// bpduff2 holds pre-encoded machine code; the trailing comment names the
// instruction the bytes encode.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}

// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// rm, v, r and k may each be nil when the instruction has no such operand.
// Suffix-dependent bits (zeroing, rounding, broadcast, SAE) are looked up
// from p.Scond; unsupported combinations are reported via ctxt.Diag.
//
// Expects asmbuf.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// These bits start at 1 and are cleared to 0 when the corresponding
	// extension flag is set for the operand's register.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// With no index register, the X bit is cleared when rm.Reg has
		// the RxrEvex flag set.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored with all four bits inverted.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		if k == nil {
			// When you request zeroing you must specify a mask register.
			// See issue 57952.
			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
		} else if k.Reg == REG_K0 {
			// The mask register must not be K0. That restriction is already
			// handled by the Yknot0 restriction in the opcode tables, so we
			// won't ever reach here. But put something sensible here just in case.
			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the b bit; they are checked in
	// this order and at most one case applies.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// With rounding, the L'L field carries the rounding mode instead
		// of evex.L().
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' is cleared when the index register of rm, or failing that the
	// register of v, has the RxrEvex flag set.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}

// Emit VEX prefix and opcode byte.
// The three addresses are the r/m, vvvv, and reg fields.
// The reg and rm arguments appear in the same order as the
// arguments to asmand, which typically follows the call to asmvex.
// The final two arguments are the VEX prefix (see encoding above)
// and the opcode byte.
// For details about vex prefix see:
// https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
//
// A nil rm, v or r simply contributes zero extension bits for that field.
// asmvex also sets ab.vexflag, which asmins consults to decide that no
// separate REX byte must be inserted for this instruction.
func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
	ab.vexflag = true
	rexR := 0
	if r != nil {
		rexR = regrex[r.Reg] & Rxr
	}
	rexB := 0
	rexX := 0
	if rm != nil {
		rexB = regrex[rm.Reg] & Rxb
		rexX = regrex[rm.Index] & Rxx
	}
	// vexM is taken from bits 3..5 of vex; vexWLP keeps bit 7 and bits 0..2.
	vexM := (vex >> 3) & 0x7
	vexWLP := vex & 0x87
	vexV := byte(0)
	if v != nil {
		// 4-bit register number: low 3 bits from reg, 4th bit from the Rxr flag.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The register number is emitted bit-inverted.
	vexV ^= 0xF
	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
		// Can use 2-byte encoding.
		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
	} else {
		// Must use 3-byte encoding.
		ab.Put3(0xc4,
			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
			vexV<<3|vexWLP,
		)
	}
	ab.Put1(opcode)
}

// regIndex returns register index that fits in 5 bits.
//
//	R         : 3 bit | legacy instructions     | N/A
//	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
//	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
//
// Examples:
//
//	REG_Z30 => 30
//	REG_X15 => 15
//	REG_R9  => 9
//	REG_AX  => 0
func regIndex(r int16) int {
	lower3bits := reg[r]
	// In Go, & and << share one precedence level and associate to the left,
	// so the next two expressions are (regrex[r] & mask) << shift.
	high4bit := regrex[r] & Rxr << 1
	high5bit := regrex[r] & RxrEvex << 0
	return lower3bits | high4bit | high5bit
}

// avx2gatherValid reports whether p satisfies AVX2 gather constraints.
// Reports errors via ctxt.
//
// It dereferences p.GetFrom3() without a nil check; doasm verifies that the
// third argument is present before calling it.
func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// If any pair of the index, mask, or destination registers
	// are the same, illegal instruction trap (#UD) is triggered.
	index := regIndex(p.GetFrom3().Index)
	mask := regIndex(p.From.Reg)
	dest := regIndex(p.To.Reg)
	if dest == mask || dest == index || mask == index {
		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// avx512gatherValid reports whether p satisfies AVX512 gather constraints.
// Reports errors via ctxt.
func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// Illegal instruction trap (#UD) is triggered if the destination vector
	// register is the same as index vector in VSIB.
	index := regIndex(p.From.Index)
	dest := regIndex(p.To.Reg)
	if dest == index {
		ctxt.Diag("index and destination registers should be distinct: %v", p)
		return false
	}

	return true
}

// doasm encodes the single instruction p and appends its bytes to ab.
//
// The steps, in order:
//   - look up the optab entry for p.As; a missing entry is diagnosed;
//   - emit any prefix byte that prefixof returns for the From and To operands;
//   - reject gather instructions whose register operands coincide;
//   - classify the operands (cached in p.Ft and p.Tt) and walk o.ytab for an
//     operand pattern that matches, then emit bytes according to the zcase of
//     the matching entry;
//   - if no ytab entry matches, try the special MOV forms listed in ymovtab;
//   - if that fails too, fall to the "bad" label, which on non-AMD64 targets
//     retries byte instructions with exchanged registers and otherwise
//     reports an invalid instruction.
//
// Relocations are appended to cursym and errors are reported through ctxt.
// REX bits are only accumulated in ab.rexflag here; the REX byte itself is
// inserted afterwards by asmins.
func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	o := opindex[p.As&obj.AMask]

	if o == nil {
		ctxt.Diag("asmins: missing op %v", p)
		return
	}

	if pre := prefixof(ctxt, &p.From); pre != 0 {
		ab.Put1(byte(pre))
	}
	if pre := prefixof(ctxt, &p.To); pre != 0 {
		ab.Put1(byte(pre))
	}

	// Checks to warn about instruction/arguments combinations that
	// will unconditionally trigger illegal instruction trap (#UD).
	switch p.As {
	case AVGATHERDPD,
		AVGATHERQPD,
		AVGATHERDPS,
		AVGATHERQPS,
		AVPGATHERDD,
		AVPGATHERQD,
		AVPGATHERDQ,
		AVPGATHERQQ:
		if p.GetFrom3() == nil {
			// gathers need a 3rd arg. See issue 58822.
			ctxt.Diag("need a third arg for gather instruction: %v", p)
			return
		}
		// AVX512 gather requires explicit K mask.
		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
			if !avx512gatherValid(ctxt, p) {
				return
			}
		} else {
			if !avx2gatherValid(ctxt, p) {
				return
			}
		}
	}

	// Operand classes are computed once and cached on p.
	if p.Ft == 0 {
		p.Ft = uint8(oclass(ctxt, p, &p.From))
	}
	if p.Tt == 0 {
		p.Tt = uint8(oclass(ctxt, p, &p.To))
	}

	ft := int(p.Ft) * Ymax
	var f3t int
	tt := int(p.Tt) * Ymax

	// xo is 1 when the first opcode byte is the 0x0f escape, 0 otherwise.
	xo := obj.Bool2int(o.op[0] == 0x0f)
	// z indexes o.op; it is advanced past the bytes of every skipped ytab entry.
	z := 0
	var a *obj.Addr
	var l int
	var op int
	var q *obj.Prog
	var r *obj.Reloc
	var rel obj.Reloc
	var v int64

	// args holds the scaled operand classes that are present, in operand order.
	args := make([]int, 0, argListMax)
	if ft != Ynone*Ymax {
		args = append(args, ft)
	}
	for i := range p.RestArgs {
		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
	}
	if tt != Ynone*Ymax {
		args = append(args, tt)
	}

	for _, yt := range o.ytab {
		// ytab matching is purely args-based,
		// but AVX512 suffixes like "Z" or "RU_SAE" will
		// add EVEX-only filter that will reject non-EVEX matches.
		//
		// Consider "VADDPD.BCST 2032(DX), X0, X0".
		// Without this rule, operands will lead to VEX-encoded form
		// and produce "c5b15813" encoding.
		if !yt.match(args) {
			// "xo" is always zero for VEX/EVEX encoded insts.
			z += int(yt.zoffset) + xo
		} else {
			if p.Scond != 0 && !evexZcase(yt.zcase) {
				// Do not signal error and continue to search
				// for matching EVEX-encoded form.
				z += int(yt.zoffset)
				continue
			}

			switch o.prefix {
			case Px1: // first option valid only in 32-bit mode
				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
					z += int(yt.zoffset) + xo
					continue
				}
			case Pq: // 16 bit escape and opcode escape
				ab.Put2(Pe, Pm)

			case Pq3: // 16 bit escape and opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pe, Pm)

			case Pq4: // 66 0F 38
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq4w: // 66 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0x66, 0x0F, 0x38)

			case Pq5: // F3 0F 38
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pq5w: // F3 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0xF3, 0x0F, 0x38)

			case Pf2, // xmm opcode escape
				Pf3:
				ab.Put2(o.prefix, Pm)

			case Pef3:
				ab.Put3(Pe, Pf3, Pm)

			case Pfw: // xmm opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pf3, Pm)

			case Pm: // opcode escape
				ab.Put1(Pm)

			case Pe: // 16 bit escape
				ab.Put1(Pe)

			case Pw: // 64-bit escape
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal 64: %v", p)
				}
				ab.rexflag |= Pw

			case Pw8: // 64-bit escape if z >= 8
				if z >= 8 {
					if ctxt.Arch.Family != sys.AMD64 {
						ctxt.Diag("asmins: illegal 64: %v", p)
					}
					ab.rexflag |= Pw
				}

			case Pb: // botch
				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
					goto bad
				}
				// NOTE(rsc): This is probably safe to do always,
				// but when enabled it chooses different encodings
				// than the old cmd/internal/obj/i386 code did,
				// which breaks our "same bits out" checks.
				// In particular, CMPB AX, $0 encodes as 80 f8 00
				// in the original obj/i386, and it would encode
				// (using a valid, shorter form) as 3c 00 if we enabled
				// the call to bytereg here.
				if ctxt.Arch.Family == sys.AMD64 {
					bytereg(&p.From, &p.Ft)
					bytereg(&p.To, &p.Tt)
				}

			case P32: // 32 bit but illegal if 64-bit mode
				if ctxt.Arch.Family == sys.AMD64 {
					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
				}

			case Py: // 64-bit only, no prefix
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py1: // 64-bit only if z < 1, no prefix
				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}

			case Py3: // 64-bit only if z < 3, no prefix
				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			}

			if z >= len(o.op) {
				log.Fatalf("asmins bad table %v", p)
			}
			op = int(o.op[z])
			if op == 0x0f {
				// Emit the escape byte and use the following byte as the opcode.
				ab.Put1(byte(op))
				z++
				op = int(o.op[z])
			}

			switch yt.zcase {
			default:
				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
				return

			case Zpseudo:
				break

			case Zlit:
				ab.PutOpBytesLit(z, &o.op)

			case Zlitr_m:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zlitm_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zlit_m_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zmb_r:
				bytereg(&p.From, &p.Ft)
				fallthrough

			case Zm_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Z_m_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zm2_r:
				ab.Put2(byte(op), o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zm_r_i_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
				ab.Put1(byte(p.To.Offset))

			case Zibm_r, Zibr_m:
				ab.PutOpBytesLit(z, &o.op)
				if yt.zcase == Zibr_m {
					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				} else {
					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				}
				// Immediate width: 1 byte by default, 2 or 4 bytes for Yi32 patterns.
				switch {
				default:
					ab.Put1(byte(p.From.Offset))
				case yt.args[0] == Yi32 && o.prefix == Pe:
					ab.PutInt16(int16(p.From.Offset))
				case yt.args[0] == Yi32:
					ab.PutInt32(int32(p.From.Offset))
				}

			case Zaut_r:
				ab.Put1(0x8d) // leal
				if p.From.Type != obj.TYPE_ADDR {
					ctxt.Diag("asmins: Zaut sb type ADDR")
				}
				// Temporarily present the operand as a memory reference
				// for asmand, then restore its type.
				p.From.Type = obj.TYPE_MEM
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
				p.From.Type = obj.TYPE_ADDR

			case Zm_o:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))

			case Zr_m:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])

			case Zvex_rm_v_r:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zvex_rm_v_ro:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_rm_vo:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_r_v:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				// regnum is the low 3 bits of the X or Y register number.
				regnum := byte(0x7)
				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
					regnum &= byte(p.GetFrom3().Reg - REG_X0)
				} else {
					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
				}
				ab.Put1(o.op[z+2] | regnum)
				ab.Put1(byte(p.From.Offset))

			case Zvex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zvex_i_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zvex_v_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)

			case Zvex_r_v_rm:
				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zvex_rm_r_vo:
				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))

			case Zvex_i_r_rm:
				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zvex_hr_rm_v_r:
				hr, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				// The extra register operand goes in the high nibble of a trailing byte.
				ab.Put1(byte(regIndex(hr.Reg) << 4))

			case Zevex_k_rmo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))

			case Zevex_i_rm_vo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_vo:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, to, nil, kmask)
				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
				ab.Put1(byte(imm.Offset))

			case Zevex_i_r_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_r_k_rm:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, nil, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))

			case Zevex_i_rm_k_r:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, nil, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, nil)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_i_rm_v_k_r:
				imm, from, from3, kmask, to := unpackOps5(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))

			case Zevex_r_v_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_rm_k_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)

			case Zevex_r_k_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zevex_rm_v_k_r:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)

			case Zevex_r_v_k_rm:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, from3, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)

			case Zr_m_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zr_m_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)

			case Zo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))

			case Zcallindreg:
				// Record a zero-size R_CALLIND relocation at the instruction start,
				// then encode exactly like Zo_m64.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc)
				r.Type = objabi.R_CALLIND
				r.Siz = 0
				fallthrough

			case Zo_m64:
				ab.Put1(byte(op))
				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)

			case Zm_ibo:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))

			case Zibo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Zibo_m_xm:
				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_ib, Zib_:
				if yt.zcase == Zib_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if p.As == AXABORT {
					ab.Put1(o.op[z+1])
				}
				ab.Put1(byte(vaddr(ctxt, p, a, nil)))

			case Zib_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))

			case Zil_rp:
				ab.rexflag |= regrex[p.To.Reg] & Rxb
				ab.Put1(byte(op + reg[p.To.Reg]))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Zo_iw:
				ab.Put1(byte(op))
				if p.From.Type != obj.TYPE_NONE {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				}

			case Ziq_rp:
				// Pick the shortest of three encodings based on the upper
				// 32 bits of the constant (l) and the relocation size.
				v = vaddr(ctxt, p, &p.From, &rel)
				l = int(v >> 32)
				if l == 0 && rel.Siz != 8 {
					ab.rexflag &^= (0x40 | Rxw)

					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt32(int32(v))
				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
					ab.Put1(0xc7)
					ab.asmando(ctxt, cursym, p, &p.To, 0)

					ab.PutInt32(int32(v)) // need all 8
				} else {
					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(op + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}

					ab.PutInt64(v)
				}

			case Zib_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))

			case Z_il, Zil_:
				if yt.zcase == Zil_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zm_ilo, Zilo_m:
				ab.Put1(byte(op))
				if yt.zcase == Zilo_m {
					a = &p.From
					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				} else {
					a = &p.To
					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				}

				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}

			case Zil_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}

			case Z_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.To.Reg]))

			case Zrp_:
				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.From.Reg]))

			case Zcallcon, Zjmpcon:
				if yt.zcase == Zcallcon {
					ab.Put1(byte(op))
				} else {
					ab.Put1(o.op[z+1])
				}
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Type = objabi.R_PCREL
				r.Siz = 4
				r.Add = p.To.Offset
				ab.PutInt32(0)

			case Zcallind:
				ab.Put2(byte(op), o.op[z+1])
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				if ctxt.Arch.Family == sys.AMD64 {
					r.Type = objabi.R_PCREL
				} else {
					r.Type = objabi.R_ADDR
				}
				r.Siz = 4
				r.Add = p.To.Offset
				r.Sym = p.To.Sym
				ab.PutInt32(0)

			case Zcall, Zcallduff:
				if p.To.Sym == nil {
					ctxt.Diag("call without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
					ctxt.Diag("directly calling duff when dynamically linking Go")
				}

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Maintain BP around call, since duffcopy/duffzero can't do it
					// (the call jumps into the middle of the function).
					// This makes it possible to see call sites for duffcopy/duffzero in
					// BP-based profiling tools like Linux perf (which is the
					// whole point of maintaining frame pointers in Go).
					// MOVQ BP, -16(SP)
					// LEAQ -16(SP), BP
					ab.Put(bpduff1)
				}
				ab.Put1(byte(op))
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Sym = p.To.Sym
				r.Add = p.To.Offset
				r.Type = objabi.R_CALL
				r.Siz = 4
				ab.PutInt32(0)

				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Pop BP pushed above.
					// MOVQ 0(BP), BP
					ab.Put(bpduff2)
				}

			// TODO: jump across functions needs reloc
			case Zbr, Zjmp, Zloop:
				if p.As == AXBEGIN {
					ab.Put1(byte(op))
				}
				if p.To.Sym != nil {
					if yt.zcase != Zjmp {
						ctxt.Diag("branch to ATEXT")
						ctxt.DiagFlush()
						log.Fatalf("bad code")
					}

					ab.Put1(o.op[z+1])
					r = obj.Addrel(cursym)
					r.Off = int32(p.Pc + int64(ab.Len()))
					r.Sym = p.To.Sym
					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
					// it can point to a trampoline instead of the destination itself.
					r.Type = objabi.R_CALL
					r.Siz = 4
					ab.PutInt32(0)
					break
				}

				// Assumes q is in this function.
				// TODO: Check in input, preserve in brchain.

				// Fill in backward jump now.
				q = p.To.Target()

				if q == nil {
					ctxt.Diag("jmp/branch/loop without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}

				if p.Back&branchBackwards != 0 {
					// Displacement assuming the 2-byte (opcode + rel8) form.
					v = q.Pc - (p.Pc + 2)
					if v >= -128 && p.As != AXBEGIN {
						if p.As == AJCXZL {
							ab.Put1(0x67)
						}
						ab.Put2(byte(op), byte(v))
					} else if yt.zcase == Zloop {
						ctxt.Diag("loop too far: %v", p)
					} else {
						// Re-base the displacement on the longer rel32 form:
						// 5 bytes instead of 2, plus one more byte for each
						// extra byte emitted below or above (XBEGIN opcode, 0x0f).
						v -= 5 - 2
						if p.As == AXBEGIN {
							v--
						}
						if yt.zcase == Zbr {
							ab.Put1(0x0f)
							v--
						}

						ab.Put1(o.op[z+1])
						ab.PutInt32(int32(v))
					}

					break
				}

				// Annotate target; will fill in later.
				p.Forwd = q.Rel

				q.Rel = p
				if p.Back&branchShort != 0 && p.As != AXBEGIN {
					if p.As == AJCXZL {
						ab.Put1(0x67)
					}
					ab.Put2(byte(op), 0)
				} else if yt.zcase == Zloop {
					ctxt.Diag("loop too far: %v", p)
				} else {
					if yt.zcase == Zbr {
						ab.Put1(0x0f)
					}
					ab.Put1(o.op[z+1])
					ab.PutInt32(0)
				}

			case Zbyte:
				// Here op is used as the data width in bytes (1, 2, 4 or 8).
				v = vaddr(ctxt, p, &p.From, &rel)
				if rel.Siz != 0 {
					rel.Siz = uint8(op)
					r = obj.Addrel(cursym)
					*r = rel
					r.Off = int32(p.Pc + int64(ab.Len()))
				}

				ab.Put1(byte(v))
				if op > 1 {
					ab.Put1(byte(v >> 8))
					if op > 2 {
						ab.PutInt16(int16(v >> 16))
						if op > 4 {
							ab.PutInt32(int32(v >> 32))
						}
					}
				}
			}

			return
		}
	}

	// No ytab entry matched: try the special-case MOV forms in ymovtab.
	f3t = Ynone * Ymax
	if p.GetFrom3() != nil {
		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
	}
	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
		var pp obj.Prog
		var t []byte
		if p.As == mo[0].as {
			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
				t = mo[0].op[:]
				switch mo[0].code {
				default:
					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)

				case movLit:
					for z = 0; t[z] != 0; z++ {
						ab.Put1(t[z])
					}

				case movRegMem:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))

				case movMemReg:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))

				case movRegMem2op: // r,m - 2op
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)

				case movMemReg2op:
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)

				case movFullPtr:
					if t[0] != 0 {
						ab.Put1(t[0])
					}
					switch p.To.Index {
					default:
						goto bad

					case REG_DS:
						ab.Put1(0xc5)

					case REG_SS:
						ab.Put2(0x0f, 0xb2)

					case REG_ES:
						ab.Put1(0xc4)

					case REG_FS:
						ab.Put2(0x0f, 0xb4)

					case REG_GS:
						ab.Put2(0x0f, 0xb5)
					}

					ab.asmand(ctxt, cursym, p, &p.From, &p.To)

				case movDoubleShift:
					if t[0] == Pw {
						if ctxt.Arch.Family != sys.AMD64 {
							ctxt.Diag("asmins: illegal 64: %v", p)
						}
						ab.rexflag |= Pw
						t = t[1:]
					} else if t[0] == Pe {
						ab.Put1(Pe)
						t = t[1:]
					}

					switch p.From.Type {
					default:
						goto bad

					case obj.TYPE_CONST:
						ab.Put2(0x0f, t[0])
						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						ab.Put1(byte(p.From.Offset))

					case obj.TYPE_REG:
						switch p.From.Reg {
						default:
							goto bad

						case REG_CL, REG_CX:
							ab.Put2(0x0f, t[1])
							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						}
					}

				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
				// where you load the TLS base register into a register and then index off that
				// register to access the actual TLS variables. Systems that allow direct TLS access
				// are handled in prefixof above and should not be listed here.
				case movTLSReg:
					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
						ctxt.Diag("invalid load of TLS: %v", p)
					}

					if ctxt.Arch.Family == sys.I386 {
						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
						// where you load the TLS base register into a register and then index off that
						// register to access the actual TLS variables. Systems that allow direct TLS access
						// are handled in prefixof above and should not be listed here.
						switch ctxt.Headtype {
						default:
							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

						case objabi.Hlinux, objabi.Hfreebsd:
							if ctxt.Flag_shared {
								// Note that this is not generating the same insns as the other cases.
								//     MOV TLS, dst
								// becomes
								//     call __x86.get_pc_thunk.dst
								//     movl (gotpc + g@gotntpoff)(dst), dst
								// which is encoded as
								//     call __x86.get_pc_thunk.dst
								//     movq 0(dst), dst
								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
								// is g, which we can't check here, but will when we assemble the second
								// instruction.
								dst := p.To.Reg
								ab.Put1(0xe8)
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_CALL
								r.Siz = 4
								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
								ab.PutInt32(0)

								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_TLS_IE
								r.Siz = 4
								r.Add = 2
								ab.PutInt32(0)
							} else {
								// ELF TLS base is 0(GS).
								pp.From = p.From

								pp.From.Type = obj.TYPE_MEM
								pp.From.Reg = REG_GS
								pp.From.Offset = 0
								pp.From.Index = REG_NONE
								pp.From.Scale = 0
								ab.Put2(0x65, // GS
									0x8B)
								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
							}
						case objabi.Hplan9:
							pp.From = obj.Addr{}
							pp.From.Type = obj.TYPE_MEM
							pp.From.Name = obj.NAME_EXTERN
							pp.From.Sym = plan9privates
							pp.From.Offset = 0
							pp.From.Index = REG_NONE
							ab.Put1(0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
						}
						break
					}

					// Not I386: the remaining cases set REX.W for a 64-bit load.
					switch ctxt.Headtype {
					default:
						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)

					case objabi.Hlinux, objabi.Hfreebsd:
						if !ctxt.Flag_shared {
							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
						}
						// Note that this is not generating the same insn as the other cases.
						//     MOV TLS, R_to
						// becomes
						//     movq g@gottpoff(%rip), R_to
						// which is encoded as
						//     movq 0(%rip), R_to
						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
						// is g, which we can't check here, but will when we assemble the second
						// instruction.
						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)

						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
						r = obj.Addrel(cursym)
						r.Off = int32(p.Pc + int64(ab.Len()))
						r.Type = objabi.R_TLS_IE
						r.Siz = 4
						r.Add = -4
						ab.PutInt32(0)

					case objabi.Hplan9:
						pp.From = obj.Addr{}
						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_EXTERN
						pp.From.Sym = plan9privates
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						ab.rexflag |= Pw
						ab.Put1(0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)

					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
						// TLS base is 0(FS).
						pp.From = p.From

						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_NONE
						pp.From.Reg = REG_NONE
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						pp.From.Scale = 0
						ab.rexflag |= Pw
						ab.Put2(0x64, // FS
							0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
					}
				}
				return
			}
		}
	}
	goto bad

bad:
	if ctxt.Arch.Family != sys.AMD64 {
		// here, the assembly has failed.
		// if it's a byte instruction that has
		// unaddressable registers, try to
		// exchange registers and reissue the
		// instruction with the operands renamed.
		pp := *p

		unbytereg(&pp.From, &pp.Ft)
		unbytereg(&pp.To, &pp.Tt)

		// Each path below emits an xchg, re-assembles the renamed copy pp
		// recursively, then emits the same xchg again to swap back.
		z := int(p.From.Reg)
		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.To)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg lhs,breg
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg lhs,breg
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
				}
				return
			}

			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
				// We certainly don't want to exchange
				// with AX if the op is MUL or DIV.
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg lhs,ax
			}
			return
		}

		z = int(p.To.Reg)
		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.From)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg rhs,breg
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg rhs,breg
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
				}
				return
			}

			if isax(&p.From) {
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg rhs,ax
			}
			return
		}
	}

	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
}

// byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
// which is not referenced in a.
// If a is empty, it returns BX to account for MULB-like instructions
// that might use DX and AX.
5339func byteswapreg(ctxt *obj.Link, a *obj.Addr) int { 5340 cana, canb, canc, cand := true, true, true, true 5341 if a.Type == obj.TYPE_NONE { 5342 cana, cand = false, false 5343 } 5344 5345 if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) { 5346 switch a.Reg { 5347 case REG_NONE: 5348 cana, cand = false, false 5349 case REG_AX, REG_AL, REG_AH: 5350 cana = false 5351 case REG_BX, REG_BL, REG_BH: 5352 canb = false 5353 case REG_CX, REG_CL, REG_CH: 5354 canc = false 5355 case REG_DX, REG_DL, REG_DH: 5356 cand = false 5357 } 5358 } 5359 5360 if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR { 5361 switch a.Index { 5362 case REG_AX: 5363 cana = false 5364 case REG_BX: 5365 canb = false 5366 case REG_CX: 5367 canc = false 5368 case REG_DX: 5369 cand = false 5370 } 5371 } 5372 5373 switch { 5374 case cana: 5375 return REG_AX 5376 case canb: 5377 return REG_BX 5378 case canc: 5379 return REG_CX 5380 case cand: 5381 return REG_DX 5382 default: 5383 ctxt.Diag("impossible byte register") 5384 ctxt.DiagFlush() 5385 log.Fatalf("bad code") 5386 return 0 5387 } 5388} 5389 5390func isbadbyte(a *obj.Addr) bool { 5391 return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB) 5392} 5393 5394func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { 5395 ab.Reset() 5396 5397 ab.rexflag = 0 5398 ab.vexflag = false 5399 ab.evexflag = false 5400 mark := ab.Len() 5401 ab.doasm(ctxt, cursym, p) 5402 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5403 // as befits the whole approach of the architecture, 5404 // the rex prefix must appear before the first opcode byte 5405 // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but 5406 // before the 0f opcode escape!), or it might be ignored. 5407 // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. 
5408 if ctxt.Arch.Family != sys.AMD64 { 5409 ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt) 5410 } 5411 n := ab.Len() 5412 var np int 5413 for np = mark; np < n; np++ { 5414 c := ab.At(np) 5415 if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 { 5416 break 5417 } 5418 } 5419 ab.Insert(np, byte(0x40|ab.rexflag)) 5420 } 5421 5422 n := ab.Len() 5423 for i := len(cursym.R) - 1; i >= 0; i-- { 5424 r := &cursym.R[i] 5425 if int64(r.Off) < p.Pc { 5426 break 5427 } 5428 if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag { 5429 r.Off++ 5430 } 5431 if r.Type == objabi.R_PCREL { 5432 if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL { 5433 // PC-relative addressing is relative to the end of the instruction, 5434 // but the relocations applied by the linker are relative to the end 5435 // of the relocation. Because immediate instruction 5436 // arguments can follow the PC-relative memory reference in the 5437 // instruction encoding, the two may not coincide. In this case, 5438 // adjust addend so that linker can keep relocating relative to the 5439 // end of the relocation. 5440 r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz)) 5441 } else if ctxt.Arch.Family == sys.I386 { 5442 // On 386 PC-relative addressing (for non-call/jmp instructions) 5443 // assumes that the previous instruction loaded the PC of the end 5444 // of that instruction into CX, so the adjustment is relative to 5445 // that. 5446 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5447 } 5448 } 5449 if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 { 5450 // On 386, R_GOTPCREL makes the same assumptions as R_PCREL. 5451 r.Add += int64(r.Off) - p.Pc + int64(r.Siz) 5452 } 5453 5454 } 5455} 5456 5457// unpackOps4 extracts 4 operands from p. 
5458func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) { 5459 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To 5460} 5461 5462// unpackOps5 extracts 5 operands from p. 5463func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) { 5464 return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To 5465} 5466