# Generator script for the ACO builder header.
#
# The Mako template below is rendered with the Format enum imported from
# aco_opcodes, and the resulting C++ header text is printed to stdout.

template = """\
/*
 * Copyright (c) 2019 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * This file was generated by aco_builder_h.py
 */

#ifndef _ACO_BUILDER_
#define _ACO_BUILDER_

#include "aco_ir.h"

namespace aco {
enum dpp_ctrl {
    _dpp_quad_perm = 0x000,
    _dpp_row_sl = 0x100,
    _dpp_row_sr = 0x110,
    _dpp_row_rr = 0x120,
    dpp_wf_sl1 = 0x130,
    dpp_wf_rl1 = 0x134,
    dpp_wf_sr1 = 0x138,
    dpp_wf_rr1 = 0x13C,
    dpp_row_mirror = 0x140,
    dpp_row_half_mirror = 0x141,
    dpp_row_bcast15 = 0x142,
    dpp_row_bcast31 = 0x143,
    _dpp_row_share = 0x150,
    _dpp_row_xmask = 0x160,
};

inline dpp_ctrl
dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
{
    assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
    return (dpp_ctrl)(lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6));
}

inline dpp_ctrl
dpp_row_sl(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_sl) | amount);
}

inline dpp_ctrl
dpp_row_sr(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_sr) | amount);
}

inline dpp_ctrl
dpp_row_rr(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_rr) | amount);
}

inline dpp_ctrl
dpp_row_share(unsigned lane)
{
    assert(lane < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_share) | lane);
}

inline dpp_ctrl
dpp_row_xmask(unsigned mask)
{
    assert(mask < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_xmask) | mask);
}

inline unsigned
ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
{
    assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
    return and_mask | (or_mask << 5) | (xor_mask << 10);
}

inline unsigned
ds_pattern_rotate(unsigned delta, unsigned mask)
{
    assert(delta < 32 && mask < 32);
    return mask | (delta << 5) | 0xc000;
}

aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);

enum sendmsg {
   sendmsg_none = 0,
   sendmsg_gs = 2, /* gfx6 to gfx10.3 */
   sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
   sendmsg_hs_tessfactor = 2, /* gfx11+ */
   sendmsg_dealloc_vgprs = 3, /* gfx11+ */
   sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */
   sendmsg_stall_wave_gen = 5, /* gfx9+ */
   sendmsg_halt_waves = 6, /* gfx9+ */
   sendmsg_ordered_ps_done = 7, /* gfx9+ */
   sendmsg_early_prim_dealloc = 8, /* gfx9 to gfx10 */
   sendmsg_gs_alloc_req = 9, /* gfx9+ */
   sendmsg_get_doorbell = 10, /* gfx9 to gfx10.3 */
   sendmsg_get_ddid = 11, /* gfx10 to gfx10.3 */
   sendmsg_id_mask = 0xf,
};

/* gfx11+ */
enum sendmsg_rtn {
   sendmsg_rtn_get_doorbell = 0,
   sendmsg_rtn_get_ddid = 1,
   sendmsg_rtn_get_tma = 2,
   sendmsg_rtn_get_realtime = 3,
   sendmsg_rtn_save_wave = 4,
   sendmsg_rtn_get_tba = 5,
   sendmsg_rtn_mask = 0xff,
};

enum bperm_swiz {
   bperm_b1_sign = 8,
   bperm_b3_sign = 9,
   bperm_b5_sign = 10,
   bperm_b7_sign = 11,
   bperm_0 = 12,
   bperm_255 = 13,
};

enum class alu_delay_wait {
   NO_DEP = 0,
   VALU_DEP_1 = 1,
   VALU_DEP_2 = 2,
   VALU_DEP_3 = 3,
   VALU_DEP_4 = 4,
   TRANS32_DEP_1 = 5,
   TRANS32_DEP_2 = 6,
   TRANS32_DEP_3 = 7,
   FMA_ACCUM_CYCLE_1 = 8,
   SALU_CYCLE_1 = 9,
   SALU_CYCLE_2 = 10,
   SALU_CYCLE_3 = 11,
};

class Builder {
public:
   struct Result {
      Instruction *instr;

      Result(Instruction *instr_) : instr(instr_) {}

      operator Instruction *() const {
         return instr;
      }

      operator Temp() const {
         return instr->definitions[0].getTemp();
      }

      operator Operand() const {
         return Operand((Temp)*this);
      }

      Definition& def(unsigned index) const {
         return instr->definitions[index];
      }

      aco_ptr<Instruction> get_ptr() const {
        return aco_ptr<Instruction>(instr);
      }

      Instruction * operator * () const {
         return instr;
      }

      Instruction * operator -> () const {
         return instr;
      }
   };

   struct Op {
      Operand op;
      Op(Temp tmp) : op(tmp) {}
      Op(Operand op_) : op(op_) {}
      Op(Result res) : op((Temp)res) {}
   };

   enum WaveSpecificOpcode {
      s_cselect = (unsigned) aco_opcode::s_cselect_b64,
      s_cmp_lg = (unsigned) aco_opcode::s_cmp_lg_u64,
      s_and = (unsigned) aco_opcode::s_and_b64,
      s_andn2 = (unsigned) aco_opcode::s_andn2_b64,
      s_or = (unsigned) aco_opcode::s_or_b64,
      s_orn2 = (unsigned) aco_opcode::s_orn2_b64,
      s_not = (unsigned) aco_opcode::s_not_b64,
      s_mov = (unsigned) aco_opcode::s_mov_b64,
      s_wqm = (unsigned) aco_opcode::s_wqm_b64,
      s_and_saveexec = (unsigned) aco_opcode::s_and_saveexec_b64,
      s_or_saveexec = (unsigned) aco_opcode::s_or_saveexec_b64,
      s_xnor = (unsigned) aco_opcode::s_xnor_b64,
      s_xor = (unsigned) aco_opcode::s_xor_b64,
      s_bcnt1_i32 = (unsigned) aco_opcode::s_bcnt1_i32_b64,
      s_bitcmp1 = (unsigned) aco_opcode::s_bitcmp1_b64,
      s_ff1_i32 = (unsigned) aco_opcode::s_ff1_i32_b64,
      s_flbit_i32 = (unsigned) aco_opcode::s_flbit_i32_b64,
      s_lshl = (unsigned) aco_opcode::s_lshl_b64,
   };

   Program *program;
   bool use_iterator;
   bool start; // only when use_iterator == false
   RegClass lm;

   std::vector<aco_ptr<Instruction>> *instructions;
   std::vector<aco_ptr<Instruction>>::iterator it;
   bool is_precise = false;
   bool is_sz_preserve = false;
   bool is_inf_preserve = false;
   bool is_nan_preserve = false;
   bool is_nuw = false;

   Builder(Program *pgm) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(NULL) {}
   Builder(Program *pgm, Block *block) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(&block->instructions) {}
   Builder(Program *pgm, std::vector<aco_ptr<Instruction>> *instrs) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(instrs) {}

   Builder precise() const {
      Builder res = *this;
      res.is_precise = true;
      return res;
   };

   Builder nuw() const {
      Builder res = *this;
      res.is_nuw = true;
      return res;
   }

   void moveEnd(Block *block) {
      instructions = &block->instructions;
   }

   void reset() {
      use_iterator = false;
      start = false;
      instructions = NULL;
   }

   void reset(Block *block) {
      use_iterator = false;
      start = false;
      instructions = &block->instructions;
   }

   void reset(std::vector<aco_ptr<Instruction>> *instrs) {
      use_iterator = false;
      start = false;
      instructions = instrs;
   }

   void reset(std::vector<aco_ptr<Instruction>> *instrs, std::vector<aco_ptr<Instruction>>::iterator instr_it) {
      use_iterator = true;
      start = false;
      instructions = instrs;
      it = instr_it;
   }

   Result insert(aco_ptr<Instruction> instr) {
      Instruction *instr_ptr = instr.get();
      if (instructions) {
         if (use_iterator) {
            it = instructions->emplace(it, std::move(instr));
            it = std::next(it);
         } else if (!start) {
            instructions->emplace_back(std::move(instr));
         } else {
            instructions->emplace(instructions->begin(), std::move(instr));
         }
      }
      return Result(instr_ptr);
   }

   Result insert(Instruction* instr) {
      if (instructions) {
         if (use_iterator) {
            it = instructions->emplace(it, aco_ptr<Instruction>(instr));
            it = std::next(it);
         } else if (!start) {
            instructions->emplace_back(aco_ptr<Instruction>(instr));
         } else {
            instructions->emplace(instructions->begin(), aco_ptr<Instruction>(instr));
         }
      }
      return Result(instr);
   }

   Temp tmp(RegClass rc) {
      return program->allocateTmp(rc);
   }

   Temp tmp(RegType type, unsigned size) {
      return tmp(RegClass(type, size));
   }

   Definition def(RegClass rc) {
      return Definition(program->allocateTmp(rc));
   }

   Definition def(RegType type, unsigned size) {
      return def(RegClass(type, size));
   }

   Definition def(RegClass rc, PhysReg reg) {
      return Definition(program->allocateId(rc), reg, rc);
   }

   inline aco_opcode w64or32(WaveSpecificOpcode opcode) const {
      if (program->wave_size == 64)
         return (aco_opcode) opcode;

      switch (opcode) {
      case s_cselect:
         return aco_opcode::s_cselect_b32;
      case s_cmp_lg:
         return aco_opcode::s_cmp_lg_u32;
      case s_and:
         return aco_opcode::s_and_b32;
      case s_andn2:
         return aco_opcode::s_andn2_b32;
      case s_or:
         return aco_opcode::s_or_b32;
      case s_orn2:
         return aco_opcode::s_orn2_b32;
      case s_not:
         return aco_opcode::s_not_b32;
      case s_mov:
         return aco_opcode::s_mov_b32;
      case s_wqm:
         return aco_opcode::s_wqm_b32;
      case s_and_saveexec:
         return aco_opcode::s_and_saveexec_b32;
      case s_or_saveexec:
         return aco_opcode::s_or_saveexec_b32;
      case s_xnor:
         return aco_opcode::s_xnor_b32;
      case s_xor:
         return aco_opcode::s_xor_b32;
      case s_bcnt1_i32:
         return aco_opcode::s_bcnt1_i32_b32;
      case s_bitcmp1:
         return aco_opcode::s_bitcmp1_b32;
      case s_ff1_i32:
         return aco_opcode::s_ff1_i32_b32;
      case s_flbit_i32:
         return aco_opcode::s_flbit_i32_b32;
      case s_lshl:
         return aco_opcode::s_lshl_b32;
      default:
         unreachable("Unsupported wave specific opcode.");
      }
   }

% for fixed in ['m0', 'vcc', 'exec', 'scc']:
   Operand ${fixed}(Temp tmp) {
       % if fixed == 'vcc' or fixed == 'exec':
          //vcc_hi and exec_hi can still be used in wave32
          assert(tmp.type() == RegType::sgpr && tmp.bytes() <= 8);
       % endif
       Operand op(tmp);
       op.setFixed(aco::${fixed});
       return op;
   }

   Definition ${fixed}(Definition def) {
       % if fixed == 'vcc' or fixed == 'exec':
          //vcc_hi and exec_hi can still be used in wave32
          assert(def.regClass().type() == RegType::sgpr && def.bytes() <= 8);
       % endif
       def.setFixed(aco::${fixed});
       return def;
   }

% endfor

   Operand set16bit(Operand op) {
       op.set16bit(true);
       return op;
   }

   Operand set24bit(Operand op) {
       op.set24bit(true);
       return op;
   }

   /* hand-written helpers */
   Temp as_uniform(Op op)
   {
      assert(op.op.isTemp());
      if (op.op.getTemp().type() == RegType::vgpr)
         return pseudo(aco_opcode::p_as_uniform, def(RegType::sgpr, op.op.size()), op);
      else
         return op.op.getTemp();
   }

   Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool tmpu24=false, bool tmpi24=false)
   {
      assert(tmp.type() == RegType::vgpr);
      /* Assume 24bit if high 8 bits of tmp don't impact the result. */
      if ((imm & 0xff) == 0) {
         tmpu24 = true;
         tmpi24 = true;
      }
      tmpu24 &= imm <= 0xffffffu;
      tmpi24 &= imm <= 0x7fffffu || imm >= 0xff800000u;
      bool has_lshl_add = program->gfx_level >= GFX9;
      /* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles),
       * compared to 4x the latency on <GFX10. */
      unsigned mul_cost = program->gfx_level >= GFX10 ? 1 : (4 + Operand::c32(imm).isLiteral());
      if (imm == 0) {
         return copy(dst, Operand::zero());
      } else if (imm == 1) {
         return copy(dst, Operand(tmp));
      } else if (imm == 0xffffffff) {
         return vsub32(dst, Operand::zero(), tmp);
      } else if (util_is_power_of_two_or_zero(imm)) {
         return vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(ffs(imm) - 1u), tmp);
      } else if (tmpu24) {
        return vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32(imm), tmp);
      } else if (tmpi24) {
        return vop2(aco_opcode::v_mul_i32_i24, dst, Operand::c32(imm), tmp);
      } else if (util_is_power_of_two_nonzero(imm - 1u)) {
         return vadd32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm - 1u) - 1u), tmp), tmp);
      } else if (mul_cost > 2 && util_is_power_of_two_nonzero(imm + 1u)) {
         return vsub32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm + 1u) - 1u), tmp), tmp);
      }

      unsigned instrs_required = util_bitcount(imm);
      if (!has_lshl_add) {
         instrs_required = util_bitcount(imm) - (imm & 0x1); /* shifts */
         instrs_required += util_bitcount(imm) - 1; /* additions */
      }
      if (instrs_required < mul_cost) {
         Result res(NULL);
         Temp cur;
         while (imm) {
            unsigned shift = u_bit_scan(&imm);
            Definition tmp_dst = imm ? def(v1) : dst;

            if (shift && cur.id())
               res = vadd32(Definition(tmp_dst), vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(shift), tmp), cur);
            else if (shift)
               res = vop2(aco_opcode::v_lshlrev_b32, Definition(tmp_dst), Operand::c32(shift), tmp);
            else if (cur.id())
               res = vadd32(Definition(tmp_dst), tmp, cur);
            else
               tmp_dst = Definition(tmp);

            cur = tmp_dst.getTemp();
         }
         return res;
      }

      Temp imm_tmp = copy(def(s1), Operand::c32(imm));
      return vop3(aco_opcode::v_mul_lo_u32, dst, imm_tmp, tmp);
   }

   Result v_mul24_imm(Definition dst, Temp tmp, uint32_t imm)
   {
      return v_mul_imm(dst, tmp, imm & 0xffffffu, true);
   }

   Result copy(Definition dst, Op op) {
      return pseudo(aco_opcode::p_parallelcopy, dst, op);
   }

   Result vadd32(Definition dst, Op a, Op b, bool carry_out=false, Op carry_in=Op(Operand(s2)), bool post_ra=false) {
      if (b.op.isConstant() || b.op.regClass().type() != RegType::vgpr)
         std::swap(a, b);
      if (!post_ra && (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr))
         b = copy(def(v1), b);

      if (!carry_in.op.isUndefined())
         return vop2(aco_opcode::v_addc_co_u32, Definition(dst), def(lm), a, b, carry_in);
      else if (program->gfx_level >= GFX10 && carry_out)
         return vop3(aco_opcode::v_add_co_u32_e64, Definition(dst), def(lm), a, b);
      else if (program->gfx_level < GFX9 || carry_out)
         return vop2(aco_opcode::v_add_co_u32, Definition(dst), def(lm), a, b);
      else
         return vop2(aco_opcode::v_add_u32, Definition(dst), a, b);
   }

   Result vsub32(Definition dst, Op a, Op b, bool carry_out=false, Op borrow=Op(Operand(s2)))
   {
      if (!borrow.op.isUndefined() || program->gfx_level < GFX9)
         carry_out = true;

      bool reverse = !b.op.isTemp() || b.op.regClass().type() != RegType::vgpr;
      if (reverse)
         std::swap(a, b);
      if (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr)
         b = copy(def(v1), b);

      aco_opcode op;
      Temp carry;
      if (carry_out) {
         carry = tmp(lm);
         if (borrow.op.isUndefined())
            op = reverse ? aco_opcode::v_subrev_co_u32 : aco_opcode::v_sub_co_u32;
         else
            op = reverse ? aco_opcode::v_subbrev_co_u32 : aco_opcode::v_subb_co_u32;
      } else {
         op = reverse ? aco_opcode::v_subrev_u32 : aco_opcode::v_sub_u32;
      }
      bool vop3 = false;
      if (program->gfx_level >= GFX10 && op == aco_opcode::v_subrev_co_u32) {
        vop3 = true;
        op = aco_opcode::v_subrev_co_u32_e64;
      } else if (program->gfx_level >= GFX10 && op == aco_opcode::v_sub_co_u32) {
        vop3 = true;
        op = aco_opcode::v_sub_co_u32_e64;
      }

      int num_ops = borrow.op.isUndefined() ? 2 : 3;
      int num_defs = carry_out ? 2 : 1;
      aco_ptr<Instruction> sub;
      if (vop3)
        sub.reset(create_instruction(op, Format::VOP3, num_ops, num_defs));
      else
        sub.reset(create_instruction(op, Format::VOP2, num_ops, num_defs));
      sub->operands[0] = a.op;
      sub->operands[1] = b.op;
      if (!borrow.op.isUndefined())
         sub->operands[2] = borrow.op;
      sub->definitions[0] = dst;
      if (carry_out)
         sub->definitions[1] = Definition(carry);

      return insert(std::move(sub));
   }

   Result readlane(Definition dst, Op vsrc, Op lane)
   {
      if (program->gfx_level >= GFX8)
         return vop3(aco_opcode::v_readlane_b32_e64, dst, vsrc, lane);
      else
         return vop2(aco_opcode::v_readlane_b32, dst, vsrc, lane);
   }
   Result writelane(Definition dst, Op val, Op lane, Op vsrc) {
      if (program->gfx_level >= GFX8)
         return vop3(aco_opcode::v_writelane_b32_e64, dst, val, lane, vsrc);
      else
         return vop2(aco_opcode::v_writelane_b32, dst, val, lane, vsrc);
   }
<%
import itertools
# Each entry is (builder method name, list of Format members OR-ed together
# into the instruction format, iterable of (num_definitions, num_operands)
# shapes[, extra_field_setup]). One builder overload is emitted per shape.
formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8), (2, 6), (3, 6), (1, 7)]),
           ("sop1", [Format.SOP1], [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
           ("sop2", [Format.SOP2], itertools.product([1, 2], [2, 3])),
           ("sopk", [Format.SOPK], itertools.product([0, 1, 2], [0, 1])),
           ("sopp", [Format.SOPP], itertools.product([0, 1], [0, 1])),
           ("sopc", [Format.SOPC], [(1, 2)]),
           ("smem", [Format.SMEM], [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (1, 1), (0, 0)]),
           ("ds", [Format.DS], [(1, 0), (1, 1), (1, 2), (1, 3), (0, 3), (0, 4)]),
           ("ldsdir", [Format.LDSDIR], [(1, 1)]),
           ("mubuf", [Format.MUBUF], [(0, 4), (1, 3), (1, 4)]),
           ("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]),
           ("mimg", [Format.MIMG], itertools.product([0, 1], [3, 4, 5, 6, 7])),
           ("exp", [Format.EXP], [(0, 4), (0, 5)]),
           ("branch", [Format.PSEUDO_BRANCH], itertools.product([1], [0, 1])),
           ("barrier", [Format.PSEUDO_BARRIER], [(0, 0)]),
           ("reduction", [Format.PSEUDO_REDUCTION], [(3, 3)]),
           ("vop1", [Format.VOP1], [(0, 0), (1, 1), (2, 2)]),
           ("vop1_sdwa", [Format.VOP1, Format.SDWA], [(1, 1)]),
           ("vop2", [Format.VOP2], itertools.product([1, 2], [2, 3])),
           ("vop2_sdwa", [Format.VOP2, Format.SDWA], itertools.product([1, 2], [2, 3])),
           ("vopc", [Format.VOPC], itertools.product([1, 2], [2])),
           ("vopc_sdwa", [Format.VOPC, Format.SDWA], itertools.product([1, 2], [2])),
           ("vop3", [Format.VOP3], [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p", [Format.VOP3P], [(1, 2), (1, 3)]),
           ("vopd", [Format.VOPD], [(2, 2), (2, 3), (2, 4), (2, 5), (2, 6)]),
           ("vinterp_inreg", [Format.VINTERP_INREG], [(1, 3)]),
           ("vintrp", [Format.VINTRP], [(1, 2), (1, 3)]),
           ("vop1_dpp", [Format.VOP1, Format.DPP16], [(1, 1)]),
           ("vop2_dpp", [Format.VOP2, Format.DPP16], itertools.product([1, 2], [2, 3])),
           ("vopc_dpp", [Format.VOPC, Format.DPP16], itertools.product([1, 2], [2])),
           ("vop3_dpp", [Format.VOP3, Format.DPP16], [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p_dpp", [Format.VOP3P, Format.DPP16], [(1, 2), (1, 3)]),
           ("vop1_dpp8", [Format.VOP1, Format.DPP8], [(1, 1)]),
           ("vop2_dpp8", [Format.VOP2, Format.DPP8], itertools.product([1, 2], [2, 3])),
           ("vopc_dpp8", [Format.VOPC, Format.DPP8], itertools.product([1, 2], [2])),
           ("vop3_dpp8", [Format.VOP3, Format.DPP8], [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p_dpp8", [Format.VOP3P, Format.DPP8], [(1, 2), (1, 3)]),
           ("vop1_e64", [Format.VOP1, Format.VOP3], itertools.product([1], [1])),
           ("vop2_e64", [Format.VOP2, Format.VOP3], itertools.product([1, 2], [2, 3])),
           ("vopc_e64", [Format.VOPC, Format.VOP3], itertools.product([1, 2], [2])),
           ("vop1_e64_dpp", [Format.VOP1, Format.VOP3, Format.DPP16], itertools.product([1], [1])),
           ("vop2_e64_dpp", [Format.VOP2, Format.VOP3, Format.DPP16], itertools.product([1, 2], [2, 3])),
           ("vopc_e64_dpp", [Format.VOPC, Format.VOP3, Format.DPP16], itertools.product([1, 2], [2])),
           ("vop1_e64_dpp8", [Format.VOP1, Format.VOP3, Format.DPP8], itertools.product([1], [1])),
           ("vop2_e64_dpp8", [Format.VOP2, Format.VOP3, Format.DPP8], itertools.product([1, 2], [2, 3])),
           ("vopc_e64_dpp8", [Format.VOPC, Format.VOP3, Format.DPP8], itertools.product([1, 2], [2])),
           ("flat", [Format.FLAT], [(0, 3), (1, 2), (1, 3)]),
           ("global", [Format.GLOBAL], [(0, 3), (1, 2), (1, 3)]),
           ("scratch", [Format.SCRATCH], [(0, 3), (1, 2), (1, 3)])]
# Pad entries that omit the optional extra_field_setup so that every tuple
# unpacks into exactly the four names used by the loop below. A complete
# entry already has four fields and must be left untouched.
formats = [(f if len(f) == 4 else f + ('',)) for f in formats]
%>\\
% for name, formats, shapes, extra_field_setup in formats:
    % for num_definitions, num_operands in shapes:
        <%
        # Parameter list: opcode, then definitions, then operands, then any
        # format-specific fields declared by each Format.
        args = ['aco_opcode opcode']
        for i in range(num_definitions):
            args.append('Definition def%d' % i)
        for i in range(num_operands):
            args.append('Op op%d' % i)
        for f in formats:
            args += f.get_builder_field_decls()
        %>\\

   Result ${name}(${', '.join(args)})
   {
      Instruction* instr = create_instruction(opcode, (Format)(${'|'.join('(int)Format::%s' % f.name for f in formats)}), ${num_operands}, ${num_definitions});
        % for i in range(num_definitions):
            instr->definitions[${i}] = def${i};
            instr->definitions[${i}].setPrecise(is_precise);
            instr->definitions[${i}].setSZPreserve(is_sz_preserve);
            instr->definitions[${i}].setInfPreserve(is_inf_preserve);
            instr->definitions[${i}].setNaNPreserve(is_nan_preserve);
            instr->definitions[${i}].setNUW(is_nuw);
        % endfor
        % for i in range(num_operands):
            instr->operands[${i}] = op${i}.op;
        % endfor
        % for f in formats:
            % for dest, field_name in zip(f.get_builder_field_dests(), f.get_builder_field_names()):
      instr->${f.get_accessor()}().${dest} = ${field_name};
            % endfor
            ${f.get_builder_initialization(num_operands)}
        % endfor
       ${extra_field_setup}
      return insert(instr);
   }

    % if name == 'sop1' or name == 'sop2' or name == 'sopc':
        <%
        # Wave-size-agnostic overload: same parameters, but the opcode is a
        # WaveSpecificOpcode that is resolved through w64or32().
        args[0] = 'WaveSpecificOpcode opcode'
        params = []
        for i in range(num_definitions):
            params.append('def%d' % i)
        for i in range(num_operands):
            params.append('op%d' % i)
        %>\\

   inline Result ${name}(${', '.join(args)})
   {
       return ${name}(w64or32(opcode), ${', '.join(params)});
   }

    % endif
    % endfor
% endfor
};

void hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset);

} // namespace aco

#endif /* _ACO_BUILDER_ */"""

from aco_opcodes import Format
from mako.template import Template

print(Template(template).render(Format=Format))