1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #ifndef ACO_IR_H
8 #define ACO_IR_H
9
10 #include "aco_opcodes.h"
11 #include "aco_shader_info.h"
12 #include "aco_util.h"
13
14 #include "util/compiler.h"
15
16 #include "ac_binary.h"
17 #include "ac_hw_stage.h"
18 #include "ac_shader_util.h"
19 #include "amd_family.h"
20 #include <algorithm>
21 #include <bitset>
22 #include <memory>
23 #include <vector>
24
25 typedef struct nir_shader nir_shader;
26
27 namespace aco {
28
29 extern uint64_t debug_flags;
30
/* Bit flags stored in debug_flags, controlling validation and
 * disabling individual optimization/scheduling passes. */
enum {
   DEBUG_VALIDATE_IR = 0x1,
   DEBUG_VALIDATE_RA = 0x2,
   DEBUG_VALIDATE_LIVE_VARS = 0x4,
   DEBUG_FORCE_WAITCNT = 0x8,
   DEBUG_NO_VN = 0x10,
   DEBUG_NO_OPT = 0x20,
   DEBUG_NO_SCHED = 0x40,
   DEBUG_PERF_INFO = 0x80,
   DEBUG_LIVE_INFO = 0x100,
   DEBUG_FORCE_WAITDEPS = 0x200,
   DEBUG_NO_VALIDATE_IR = 0x400,
   DEBUG_NO_SCHED_ILP = 0x800,
   DEBUG_NO_SCHED_VOPD = 0x1000,
};
46
/* Memory storage classes used by memory_sync_info to track which kinds of
 * memory an access may alias. Values are single bits so they can be OR'd. */
enum storage_class : uint8_t {
   storage_none = 0x0, /* no synchronization and can be reordered around aliasing stores */
   storage_buffer = 0x1, /* SSBOs and global memory */
   storage_gds = 0x2,
   storage_image = 0x4,
   storage_shared = 0x8, /* or TCS output */
   storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
   storage_task_payload = 0x20, /* Task-Mesh payload */
   storage_scratch = 0x40,
   storage_vgpr_spill = 0x80,
   storage_count = 8, /* not counting storage_none */
};
59
/* Ordering/visibility semantics attached to memory accesses and barriers.
 * Values are single bits so they can be OR'd together. */
enum memory_semantics : uint8_t {
   semantic_none = 0x0,
   /* for loads: don't move any access after this load to before this load (even other loads)
    * for barriers: don't move any access after the barrier to before any
    * atomics/control_barriers/sendmsg_gs_done/position-primitive-export before the barrier */
   semantic_acquire = 0x1,
   /* for stores: don't move any access before this store to after this store
    * for barriers: don't move any access before the barrier to after any
    * atomics/control_barriers/sendmsg_gs_done/position-primitive-export after the barrier */
   semantic_release = 0x2,

   /* the rest are for load/stores/atomics only */
   /* cannot be DCE'd or CSE'd */
   semantic_volatile = 0x4,
   /* does not interact with barriers and assumes this lane is the only lane
    * accessing this memory */
   semantic_private = 0x8,
   /* this operation can be reordered around operations of the same storage.
    * says nothing about barriers */
   semantic_can_reorder = 0x10,
   /* this is an atomic instruction (may only read or write memory) */
   semantic_atomic = 0x20,
   /* this instruction both reads and writes memory */
   semantic_rmw = 0x40,

   /* common combinations */
   semantic_acqrel = semantic_acquire | semantic_release,
   semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw,
};
88
/* Synchronization scope of an access or barrier; numerically ordered from
 * narrowest (single invocation) to widest (whole device). */
enum sync_scope : uint8_t {
   scope_invocation = 0,
   scope_subgroup = 1,
   scope_workgroup = 2,
   scope_queuefamily = 3,
   scope_device = 4,
};
96
/* Compact (3-byte) description of an instruction's memory behavior:
 * which storage classes it touches, its ordering semantics and its scope. */
struct memory_sync_info {
   memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {}
   memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation)
       : storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_)
   {}

   storage_class storage : 8;      /* OR of storage_class bits */
   memory_semantics semantics : 8; /* OR of memory_semantics bits */
   sync_scope scope : 8;

   bool operator==(const memory_sync_info& rhs) const
   {
      return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope;
   }

   /* Whether this access may be reordered around other accesses:
    * never for acquire/release or volatile accesses. */
   bool can_reorder() const
   {
      if (semantics & semantic_acqrel)
         return false;
      /* Also check storage so that zero-initialized memory_sync_info can be
       * reordered. */
      return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
   }
};
static_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
122
/* Float rounding modes (ne = nearest-even, pi = +inf, ni = -inf, tz = toward zero);
 * values match the hardware MODE register encoding (see float_mode). */
enum fp_round {
   fp_round_ne = 0,
   fp_round_pi = 1,
   fp_round_ni = 2,
   fp_round_tz = 3,
};
129
/* Denormal handling modes; bit 0 = keep input denormals, bit 1 = keep output
 * denormals. Values match the hardware MODE register encoding (see float_mode). */
enum fp_denorm {
   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
   fp_denorm_flush = 0x0,
   fp_denorm_keep_in = 0x1,
   fp_denorm_keep_out = 0x2,
   fp_denorm_keep = 0x3,
};
138
/* Floating-point mode of an instruction/program: rounding and denormal
 * behavior, plus flags restricting which optimizations are allowed. */
struct float_mode {
   /* matches encoding of the MODE register */
   union {
      struct {
         fp_round round32 : 2;     /* rounding for 32-bit ops */
         fp_round round16_64 : 2;  /* rounding for 16/64-bit ops */
         unsigned denorm32 : 2;    /* fp_denorm for 32-bit ops */
         unsigned denorm16_64 : 2; /* fp_denorm for 16/64-bit ops */
      };
      struct {
         /* both rounding fields / both denorm fields as one nibble each */
         uint8_t round : 4;
         uint8_t denorm : 4;
      };
      uint8_t val = 0;
   };
   /* if false, optimizations which may remove denormal flushing can be done */
   bool must_flush_denorms32 : 1;
   bool must_flush_denorms16_64 : 1;
   bool care_about_round32 : 1;
   bool care_about_round16_64 : 1;

   /* Returns true if instructions using the mode "other" can safely use the
    * current one instead. */
   bool canReplace(float_mode other) const noexcept
   {
      return val == other.val && (must_flush_denorms32 || !other.must_flush_denorms32) &&
             (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
             (care_about_round32 || !other.care_about_round32) &&
             (care_about_round16_64 || !other.care_about_round16_64);
   }
};
170
/* Hardware wait-counter kinds; these values index the counters of wait_imm
 * (see the offsetof static_asserts below wait_imm). */
enum wait_type {
   wait_type_exp = 0,
   wait_type_lgkm = 1,
   wait_type_vm = 2,
   /* GFX10+ */
   wait_type_vs = 3,
   /* GFX12+ */
   wait_type_sample = 4,
   wait_type_bvh = 5,
   wait_type_km = 6,
   wait_type_num = 7,
};
183
184 struct Instruction;
185
/* One value per hardware wait counter. The member order must match the
 * wait_type enum: operator[] indexes the struct's bytes by wait_type,
 * which the offsetof static_asserts below verify. */
struct wait_imm {
   static const uint8_t unset_counter = 0xff; /* counter not waited on */

   uint8_t exp;
   uint8_t lgkm;
   uint8_t vm;
   uint8_t vs;
   uint8_t sample;
   uint8_t bvh;
   uint8_t km;

   wait_imm();
   wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);

   /* Encode into an s_waitcnt-style immediate for the given chip. */
   uint16_t pack(enum amd_gfx_level chip) const;

   /* Maximum (no-wait) counter values for the given chip. */
   static wait_imm max(enum amd_gfx_level gfx_level);

   bool unpack(enum amd_gfx_level gfx_level, const Instruction* instr);

   /* Take the element-wise minimum with "other"; returns whether anything changed. */
   bool combine(const wait_imm& other);

   bool empty() const;

   void print(FILE* output) const;

   uint8_t& operator[](size_t i)
   {
      assert(i < wait_type_num);
      return *((uint8_t*)this + i);
   }

   const uint8_t& operator[](size_t i) const
   {
      assert(i < wait_type_num);
      return *((uint8_t*)this + i);
   }
};
static_assert(offsetof(wait_imm, exp) == wait_type_exp);
static_assert(offsetof(wait_imm, lgkm) == wait_type_lgkm);
static_assert(offsetof(wait_imm, vm) == wait_type_vm);
static_assert(offsetof(wait_imm, vs) == wait_type_vs);
static_assert(offsetof(wait_imm, sample) == wait_type_sample);
static_assert(offsetof(wait_imm, bvh) == wait_type_bvh);
static_assert(offsetof(wait_imm, km) == wait_type_km);
231
232 /* s_wait_event immediate bits. */
/* s_wait_event immediate bits. Note the polarity differs between GFX11
 * (bit set = don't wait) and GFX12 (bit set = wait). */
enum wait_event_imm : uint16_t {
   /* If this bit is 0, await that the export buffer space has been allocated.
    * In Primitive Ordered Pixel Shading, export ready means that the overlapped waves have exited
    * their ordered sections (by performing the `done` export), and that the current wave may enter
    * its ordered section.
    */
   wait_event_imm_dont_wait_export_ready_gfx11 = 0x1,
   wait_event_imm_wait_export_ready_gfx12 = 0x2,
};
242
243 constexpr Format
asVOP3(Format format)244 asVOP3(Format format)
245 {
246 return (Format)((uint32_t)Format::VOP3 | (uint32_t)format);
247 };
248
249 constexpr Format
asSDWA(Format format)250 asSDWA(Format format)
251 {
252 assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
253 return (Format)((uint32_t)Format::SDWA | (uint32_t)format);
254 }
255
256 constexpr Format
withoutDPP(Format format)257 withoutDPP(Format format)
258 {
259 return (Format)((uint32_t)format & ~((uint32_t)Format::DPP16 | (uint32_t)Format::DPP8));
260 }
261
262 constexpr Format
withoutVOP3(Format format)263 withoutVOP3(Format format)
264 {
265 return (Format)((uint32_t)format & ~((uint32_t)Format::VOP3));
266 }
267
/* The two register files: scalar (per-wave) and vector (per-lane). */
enum class RegType {
   sgpr,
   vgpr,
};
272
/* Register class: the size and kind of a register.
 *
 * Encoding of RC (one byte):
 *   bits [4:0] - size: in dwords normally, in bytes for subdword classes
 *   bit  5     - set for vgpr, clear for sgpr
 *   bit  6     - linear vgpr (used for WWM and spills to vgpr)
 *   bit  7     - subdword (byte-sized) vgpr class
 */
struct RegClass {

   enum RC : uint8_t {
      s1 = 1,
      s2 = 2,
      s3 = 3,
      s4 = 4,
      s6 = 6,
      s8 = 8,
      s16 = 16,
      v1 = s1 | (1 << 5),
      v2 = s2 | (1 << 5),
      v3 = s3 | (1 << 5),
      v4 = s4 | (1 << 5),
      v5 = 5 | (1 << 5),
      v6 = 6 | (1 << 5),
      v7 = 7 | (1 << 5),
      v8 = 8 | (1 << 5),
      /* byte-sized register class */
      v1b = v1 | (1 << 7),
      v2b = v2 | (1 << 7),
      v3b = v3 | (1 << 7),
      v4b = v4 | (1 << 7),
      v6b = v6 | (1 << 7),
      v8b = v8 | (1 << 7),
      /* these are used for WWM and spills to vgpr */
      v1_linear = v1 | (1 << 6),
      v2_linear = v2 | (1 << 6),
   };

   RegClass() = default;
   constexpr RegClass(RC rc_) : rc(rc_) {}
   /* "size" is in dwords here */
   constexpr RegClass(RegType type, unsigned size)
       : rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size))
   {}

   constexpr operator RC() const { return rc; }
   explicit operator bool() = delete;

   constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
   constexpr bool is_linear_vgpr() const { return rc & (1 << 6); };
   constexpr bool is_subdword() const { return rc & (1 << 7); }
   /* size in bytes: the low 5 bits are bytes for subdword classes, dwords otherwise */
   constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); }
   // TODO: use size() less in favor of bytes()
   /* size in dwords, rounded up */
   constexpr unsigned size() const { return (bytes() + 3) >> 2; }
   /* sgprs are always linear; vgprs only if the linear bit is set */
   constexpr bool is_linear() const { return rc <= RC::s16 || is_linear_vgpr(); }
   constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); }
   constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); }

   /* Construct a class of the given type covering "bytes" bytes; picks a
    * subdword class for vgprs when the size isn't dword-aligned. */
   static constexpr RegClass get(RegType type, unsigned bytes)
   {
      if (type == RegType::sgpr) {
         return RegClass(type, DIV_ROUND_UP(bytes, 4u));
      } else {
         return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u);
      }
   }

   /* Same type (including linearity) as this class, but a different byte size. */
   constexpr RegClass resize(unsigned bytes) const
   {
      if (is_linear_vgpr()) {
         assert(bytes % 4u == 0);
         return get(RegType::vgpr, bytes).as_linear();
      }
      return get(type(), bytes);
   }

private:
   RC rc;
};
343
/* transitional helper expressions: shorthands for the common register classes */
static constexpr RegClass s1{RegClass::s1};
static constexpr RegClass s2{RegClass::s2};
static constexpr RegClass s3{RegClass::s3};
static constexpr RegClass s4{RegClass::s4};
static constexpr RegClass s8{RegClass::s8};
static constexpr RegClass s16{RegClass::s16};
static constexpr RegClass v1{RegClass::v1};
static constexpr RegClass v2{RegClass::v2};
static constexpr RegClass v3{RegClass::v3};
static constexpr RegClass v4{RegClass::v4};
static constexpr RegClass v5{RegClass::v5};
static constexpr RegClass v6{RegClass::v6};
static constexpr RegClass v7{RegClass::v7};
static constexpr RegClass v8{RegClass::v8};
static constexpr RegClass v1b{RegClass::v1b};
static constexpr RegClass v2b{RegClass::v2b};
static constexpr RegClass v3b{RegClass::v3b};
static constexpr RegClass v4b{RegClass::v4b};
static constexpr RegClass v6b{RegClass::v6b};
static constexpr RegClass v8b{RegClass::v8b};
365
366 /**
367 * Temp Class
368 * Each temporary virtual register has a
369 * register class (i.e. size and type)
370 * and SSA id.
371 */
struct Temp {
   Temp() noexcept : id_(0), reg_class(0) {}
   constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {}

   /* SSA id; id 0 denotes "no temporary" (see Operand/Definition). */
   constexpr uint32_t id() const noexcept { return id_; }
   constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; }

   constexpr unsigned bytes() const noexcept { return regClass().bytes(); }
   constexpr unsigned size() const noexcept { return regClass().size(); }
   constexpr RegType type() const noexcept { return regClass().type(); }
   constexpr bool is_linear() const noexcept { return regClass().is_linear(); }

   /* comparison/ordering is by SSA id only; the register class is ignored */
   constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); }
   constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
   constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }

private:
   uint32_t id_ : 24;      /* SSA id */
   uint32_t reg_class : 8; /* RegClass::RC encoding */
};
392
393 /**
394 * PhysReg
395 * Represents the physical register for each
396 * Operand and Definition.
397 */
struct PhysReg {
   constexpr PhysReg() = default;
   explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
   constexpr unsigned reg() const { return reg_b >> 2; }
   constexpr unsigned byte() const { return reg_b & 0x3; }
   constexpr operator unsigned() const { return reg(); }
   constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
   constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
   constexpr bool operator<(PhysReg other) const { return reg_b < other.reg_b; }
   /* Returns a register advanced by the given (possibly negative) number of
    * bytes; may change both the register index and the byte offset. */
   constexpr PhysReg advance(int bytes) const
   {
      PhysReg res = *this;
      res.reg_b += bytes;
      return res;
   }

   /* byte address: register index * 4 + byte offset within the register */
   uint16_t reg_b = 0;
};
416
/* helper expressions for special registers (values are the scalar source
 * operand encodings of these registers) */
static constexpr PhysReg m0{124};
static constexpr PhysReg flat_scr_lo{102}; /* GFX8-GFX9, encoded differently on GFX6-7 */
static constexpr PhysReg flat_scr_hi{103}; /* GFX8-GFX9, encoded differently on GFX6-7 */
static constexpr PhysReg vcc{106};
static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg tba{108}; /* GFX6-GFX8 */
static constexpr PhysReg tma{110}; /* GFX6-GFX8 */
static constexpr PhysReg ttmp0{112};
static constexpr PhysReg ttmp1{113};
static constexpr PhysReg ttmp2{114};
static constexpr PhysReg ttmp3{115};
static constexpr PhysReg ttmp4{116};
static constexpr PhysReg ttmp5{117};
static constexpr PhysReg ttmp6{118};
static constexpr PhysReg ttmp7{119};
static constexpr PhysReg ttmp8{120};
static constexpr PhysReg ttmp9{121};
static constexpr PhysReg ttmp10{122};
static constexpr PhysReg ttmp11{123};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg pops_exiting_wave_id{239}; /* GFX9-GFX10.3 */
static constexpr PhysReg scc{253};
443
444 /**
445 * Operand Class
446 * Initially, each Operand refers to either
447 * a temporary virtual register
448 * or to a constant value
449 * Temporary registers get mapped to physical register during RA
450 * Constant values are inlined into the instruction sequence.
451 */
class Operand final {
public:
   /* Default: an undefined, fixed operand. PhysReg{128} is the encoding of
    * inline constant 0 and doubles as the "no register" placeholder here. */
   constexpr Operand()
       : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false),
         isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), isClobbered_(false),
         isCopyKill_(false), is16bit_(false), is24bit_(false), signext(false)
   {}

   /* Operand referring to a temporary; a zero id makes it undefined instead. */
   explicit Operand(Temp r) noexcept
   {
      data_.temp = r;
      if (r.id()) {
         isTemp_ = true;
      } else {
         isUndef_ = true;
         setFixed(PhysReg{128});
      }
   };
   /* Temporary operand pre-assigned to a physical register. */
   explicit Operand(Temp r, PhysReg reg) noexcept
   {
      assert(r.id()); /* Don't allow fixing an undef to a register */
      data_.temp = r;
      isTemp_ = true;
      setFixed(reg);
   };

   /* 8-bit constant */
   static Operand c8(uint8_t v) noexcept
   {
      /* 8-bit constants are only used for copies and copies from any 8-bit
       * constant can be implemented with a SDWA v_mul_u32_u24. So consider all
       * to be inline constants. */
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = 0;
      op.setFixed(PhysReg{0u});
      return op;
   };

   /* 16-bit constant. The fixed register encodes the scalar source operand:
    * 128-192 for integers 0..64, 193-208 for -1..-16, 240-248 for the
    * floating-point inline constants and 255 for a literal. */
   static Operand c16(uint16_t v) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = 1;
      if (v <= 64)
         op.setFixed(PhysReg{128u + v});
      else if (v >= 0xFFF0) /* [-16 .. -1] */
         op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)});
      else if (v == 0x3800) /* 0.5 */
         op.setFixed(PhysReg{240});
      else if (v == 0xB800) /* -0.5 */
         op.setFixed(PhysReg{241});
      else if (v == 0x3C00) /* 1.0 */
         op.setFixed(PhysReg{242});
      else if (v == 0xBC00) /* -1.0 */
         op.setFixed(PhysReg{243});
      else if (v == 0x4000) /* 2.0 */
         op.setFixed(PhysReg{244});
      else if (v == 0xC000) /* -2.0 */
         op.setFixed(PhysReg{245});
      else if (v == 0x4400) /* 4.0 */
         op.setFixed(PhysReg{246});
      else if (v == 0xC400) /* -4.0 */
         op.setFixed(PhysReg{247});
      else if (v == 0x3118) /* 1/2 PI */
         op.setFixed(PhysReg{248});
      else /* Literal Constant */
         op.setFixed(PhysReg{255});
      return op;
   }

   /* 32-bit constant */
   static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); }

   /* 64-bit constant. Inline-constant encoding as in c16()/c32_or_c64(); for
    * literals, only the low 32 bits plus a sign-extension flag are stored. */
   static Operand c64(uint64_t v) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.isConstant_ = true;
      op.constSize = 3;
      if (v <= 64) {
         op.data_.i = (uint32_t)v;
         op.setFixed(PhysReg{128 + (uint32_t)v});
      } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
         op.data_.i = (uint32_t)v;
         /* unsigned wrap-around yields 193..208 */
         op.setFixed(PhysReg{192 - (uint32_t)v});
      } else if (v == 0x3FE0000000000000) { /* 0.5 */
         op.data_.i = 0x3f000000;
         op.setFixed(PhysReg{240});
      } else if (v == 0xBFE0000000000000) { /* -0.5 */
         op.data_.i = 0xbf000000;
         op.setFixed(PhysReg{241});
      } else if (v == 0x3FF0000000000000) { /* 1.0 */
         op.data_.i = 0x3f800000;
         op.setFixed(PhysReg{242});
      } else if (v == 0xBFF0000000000000) { /* -1.0 */
         op.data_.i = 0xbf800000;
         op.setFixed(PhysReg{243});
      } else if (v == 0x4000000000000000) { /* 2.0 */
         op.data_.i = 0x40000000;
         op.setFixed(PhysReg{244});
      } else if (v == 0xC000000000000000) { /* -2.0 */
         op.data_.i = 0xc0000000;
         op.setFixed(PhysReg{245});
      } else if (v == 0x4010000000000000) { /* 4.0 */
         op.data_.i = 0x40800000;
         op.setFixed(PhysReg{246});
      } else if (v == 0xC010000000000000) { /* -4.0 */
         op.data_.i = 0xc0800000;
         op.setFixed(PhysReg{247});
      } else { /* Literal Constant: we don't know if it is a long or double.*/
         op.signext = v >> 63;
         op.data_.i = v & 0xffffffffu;
         op.setFixed(PhysReg{255});
         assert(op.constantValue64() == v &&
                "attempt to create a unrepresentable 64-bit literal constant");
      }
      return op;
   }

   /* 32-bit constant stored as a 32-bit or 64-bit operand */
   static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = is64bit ? 3 : 2;
      if (v <= 64)
         op.setFixed(PhysReg{128 + v});
      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
         op.setFixed(PhysReg{192 - v});
      else if (v == 0x3f000000) /* 0.5 */
         op.setFixed(PhysReg{240});
      else if (v == 0xbf000000) /* -0.5 */
         op.setFixed(PhysReg{241});
      else if (v == 0x3f800000) /* 1.0 */
         op.setFixed(PhysReg{242});
      else if (v == 0xbf800000) /* -1.0 */
         op.setFixed(PhysReg{243});
      else if (v == 0x40000000) /* 2.0 */
         op.setFixed(PhysReg{244});
      else if (v == 0xc0000000) /* -2.0 */
         op.setFixed(PhysReg{245});
      else if (v == 0x40800000) /* 4.0 */
         op.setFixed(PhysReg{246});
      else if (v == 0xc0800000) /* -4.0 */
         op.setFixed(PhysReg{247});
      else { /* Literal Constant */
         assert(!is64bit && "attempt to create a 64-bit literal constant");
         op.setFixed(PhysReg{255});
      }
      return op;
   }

   /* 32-bit constant forced to be a literal, even if it could be inline. */
   static Operand literal32(uint32_t v) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = 2;
      op.setFixed(PhysReg{255});
      return op;
   }

   /* Undefined operand of the given register class. */
   explicit Operand(RegClass type) noexcept
   {
      isUndef_ = true;
      data_.temp = Temp(0, type);
      setFixed(PhysReg{128});
   };
   /* Operand referring to a fixed physical register (no temporary). */
   explicit Operand(PhysReg reg, RegClass type) noexcept
   {
      data_.temp = Temp(0, type);
      setFixed(reg);
   }

   /* Zero constant of the given size in bytes (1, 2, 4 or 8). */
   static Operand zero(unsigned bytes = 4) noexcept
   {
      if (bytes == 8)
         return Operand::c64(0);
      else if (bytes == 4)
         return Operand::c32(0);
      else if (bytes == 2)
         return Operand::c16(0);
      assert(bytes == 1);
      return Operand::c8(0);
   }

   /* This is useful over the constructors when you want to take a gfx level
    * for 1/2 PI or an unknown operand size.
    */
   static Operand get_const(enum amd_gfx_level chip, uint64_t val, unsigned bytes)
   {
      if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
         /* 1/2 PI can be an inline constant on GFX8+ */
         Operand op = Operand::c32(val);
         op.setFixed(PhysReg{248});
         return op;
      }

      if (bytes == 8)
         return Operand::c64(val);
      else if (bytes == 4)
         return Operand::c32(val);
      else if (bytes == 2)
         return Operand::c16(val);
      assert(bytes == 1);
      return Operand::c8(val);
   }

   /* Whether a constant of this size can be represented as an operand;
    * zext/sext allow reinterpreting a 64-bit value as an extended 32-bit one. */
   static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false,
                                         bool sext = false)
   {
      if (bytes <= 4)
         return true;

      if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000)
         return true;
      uint64_t upper33 = val & 0xFFFFFFFF80000000;
      if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0))
         return true;

      return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 || /* [-16 .. 64] */
             val == 0x3FE0000000000000 ||              /* 0.5 */
             val == 0xBFE0000000000000 ||              /* -0.5 */
             val == 0x3FF0000000000000 ||              /* 1.0 */
             val == 0xBFF0000000000000 ||              /* -1.0 */
             val == 0x4000000000000000 ||              /* 2.0 */
             val == 0xC000000000000000 ||              /* -2.0 */
             val == 0x4010000000000000 ||              /* 4.0 */
             val == 0xC010000000000000;                /* -4.0 */
   }

   constexpr bool isTemp() const noexcept { return isTemp_; }

   constexpr void setTemp(Temp t) noexcept
   {
      assert(!isConstant_);
      if (t.id() != 0)
         isTemp_ = true;
      data_.temp = t;
   }

   constexpr Temp getTemp() const noexcept { return data_.temp; }

   constexpr uint32_t tempId() const noexcept { return data_.temp.id(); }

   /* All non-constant operands (temps, undefs, fixed registers) carry a RegClass. */
   constexpr bool hasRegClass() const noexcept { return !isConstant(); }

   constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); }

   constexpr unsigned bytes() const noexcept
   {
      if (isConstant())
         return 1 << constSize;
      else
         return data_.temp.bytes();
   }

   /* size in dwords */
   constexpr unsigned size() const noexcept
   {
      if (isConstant())
         return constSize > 2 ? 2 : 1;
      else
         return data_.temp.size();
   }

   constexpr bool isFixed() const noexcept { return isFixed_; }

   constexpr PhysReg physReg() const noexcept { return reg_; }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = reg != unsigned(-1);
      reg_ = reg;
   }

   constexpr bool isConstant() const noexcept { return isConstant_; }

   /* Constant that doesn't fit an inline encoding (fixed register 255). */
   constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; }

   constexpr bool isUndefined() const noexcept { return isUndef_; }

   constexpr uint32_t constantValue() const noexcept { return data_.i; }

   constexpr bool constantEquals(uint32_t cmp) const noexcept
   {
      return isConstant() && constantValue() == cmp;
   }

   /* Full 64-bit value; for inline constants this is reconstructed from the
    * fixed register encoding, for literals from the stored low bits + signext. */
   constexpr uint64_t constantValue64() const noexcept
   {
      if (constSize == 3) {
         if (reg_ <= 192)
            return reg_ - 128;
         else if (reg_ <= 208)
            return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);

         switch (reg_) {
         case 240: return 0x3FE0000000000000;
         case 241: return 0xBFE0000000000000;
         case 242: return 0x3FF0000000000000;
         case 243: return 0xBFF0000000000000;
         case 244: return 0x4000000000000000;
         case 245: return 0xC000000000000000;
         case 246: return 0x4010000000000000;
         case 247: return 0xC010000000000000;
         case 255:
            return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
         }
         unreachable("invalid register for 64-bit constant");
      } else {
         return data_.i;
      }
   }

   /* Value if this were used with vop3/opsel or vop3p. */
   constexpr uint16_t constantValue16(bool opsel) const noexcept
   {
      assert(bytes() == 2 || bytes() == 4);
      if (opsel) {
         if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
            return int16_t(data_.i) >>
                   16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
         else
            return data_.i >> 16;
      }
      return data_.i;
   }

   constexpr bool isOfType(RegType type) const noexcept
   {
      return hasRegClass() && regClass().type() == type;
   }

   /* Indicates that the killed operand's live range intersects with the
    * instruction's definitions. Unlike isKill() and isFirstKill(), this is
    * not set by liveness analysis. */
   constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; }

   constexpr bool isLateKill() const noexcept { return isLateKill_; }

   /* Indicates that the Operand's register gets clobbered by the instruction. */
   constexpr void setClobbered(bool flag) noexcept { isClobbered_ = flag; }
   constexpr bool isClobbered() const noexcept { return isClobbered_; }

   /* Indicates that the Operand must be copied in order to satisfy register
    * constraints. The copy is immediately killed by the instruction.
    */
   constexpr void setCopyKill(bool flag) noexcept
   {
      isCopyKill_ = flag;
      if (flag)
         setKill(flag);
   }
   constexpr bool isCopyKill() const noexcept { return isCopyKill_; }

   /* clearing the kill flag also clears the first-kill and copy-kill flags */
   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
      if (!flag) {
         setFirstKill(false);
         setCopyKill(false);
      }
   }

   constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); }

   /* setting first-kill implies kill */
   constexpr void setFirstKill(bool flag) noexcept
   {
      isFirstKill_ = flag;
      if (flag)
         setKill(flag);
   }

   /* When there are multiple operands killing the same temporary,
    * isFirstKill() only returns true for the first one. */
   constexpr bool isFirstKill() const noexcept { return isFirstKill_; }

   constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); }

   constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); }

   constexpr bool operator==(Operand other) const noexcept
   {
      if (other.size() != size())
         return false;
      if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
         return false;
      if (isFixed() && other.isFixed() && physReg() != other.physReg())
         return false;
      if (isLiteral())
         return other.isLiteral() && other.constantValue() == constantValue();
      else if (isConstant())
         /* inline constants are fully determined by their fixed register */
         return other.isConstant() && other.physReg() == physReg();
      else if (isUndefined())
         return other.isUndefined() && other.regClass() == regClass();
      else
         return other.isTemp() && other.getTemp() == getTemp();
   }

   constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); }

   constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; }

   constexpr bool is16bit() const noexcept { return is16bit_; }

   constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; }

   constexpr bool is24bit() const noexcept { return is24bit_; }

private:
   /* either the temporary or the (low 32 bits of the) constant value */
   union {
      Temp temp;
      uint32_t i;
      float f;
   } data_ = {Temp(0, s1)};
   PhysReg reg_; /* physical register, or scalar-source encoding for constants */
   union {
      struct {
         uint8_t isTemp_ : 1;
         uint8_t isFixed_ : 1;
         uint8_t isConstant_ : 1;
         uint8_t isKill_ : 1;
         uint8_t isUndef_ : 1;
         uint8_t isFirstKill_ : 1;
         uint8_t constSize : 2; /* log2 of the constant's size in bytes */
         uint8_t isLateKill_ : 1;
         uint8_t isClobbered_ : 1;
         uint8_t isCopyKill_ : 1;
         uint8_t is16bit_ : 1;
         uint8_t is24bit_ : 1;
         uint8_t signext : 1; /* 64-bit literal is sign-extended from 32 bits */
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint16_t control_ = 0;
   };
};
898
899 /**
900 * Definition Class
901 * Definitions are the results of Instructions
902 * and refer to temporary virtual registers
903 * which are later mapped to physical registers
904 */
class Definition final {
public:
   constexpr Definition()
       : temp(Temp(0, s1)), reg_(0), isFixed_(0), isKill_(0), isPrecise_(0), isInfPreserve_(0),
         isNaNPreserve_(0), isSZPreserve_(0), isNUW_(0), isNoCSE_(0)
   {}
   /* Definition of a new temporary with the given SSA id and register class. */
   Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {}
   explicit Definition(Temp tmp) noexcept : temp(tmp) {}
   /* Definition fixed to a physical register without a temporary (id 0). */
   Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); }
   Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type))
   {
      setFixed(reg);
   }

   /* A definition is a temporary iff its SSA id is non-zero. */
   constexpr bool isTemp() const noexcept { return tempId() > 0; }

   constexpr Temp getTemp() const noexcept { return temp; }

   constexpr uint32_t tempId() const noexcept { return temp.id(); }

   constexpr void setTemp(Temp t) noexcept { temp = t; }

   void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); }

   constexpr RegClass regClass() const noexcept { return temp.regClass(); }

   constexpr unsigned bytes() const noexcept { return temp.bytes(); }

   /* size in dwords */
   constexpr unsigned size() const noexcept { return temp.size(); }

   constexpr bool isFixed() const noexcept { return isFixed_; }

   constexpr PhysReg physReg() const noexcept { return reg_; }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = 1;
      reg_ = reg;
   }

   constexpr void setKill(bool flag) noexcept { isKill_ = flag; }

   constexpr bool isKill() const noexcept { return isKill_; }

   constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; }

   constexpr bool isPrecise() const noexcept { return isPrecise_; }

   constexpr void setInfPreserve(bool inf_preserve) noexcept { isInfPreserve_ = inf_preserve; }

   constexpr bool isInfPreserve() const noexcept { return isInfPreserve_; }

   constexpr void setNaNPreserve(bool nan_preserve) noexcept { isNaNPreserve_ = nan_preserve; }

   constexpr bool isNaNPreserve() const noexcept { return isNaNPreserve_; }

   /* SZ = signed zero */
   constexpr void setSZPreserve(bool sz_preserve) noexcept { isSZPreserve_ = sz_preserve; }

   constexpr bool isSZPreserve() const noexcept { return isSZPreserve_; }

   /* No Unsigned Wrap */
   constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; }

   constexpr bool isNUW() const noexcept { return isNUW_; }

   constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; }

   constexpr bool isNoCSE() const noexcept { return isNoCSE_; }

private:
   Temp temp = Temp(0, s1);
   PhysReg reg_;
   union {
      struct {
         uint8_t isFixed_ : 1;
         uint8_t isKill_ : 1;
         uint8_t isPrecise_ : 1;
         uint8_t isInfPreserve_ : 1;
         uint8_t isNaNPreserve_ : 1;
         uint8_t isSZPreserve_ : 1;
         uint8_t isNUW_ : 1;
         uint8_t isNoCSE_ : 1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
992
993 struct RegisterDemand {
994 constexpr RegisterDemand() = default;
RegisterDemandRegisterDemand995 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {}
996 int16_t vgpr = 0;
997 int16_t sgpr = 0;
998
999 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept
1000 {
1001 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1002 }
1003
exceedsRegisterDemand1004 constexpr bool exceeds(const RegisterDemand other) const noexcept
1005 {
1006 return vgpr > other.vgpr || sgpr > other.sgpr;
1007 }
1008
1009 constexpr RegisterDemand operator+(const Temp t) const noexcept
1010 {
1011 if (t.type() == RegType::sgpr)
1012 return RegisterDemand(vgpr, sgpr + t.size());
1013 else
1014 return RegisterDemand(vgpr + t.size(), sgpr);
1015 }
1016
1017 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept
1018 {
1019 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1020 }
1021
1022 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept
1023 {
1024 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1025 }
1026
1027 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept
1028 {
1029 vgpr += other.vgpr;
1030 sgpr += other.sgpr;
1031 return *this;
1032 }
1033
1034 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept
1035 {
1036 vgpr -= other.vgpr;
1037 sgpr -= other.sgpr;
1038 return *this;
1039 }
1040
1041 constexpr RegisterDemand& operator+=(const Temp t) noexcept
1042 {
1043 if (t.type() == RegType::sgpr)
1044 sgpr += t.size();
1045 else
1046 vgpr += t.size();
1047 return *this;
1048 }
1049
1050 constexpr RegisterDemand& operator-=(const Temp t) noexcept
1051 {
1052 if (t.type() == RegType::sgpr)
1053 sgpr -= t.size();
1054 else
1055 vgpr -= t.size();
1056 return *this;
1057 }
1058
updateRegisterDemand1059 constexpr void update(const RegisterDemand other) noexcept
1060 {
1061 vgpr = std::max(vgpr, other.vgpr);
1062 sgpr = std::max(sgpr, other.sgpr);
1063 }
1064 };
1065
1066 struct Block;
1067 struct Instruction;
1068 struct Pseudo_instruction;
1069 struct SALU_instruction;
1070 struct SMEM_instruction;
1071 struct DS_instruction;
1072 struct LDSDIR_instruction;
1073 struct MTBUF_instruction;
1074 struct MUBUF_instruction;
1075 struct MIMG_instruction;
1076 struct Export_instruction;
1077 struct FLAT_instruction;
1078 struct Pseudo_branch_instruction;
1079 struct Pseudo_barrier_instruction;
1080 struct Pseudo_reduction_instruction;
1081 struct VALU_instruction;
1082 struct VINTERP_inreg_instruction;
1083 struct VINTRP_instruction;
1084 struct VOPD_instruction;
1085 struct DPP16_instruction;
1086 struct DPP8_instruction;
1087 struct SDWA_instruction;
1088
/* Base of every IR instruction. The Format determines which concrete
 * subclass the object actually is; format-specific fields are reached by
 * downcasting through the accessors below, each of which asserts the
 * corresponding format test first. */
struct Instruction {
   aco_opcode opcode;
   Format format;
   union {
      /* per-pass scratch value; shares storage with register_demand */
      uint32_t pass_flags;
      RegisterDemand register_demand;
   };

   aco::span<Operand> operands;
   aco::span<Definition> definitions;

   constexpr bool usesModifiers() const noexcept;

   /* True if any operand is fixed to exec_lo or exec_hi. */
   constexpr bool reads_exec() const noexcept
   {
      for (const Operand& op : operands) {
         if (op.isFixed() && (op.physReg() == exec_lo || op.physReg() == exec_hi))
            return true;
      }
      return false;
   }

   /* True if any definition is fixed to exec_lo or exec_hi. */
   constexpr bool writes_exec() const noexcept
   {
      for (const Definition& def : definitions) {
         if (def.isFixed() && (def.physReg() == exec_lo || def.physReg() == exec_hi))
            return true;
      }
      return false;
   }

   /* Checked downcasts (non-const and const) per instruction format. */
   Pseudo_instruction& pseudo() noexcept
   {
      assert(isPseudo());
      return *(Pseudo_instruction*)this;
   }
   const Pseudo_instruction& pseudo() const noexcept
   {
      assert(isPseudo());
      return *(Pseudo_instruction*)this;
   }
   constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; }

   constexpr bool isSOP1() const noexcept { return format == Format::SOP1; }
   constexpr bool isSOP2() const noexcept { return format == Format::SOP2; }
   constexpr bool isSOPK() const noexcept { return format == Format::SOPK; }
   constexpr bool isSOPP() const noexcept { return format == Format::SOPP; }
   constexpr bool isSOPC() const noexcept { return format == Format::SOPC; }

   SMEM_instruction& smem() noexcept
   {
      assert(isSMEM());
      return *(SMEM_instruction*)this;
   }
   const SMEM_instruction& smem() const noexcept
   {
      assert(isSMEM());
      return *(SMEM_instruction*)this;
   }
   constexpr bool isSMEM() const noexcept { return format == Format::SMEM; }
   DS_instruction& ds() noexcept
   {
      assert(isDS());
      return *(DS_instruction*)this;
   }
   const DS_instruction& ds() const noexcept
   {
      assert(isDS());
      return *(DS_instruction*)this;
   }
   constexpr bool isDS() const noexcept { return format == Format::DS; }
   LDSDIR_instruction& ldsdir() noexcept
   {
      assert(isLDSDIR());
      return *(LDSDIR_instruction*)this;
   }
   const LDSDIR_instruction& ldsdir() const noexcept
   {
      assert(isLDSDIR());
      return *(LDSDIR_instruction*)this;
   }
   constexpr bool isLDSDIR() const noexcept { return format == Format::LDSDIR; }
   MTBUF_instruction& mtbuf() noexcept
   {
      assert(isMTBUF());
      return *(MTBUF_instruction*)this;
   }
   const MTBUF_instruction& mtbuf() const noexcept
   {
      assert(isMTBUF());
      return *(MTBUF_instruction*)this;
   }
   constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; }
   MUBUF_instruction& mubuf() noexcept
   {
      assert(isMUBUF());
      return *(MUBUF_instruction*)this;
   }
   const MUBUF_instruction& mubuf() const noexcept
   {
      assert(isMUBUF());
      return *(MUBUF_instruction*)this;
   }
   constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; }
   MIMG_instruction& mimg() noexcept
   {
      assert(isMIMG());
      return *(MIMG_instruction*)this;
   }
   const MIMG_instruction& mimg() const noexcept
   {
      assert(isMIMG());
      return *(MIMG_instruction*)this;
   }
   constexpr bool isMIMG() const noexcept { return format == Format::MIMG; }
   Export_instruction& exp() noexcept
   {
      assert(isEXP());
      return *(Export_instruction*)this;
   }
   const Export_instruction& exp() const noexcept
   {
      assert(isEXP());
      return *(Export_instruction*)this;
   }
   constexpr bool isEXP() const noexcept { return format == Format::EXP; }
   /* FLAT, GLOBAL and SCRATCH all use the FLAT_instruction layout. */
   FLAT_instruction& flat() noexcept
   {
      assert(isFlat());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& flat() const noexcept
   {
      assert(isFlat());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isFlat() const noexcept { return format == Format::FLAT; }
   FLAT_instruction& global() noexcept
   {
      assert(isGlobal());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& global() const noexcept
   {
      assert(isGlobal());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; }
   FLAT_instruction& scratch() noexcept
   {
      assert(isScratch());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& scratch() const noexcept
   {
      assert(isScratch());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; }
   Pseudo_branch_instruction& branch() noexcept
   {
      assert(isBranch());
      return *(Pseudo_branch_instruction*)this;
   }
   const Pseudo_branch_instruction& branch() const noexcept
   {
      assert(isBranch());
      return *(Pseudo_branch_instruction*)this;
   }
   constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; }
   Pseudo_barrier_instruction& barrier() noexcept
   {
      assert(isBarrier());
      return *(Pseudo_barrier_instruction*)this;
   }
   const Pseudo_barrier_instruction& barrier() const noexcept
   {
      assert(isBarrier());
      return *(Pseudo_barrier_instruction*)this;
   }
   constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; }
   Pseudo_reduction_instruction& reduction() noexcept
   {
      assert(isReduction());
      return *(Pseudo_reduction_instruction*)this;
   }
   const Pseudo_reduction_instruction& reduction() const noexcept
   {
      assert(isReduction());
      return *(Pseudo_reduction_instruction*)this;
   }
   constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
   /* VOP3P/VOP1/VOP2/VOPC/VOP3/DPP16/DPP8/SDWA are single format bits that
    * can be combined with other bits, hence mask tests instead of equality. */
   constexpr bool isVOP3P() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3P; }
   VINTERP_inreg_instruction& vinterp_inreg() noexcept
   {
      assert(isVINTERP_INREG());
      return *(VINTERP_inreg_instruction*)this;
   }
   const VINTERP_inreg_instruction& vinterp_inreg() const noexcept
   {
      assert(isVINTERP_INREG());
      return *(VINTERP_inreg_instruction*)this;
   }
   constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
   VOPD_instruction& vopd() noexcept
   {
      assert(isVOPD());
      return *(VOPD_instruction*)this;
   }
   const VOPD_instruction& vopd() const noexcept
   {
      assert(isVOPD());
      return *(VOPD_instruction*)this;
   }
   constexpr bool isVOPD() const noexcept { return format == Format::VOPD; }
   constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
   constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
   constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
   constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
   VINTRP_instruction& vintrp() noexcept
   {
      assert(isVINTRP());
      return *(VINTRP_instruction*)this;
   }
   const VINTRP_instruction& vintrp() const noexcept
   {
      assert(isVINTRP());
      return *(VINTRP_instruction*)this;
   }
   constexpr bool isVINTRP() const noexcept { return format == Format::VINTRP; }
   DPP16_instruction& dpp16() noexcept
   {
      assert(isDPP16());
      return *(DPP16_instruction*)this;
   }
   const DPP16_instruction& dpp16() const noexcept
   {
      assert(isDPP16());
      return *(DPP16_instruction*)this;
   }
   constexpr bool isDPP16() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP16; }
   DPP8_instruction& dpp8() noexcept
   {
      assert(isDPP8());
      return *(DPP8_instruction*)this;
   }
   const DPP8_instruction& dpp8() const noexcept
   {
      assert(isDPP8());
      return *(DPP8_instruction*)this;
   }
   constexpr bool isDPP8() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP8; }
   constexpr bool isDPP() const noexcept { return isDPP16() || isDPP8(); }
   SDWA_instruction& sdwa() noexcept
   {
      assert(isSDWA());
      return *(SDWA_instruction*)this;
   }
   const SDWA_instruction& sdwa() const noexcept
   {
      assert(isSDWA());
      return *(SDWA_instruction*)this;
   }
   constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; }

   /* Unchecked cast: valid because FLAT/GLOBAL/SCRATCH share one layout. */
   FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; }

   const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; }

   constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); }

   /* Any vector-ALU encoding (all of which derive from VALU_instruction). */
   VALU_instruction& valu() noexcept
   {
      assert(isVALU());
      return *(VALU_instruction*)this;
   }
   const VALU_instruction& valu() const noexcept
   {
      assert(isVALU());
      return *(VALU_instruction*)this;
   }
   constexpr bool isVALU() const noexcept
   {
      return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG() ||
             isVOPD();
   }

   /* Any scalar-ALU encoding (all of which derive from SALU_instruction). */
   SALU_instruction& salu() noexcept
   {
      assert(isSALU());
      return *(SALU_instruction*)this;
   }
   const SALU_instruction& salu() const noexcept
   {
      assert(isSALU());
      return *(SALU_instruction*)this;
   }
   constexpr bool isSALU() const noexcept
   {
      return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP();
   }

   /* Buffer/image vector-memory formats (MTBUF/MUBUF/MIMG). */
   constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); }

   bool accessesLDS() const noexcept;
   bool isTrans() const noexcept;
};
1396 static_assert(sizeof(Instruction) == 16, "Unexpected padding");
1397
struct SALU_instruction : public Instruction {
   /* For SOPP branch instructions this holds the target Block index;
    * otherwise, for SOPP and SOPK, the 16-bit signed immediate. */
   uint32_t imm;
};
static_assert(sizeof(SALU_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1405
1406 /**
1407 * Scalar Memory Format:
1408 * For s_(buffer_)load_dword*:
1409 * Operand(0): SBASE - SGPR-pair which provides base address
1410 * Operand(1): Offset - immediate (un)signed offset or SGPR
1411 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
1412 * Operand(n-1): SOffset - SGPR offset (Vega only)
1413 *
1414 * Having no operands is also valid for instructions such as s_dcache_inv.
1415 *
1416 */
struct SMEM_instruction : public Instruction {
   memory_sync_info sync;   /* memory ordering/synchronization requirements */
   ac_hw_cache_flags cache; /* hardware cache policy */
};
static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1422
/* Common base for all vector-ALU encodings. The modifier bits below overlay
 * a single 32-bit word; which fields are meaningful depends on the concrete
 * format (noted per field). */
struct VALU_instruction : public Instruction {
   union {
      bitfield_array8<uint32_t, 0, 3> neg;    /* VOP3, SDWA, DPP16, v_fma_mix, VINTERP_inreg */
      bitfield_array8<uint32_t, 0, 3> neg_lo; /* VOP3P */

      bitfield_array8<uint32_t, 3, 3> abs;    /* VOP3, SDWA, DPP16, v_fma_mix */
      bitfield_array8<uint32_t, 3, 3> neg_hi; /* VOP3P */

      bitfield_array8<uint32_t, 6, 4> opsel;     /* VOP3, VOPC12(GFX11+), VINTERP_inreg */
      bitfield_uint8<uint32_t, 10, 2> omod;      /* VOP3, SDWA(GFX9+) */
      bitfield_array8<uint32_t, 12, 3> opsel_lo; /* VOP3P */
      bitfield_array8<uint32_t, 15, 3> opsel_hi; /* VOP3P */
      bitfield_bool<uint32_t, 18> clamp;         /* VOP3, VOP3P, SDWA, VINTERP_inreg */
   };

   /* Swaps operands idx0 and idx1 along with their per-operand modifiers. */
   void swapOperands(unsigned idx0, unsigned idx1);
};
static_assert(sizeof(VALU_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1441
struct VINTERP_inreg_instruction : public VALU_instruction {
   uint8_t wait_exp : 3; /* encoded WAIT_EXP field */
   uint8_t padding3 : 5;
   uint8_t padding4;
   uint8_t padding5;
   uint8_t padding6;
};
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
              "Unexpected padding");
1451
/* Dual-issue VALU: 'opcode' holds the X opcode, 'opy' the Y opcode. */
struct VOPD_instruction : public VALU_instruction {
   aco_opcode opy;
   uint16_t padding;
};
static_assert(sizeof(VOPD_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1457
1458 /**
1459 * Data Parallel Primitives Format:
1460 * This format can be used for VOP1, VOP2 or VOPC instructions.
1461 * The swizzle applies to the src0 operand.
1462 *
1463 */
struct DPP16_instruction : public VALU_instruction {
   uint16_t dpp_ctrl;          /* lane swizzle control */
   uint8_t row_mask : 4;       /* which rows participate */
   uint8_t bank_mask : 4;      /* which banks participate */
   bool bound_ctrl : 1;        /* behavior when reading invalid lanes */
   uint8_t fetch_inactive : 1; /* whether inactive lanes are fetched */
   uint8_t padding3 : 6;
};
static_assert(sizeof(DPP16_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1473
struct DPP8_instruction : public VALU_instruction {
   uint32_t lane_sel : 24; /* eight 3-bit source-lane selectors */
   uint32_t fetch_inactive : 1;
   uint32_t padding : 7;
};
static_assert(sizeof(DPP8_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1480
/* Encodes a sub-dword byte/word selection: bits [1:0] hold the byte offset,
 * bits [4:2] the access size in bytes, and bit 5 whether the value is
 * sign-extended. A zero value means "no selection". */
struct SubdwordSel {
   enum sdwa_sel : uint8_t {
      ubyte = 0x4,
      uword = 0x8,
      dword = 0x10,
      sext = 0x20,
      sbyte = ubyte | sext,
      sword = uword | sext,

      ubyte0 = ubyte,
      ubyte1 = ubyte | 1,
      ubyte2 = ubyte | 2,
      ubyte3 = ubyte | 3,
      sbyte0 = sbyte,
      sbyte1 = sbyte | 1,
      sbyte2 = sbyte | 2,
      sbyte3 = sbyte | 3,
      uword0 = uword,
      uword1 = uword | 2,
      sword0 = sword,
      sword1 = sword | 2,
   };

   SubdwordSel() : sel((sdwa_sel)0) {}
   constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {}
   constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend)
       : sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset))
   {}
   constexpr operator sdwa_sel() const { return sel; }
   /* False iff no selection is encoded. */
   explicit operator bool() const { return sel != 0; }

   /* Access size in bytes (1, 2 or 4). */
   constexpr unsigned size() const { return (sel & 0x1c) >> 2; }
   /* Byte offset within the dword. */
   constexpr unsigned offset() const { return sel & 0x3; }
   constexpr bool sign_extend() const { return (sel & sext) != 0; }
   /* Converts to the hardware SDWA sel encoding, given the register's own
    * byte offset: 0-3 = byte0-3, 4-5 = word0-1, 6 = full dword. */
   constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const
   {
      const unsigned byte = reg_byte_offset + offset();
      switch (size()) {
      case 1: return byte;
      case 2: return 4 + (byte >> 1);
      default: return 6;
      }
   }

private:
   sdwa_sel sel;
};
1529
1530 /**
1531 * Sub-Dword Addressing Format:
1532 * This format can be used for VOP1, VOP2 or VOPC instructions.
1533 *
1534 * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
1535 * the definition doesn't have to be VCC on GFX9+.
1536 *
1537 */
struct SDWA_instruction : public VALU_instruction {
   /* these destination modifiers aren't available with VOPC except for
    * clamp on GFX8 */
   SubdwordSel sel[2]; /* sub-dword selection for src0/src1 */
   SubdwordSel dst_sel;
   uint8_t padding3;
};
static_assert(sizeof(SDWA_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1546
/* Legacy vertex-parameter interpolation instructions. */
struct VINTRP_instruction : public Instruction {
   uint8_t attribute;
   uint8_t component;
   bool high_16bits; /* select the high half of a 16-bit attribute */
   uint8_t padding;
};
static_assert(sizeof(VINTRP_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1554
1555 /**
1556 * Local and Global Data Sharing instructions
1557 * Operand(0): ADDR - VGPR which supplies the address.
1558 * Operand(1): DATA0 - First data VGPR.
1559 * Operand(2): DATA1 - Second data VGPR.
1560 * Operand(n-1): M0 - LDS size.
1561 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
1562 *
1563 */
struct DS_instruction : public Instruction {
   memory_sync_info sync;
   bool gds; /* access global data share instead of LDS */
   uint16_t offset0;
   uint8_t offset1;
   uint8_t padding;
};
static_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1572
1573 /**
1574 * LDS Direct instructions
1575 * Operand(0): M0
1576 * Definition(0): VDST - Destination VGPR
1577 */
struct LDSDIR_instruction : public Instruction {
   memory_sync_info sync;
   uint8_t attr : 6;      /* attribute index */
   uint8_t attr_chan : 2; /* attribute channel */
   uint32_t wait_vdst : 4;
   uint32_t wait_vsrc : 1;
   uint32_t padding : 27;
};
static_assert(sizeof(LDSDIR_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1587
1588 /**
1589 * Vector Memory Untyped-buffer Instructions
1590 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
1591 * Operand(1): VADDR - Address source. Can carry an index and/or offset
1592 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
1593 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
1594 *
1595 */
struct MUBUF_instruction : public Instruction {
   memory_sync_info sync;
   ac_hw_cache_flags cache;
   bool offen : 1;        /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1;        /* Supply an index from VGPR (VADDR) */
   bool addr64 : 1;       /* SI, CIK: Address size is 64-bit */
   bool tfe : 1;          /* texture fail enable */
   bool lds : 1;          /* Return read-data to LDS instead of VGPRs */
   bool disable_wqm : 1;  /* Require an exec mask without helper invocations */
   uint8_t padding0 : 2;
   uint8_t padding1;
   uint16_t offset;       /* Unsigned byte offset - 12 bit */
};
static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1610
1611 /**
1612 * Vector Memory Typed-buffer Instructions
1613 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
1614 * Operand(1): VADDR - Address source. Can carry an index and/or offset
1615 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
1616 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
1617 *
1618 */
struct MTBUF_instruction : public Instruction {
   memory_sync_info sync;
   ac_hw_cache_flags cache;
   uint8_t dfmt : 4;     /* Data Format of data in memory buffer */
   uint8_t nfmt : 3;     /* Numeric format of data in memory */
   bool offen : 1;       /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1;       /* Supply an index from VGPR (VADDR) */
   bool tfe : 1;         /* texture fail enable */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint8_t padding : 5;
   uint16_t offset;      /* Unsigned byte offset - 12 bit */
};
static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1632
1633 /**
1634 * Vector Memory Image Instructions
1635 * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
1636 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
1637 * Operand(2): VDATA - Vector GPR for write data or zero if TFE/LWE=1.
1638 * Operand(3): VADDR - Address source. Can carry an offset or an index.
1639 * Definition(0): VDATA - Vector GPR for read result.
1640 *
1641 */
struct MIMG_instruction : public Instruction {
   memory_sync_info sync;
   ac_hw_cache_flags cache;
   uint8_t dmask;        /* Data VGPR enable mask */
   uint8_t dim : 3;      /* NAVI: dimensionality */
   bool unrm : 1;        /* Force address to be un-normalized */
   bool tfe : 1;         /* texture fail enable */
   bool da : 1;          /* declare an array */
   bool lwe : 1;         /* LOD warning enable */
   bool r128 : 1;        /* NAVI: Texture resource size */
   bool a16 : 1;         /* VEGA, NAVI: Address components are 16-bits */
   bool d16 : 1;         /* Convert 32-bit data to 16-bit data */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool strict_wqm : 1;  /* VADDR is a linear VGPR and additional VGPRs may be copied into it */
   uint8_t padding0 : 4;
   uint8_t padding1;
};
static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1660
1661 /**
1662 * Flat/Scratch/Global Instructions
1663 * Operand(0): ADDR
1664 * Operand(1): SADDR
1665 * Operand(2) / Definition(0): DATA/VDST
1666 *
1667 */
/* Shared layout for FLAT, GLOBAL and SCRATCH formats. */
struct FLAT_instruction : public Instruction {
   memory_sync_info sync;
   ac_hw_cache_flags cache;
   bool lds : 1;         /* return read-data to LDS */
   bool nv : 1;          /* non-volatile */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint8_t padding0 : 5;
   uint8_t padding1;
   int16_t offset;       /* Vega/Navi only */
};
static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1679
struct Export_instruction : public Instruction {
   uint8_t enabled_mask; /* which components are written */
   uint8_t dest;         /* export target */
   bool compressed : 1;  /* data is 16-bit, packed two components per VGPR */
   bool done : 1;        /* this is the last export of its kind */
   bool valid_mask : 1;
   bool row_en : 1;
   uint8_t padding0 : 4;
   uint8_t padding1;
};
static_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1691
struct Pseudo_instruction : public Instruction {
   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
   bool tmp_in_scc;
   bool needs_scratch_reg; /* if scratch_sgpr/scc can be written, initialized by RA. */
};
static_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1698
struct Pseudo_branch_instruction : public Instruction {
   /* target[0] is the block index of the branch target.
    * For conditional branches, target[1] contains the fall-through alternative.
    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
    */
   uint32_t target[2];

   /* Indicates that this rarely or never jumps to target[0]. */
   bool rarely_taken;
   bool never_taken;

   uint16_t padding;
};
static_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 12, "Unexpected padding");
1713
struct Pseudo_barrier_instruction : public Instruction {
   memory_sync_info sync; /* memory semantics and affected storage classes */
   sync_scope exec_scope; /* execution scope of the barrier */
};
static_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1719
/* Reduction operators, grouped by operation with one entry per supported
 * bit width. NOTE: enumerator order is significant — do not reorder. */
enum ReduceOp : uint16_t {
   // clang-format off
   iadd8, iadd16, iadd32, iadd64,
   imul8, imul16, imul32, imul64,
   fadd16, fadd32, fadd64,
   fmul16, fmul32, fmul64,
   imin8, imin16, imin32, imin64,
   imax8, imax16, imax32, imax64,
   umin8, umin16, umin32, umin64,
   umax8, umax16, umax32, umax64,
   fmin16, fmin32, fmin64,
   fmax16, fmax32, fmax64,
   iand8, iand16, iand32, iand64,
   ior8, ior16, ior32, ior64,
   ixor8, ixor16, ixor32, ixor64,
   num_reduce_ops,
   // clang-format on
};
1738
1739 /**
1740 * Subgroup Reduction Instructions, everything except for the data to be
1741 * reduced and the result as inserted by setup_reduce_temp().
1742 * Operand(0): data to be reduced
1743 * Operand(1): reduce temporary
1744 * Operand(2): vector temporary
1745 * Definition(0): result
1746 * Definition(1): scalar temporary
1747 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
1748 * Definition(3): scc clobber
1749 * Definition(4): vcc clobber
1750 *
1751 */
struct Pseudo_reduction_instruction : public Instruction {
   ReduceOp reduce_op;
   uint16_t cluster_size; // must be 0 for scans
};
static_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4,
              "Unexpected padding");
1758
1759 inline bool
accessesLDS()1760 Instruction::accessesLDS() const noexcept
1761 {
1762 return (isDS() && !ds().gds) || isLDSDIR() || isVINTRP();
1763 }
1764
/* Swap two source operands together with all of their per-operand modifier
 * bits (neg/abs/opsel/opsel_lo/opsel_hi). For SDWA, the per-operand
 * sub-dword selections are swapped as well. */
inline void
VALU_instruction::swapOperands(unsigned idx0, unsigned idx1)
{
   if (this->isSDWA() && idx0 != idx1) {
      /* SDWA only carries sel[] for the first two operands. */
      assert(idx0 < 2 && idx1 < 2);
      std::swap(this->sdwa().sel[0], this->sdwa().sel[1]);
   }
   assert(idx0 < 3 && idx1 < 3);
   std::swap(this->operands[idx0], this->operands[idx1]);
   this->neg[idx0].swap(this->neg[idx1]);
   this->abs[idx0].swap(this->abs[idx1]);
   this->opsel[idx0].swap(this->opsel[idx1]);
   this->opsel_lo[idx0].swap(this->opsel_lo[idx1]);
   this->opsel_hi[idx0].swap(this->opsel_hi[idx1]);
}
1780
/* Deleter used by aco_ptr: deliberately a no-op, because instructions are
 * not freed individually — they are all de-allocated at once after
 * compilation has finished. */
struct instr_deleter_functor {
   void operator()(void* ptr) { (void)ptr; }
};
1787
1788 template <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
1789
1790 size_t get_instr_data_size(Format format);
1791
1792 Instruction* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
1793 uint32_t num_definitions);
1794
/* Whether any instruction modifier (neg/abs/opsel/omod/clamp/sub-dword
 * selection) is in effect, i.e. the instruction cannot be treated as a
 * plain operation on its operands. */
constexpr bool
Instruction::usesModifiers() const noexcept
{
   /* DPP and SDWA encodings always imply a modifier. */
   if (isDPP() || isSDWA())
      return true;

   if (isVOP3P()) {
      const VALU_instruction& vop3p = this->valu();
      /* opsel_hi must be 1 to not be considered a modifier - even for constants */
      return vop3p.opsel_lo || vop3p.clamp || vop3p.neg_lo || vop3p.neg_hi ||
             (vop3p.opsel_hi & BITFIELD_MASK(operands.size())) != BITFIELD_MASK(operands.size());
   } else if (isVALU()) {
      const VALU_instruction& vop3 = this->valu();
      return vop3.opsel || vop3.clamp || vop3.omod || vop3.abs || vop3.neg;
   }
   return false;
}
1812
1813 constexpr bool
is_phi(Instruction * instr)1814 is_phi(Instruction* instr)
1815 {
1816 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
1817 }
1818
/* Convenience overload for instructions held through aco_ptr. */
static inline bool
is_phi(aco_ptr<Instruction>& instr)
{
   return is_phi(instr.get());
}
1824
1825 bool is_wait_export_ready(amd_gfx_level gfx_level, const Instruction* instr);
1826 memory_sync_info get_sync_info(const Instruction* instr);
1827
1828 inline bool
is_dead(const std::vector<uint16_t> & uses,const Instruction * instr)1829 is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
1830 {
1831 if (instr->definitions.empty() || instr->isBranch() || instr->opcode == aco_opcode::p_startpgm ||
1832 instr->opcode == aco_opcode::p_init_scratch ||
1833 instr->opcode == aco_opcode::p_dual_src_export_gfx11)
1834 return false;
1835
1836 if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
1837 [&uses](const Definition& def) { return !def.isTemp() || uses[def.tempId()]; }))
1838 return false;
1839
1840 return !(get_sync_info(instr).semantics & (semantic_volatile | semantic_acqrel));
1841 }
1842
/* Encoding queries: whether opcode "op" supports input modifiers / opsel on
 * operand "idx" for the given hardware generation. */
bool can_use_input_modifiers(amd_gfx_level gfx_level, aco_opcode op, int idx);
bool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx);
bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op);
uint8_t get_gfx11_true16_mask(aco_opcode op);
/* Whether "instr" can be encoded as SDWA/DPP. "pre_ra": query is made before
 * register allocation; "dpp8": DPP8 instead of DPP16. */
bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra);
bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8);
bool can_write_m0(const aco_ptr<Instruction>& instr);
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
aco_ptr<Instruction> convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr);
aco_ptr<Instruction> convert_to_DPP(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr,
                                    bool dpp8);
bool needs_exec_mask(const Instruction* instr);

/* Opcode transforms for VALU comparisons: inverted, operand-swapped and
 * exec-writing (CMPX) variants. */
aco_opcode get_vcmp_inverse(aco_opcode op);
aco_opcode get_vcmp_swapped(aco_opcode op);
aco_opcode get_vcmpx(aco_opcode op);
bool is_cmpx(aco_opcode op);

/* Whether operands idx0 and idx1 of "instr" can be swapped, possibly by
 * changing the opcode (the replacement is written to "*new_op"). */
bool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op, unsigned idx0 = 0,
                       unsigned idx1 = 1);

/* Identity element for the given reduction operation ("idx" presumably selects
 * the dword for multi-dword identities — confirm at the definition). */
uint32_t get_reduction_identity(ReduceOp op, unsigned idx);

/* Number of extra dwords used by a MIMG instruction's NSA encoding. */
unsigned get_mimg_nsa_dwords(const Instruction* instr);

unsigned get_vopd_opy_start(const Instruction* instr);

/* Size (in bits) used for input/output modifiers and constants of the given operand. */
unsigned get_operand_size(aco_ptr<Instruction>& instr, unsigned index);

/* Whether two memory instructions should be placed in the same clause. */
bool should_form_clause(const Instruction* a, const Instruction* b);
1873
/* Classification of VMEM instructions, used as a bitmask. */
enum vmem_type : uint8_t {
   vmem_nosampler = 1 << 0,
   vmem_sampler = 1 << 1,
   vmem_bvh = 1 << 2,
};

/* VMEM instructions of the same type return in-order. For GFX12+, this determines which counter
 * is used.
 */
uint8_t get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr);

/* Parses the VA_VDST wait encoded in "instr" (presumably for s_delay_alu
 * handling — confirm at the definition). */
unsigned parse_vdst_wait(Instruction* instr);
1886
/* Flags describing a basic block's role in the CFG; combined as a bitmask in
 * Block::kind. */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all active lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_branch = 1 << 8,
   block_kind_merge = 1 << 9,
   block_kind_invert = 1 << 10,
   block_kind_discard_early_exit = 1 << 11,
   block_kind_uses_discard = 1 << 12,
   block_kind_resume = 1 << 13,
   block_kind_export_end = 1 << 14,
   block_kind_end_with_regs = 1 << 15,
};
1907
1908 /* CFG */
/* A basic block. ACO tracks two CFGs over the same blocks — "logical"
 * (per-lane control flow) and "linear" (wave-level control flow) — each with
 * its own edges and dominator information. */
struct Block {
   using edge_vec = small_vec<uint32_t, 2>;

   float_mode fp_mode;  /* floating-point mode in effect within this block */
   unsigned index;      /* position of this block in Program::blocks (set by insert_block) */
   unsigned offset = 0; /* presumably offset in the emitted code — confirm at use sites */
   std::vector<aco_ptr<Instruction>> instructions;
   edge_vec logical_preds;
   edge_vec linear_preds;
   edge_vec logical_succs;
   edge_vec linear_succs;
   RegisterDemand register_demand = RegisterDemand();
   RegisterDemand live_in_demand = RegisterDemand();
   uint32_t kind = 0;         /* bitmask of block_kind flags */
   int32_t logical_idom = -1; /* block index of immediate dominator; -1 = none/not computed */
   int32_t linear_idom = -1;

   /* Preorder and postorder traversal indices of the dominance tree. Because a program can have
    * several dominance trees (because of block_kind_resume), these start at the block index of the
    * root node. */
   uint32_t logical_dom_pre_index = 0;
   uint32_t logical_dom_post_index = 0;
   uint32_t linear_dom_pre_index = 0;
   uint32_t linear_dom_post_index = 0;

   uint16_t loop_nest_depth = 0;
   uint16_t divergent_if_logical_depth = 0;
   uint16_t uniform_if_depth = 0;

   /* this information is needed for predecessors to blocks with phis when
    * moving out of ssa */
   bool scc_live_out = false;

   Block() : index(0) {}
};
1944
1945 /*
1946 * Shader stages as provided in Vulkan by the application. Contrast this to HWStage.
1947 */
/* One bit per software stage so that merged stages can be expressed as a
 * bitmask (combine with operator|, test with Stage::has()). */
enum class SWStage : uint16_t {
   None = 0,
   VS = 1 << 0,  /* Vertex Shader */
   GS = 1 << 1,  /* Geometry Shader */
   TCS = 1 << 2, /* Tessellation Control aka Hull Shader */
   TES = 1 << 3, /* Tessellation Evaluation aka Domain Shader */
   FS = 1 << 4,  /* Fragment aka Pixel Shader */
   CS = 1 << 5,  /* Compute Shader */
   TS = 1 << 6,  /* Task Shader */
   MS = 1 << 7,  /* Mesh Shader */
   RT = 1 << 8,  /* Raytracing Shader */

   /* Stage combinations merged to run on a single HWStage */
   VS_GS = VS | GS,
   VS_TCS = VS | TCS,
   TES_GS = TES | GS,
};
1965
1966 constexpr SWStage
1967 operator|(SWStage a, SWStage b)
1968 {
1969 return static_cast<SWStage>(static_cast<uint16_t>(a) | static_cast<uint16_t>(b));
1970 }
1971
1972 /*
1973 * Set of SWStages to be merged into a single shader paired with the
1974 * HWStage it will run on.
1975 */
struct Stage {
   constexpr Stage() = default;

   explicit constexpr Stage(ac_hw_stage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}

   /* Check if the given SWStage is included */
   constexpr bool has(SWStage stage) const
   {
      return (static_cast<uint16_t>(sw) & static_cast<uint16_t>(stage));
   }

   /* Number of software stages merged into this shader (popcount of the mask). */
   unsigned num_sw_stages() const { return util_bitcount(static_cast<uint16_t>(sw)); }

   constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; }

   constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; }

   /* Mask of merged software stages */
   SWStage sw = SWStage::None;

   /* Active hardware stage */
   ac_hw_stage hw{};
};
1999
2000 /* possible settings of Program::stage */
/* Monolithic stages: one software stage mapped directly to its hardware stage. */
static constexpr Stage vertex_vs(AC_HW_VERTEX_SHADER, SWStage::VS);
static constexpr Stage fragment_fs(AC_HW_PIXEL_SHADER, SWStage::FS);
static constexpr Stage compute_cs(AC_HW_COMPUTE_SHADER, SWStage::CS);
static constexpr Stage tess_eval_vs(AC_HW_VERTEX_SHADER, SWStage::TES);
/* Mesh shading pipeline */
static constexpr Stage task_cs(AC_HW_COMPUTE_SHADER, SWStage::TS);
static constexpr Stage mesh_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::MS);
/* GFX10/NGG */
static constexpr Stage vertex_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS);
static constexpr Stage vertex_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS_GS);
static constexpr Stage tess_eval_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES);
static constexpr Stage tess_eval_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES_GS);
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::VS_GS);
static constexpr Stage vertex_tess_control_hs(AC_HW_HULL_SHADER, SWStage::VS_TCS);
static constexpr Stage tess_eval_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::TES_GS);
/* pre-GFX9 */
static constexpr Stage vertex_ls(AC_HW_LOCAL_SHADER,
                                 SWStage::VS); /* vertex before tessellation control */
static constexpr Stage vertex_es(AC_HW_EXPORT_SHADER, SWStage::VS); /* vertex before geometry */
static constexpr Stage tess_control_hs(AC_HW_HULL_SHADER, SWStage::TCS);
static constexpr Stage tess_eval_es(AC_HW_EXPORT_SHADER,
                                    SWStage::TES); /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::GS);
/* Raytracing */
static constexpr Stage raytracing_cs(AC_HW_COMPUTE_SHADER, SWStage::RT);
2027
/* Hardware limits and capabilities of the target device. Presumably filled in
 * by init_program() from the family/gfx_level — confirm at the definition. */
struct DeviceInfo {
   uint16_t lds_encoding_granule;
   uint16_t lds_alloc_granule;
   uint32_t lds_limit; /* in bytes */
   bool has_16bank_lds;
   uint16_t physical_sgprs; /* registers physically present per SIMD */
   uint16_t physical_vgprs;
   uint16_t vgpr_limit; /* maximum registers addressable by one wave */
   uint16_t sgpr_limit;
   uint16_t sgpr_alloc_granule;
   uint16_t vgpr_alloc_granule;
   unsigned scratch_alloc_granule;
   uint16_t max_waves_per_simd;
   unsigned simd_per_cu;
   bool has_fast_fma32 = false;
   bool has_mac_legacy32 = false;
   bool has_fmac_legacy32 = false;
   bool fused_mad_mix = false;
   bool xnack_enabled = false;
   bool sram_ecc_enabled = false;

   /* valid signed offset range for scratch/global instructions */
   int16_t scratch_global_offset_min;
   int16_t scratch_global_offset_max;
   unsigned max_nsa_vgprs;
};
2053
/* How far the program has advanced through the compilation pipeline
 * (stored in Program::progress). */
enum class CompilationProgress {
   after_isel,
   after_spilling,
   after_ra,
   after_lower_to_hw,
};
2060
/* The shader program being compiled: the CFG, per-temporary register classes,
 * target information and compilation state shared by all passes. */
class Program final {
public:
   aco::monotonic_buffer_resource m{65536};
   std::vector<Block> blocks;
   /* Register class of each temporary, indexed by temp id. Entry 0 is a
    * placeholder since allocationID starts at 1. */
   std::vector<RegClass> temp_rc = {s1};
   RegisterDemand max_reg_demand = RegisterDemand();
   ac_shader_config* config;
   struct aco_shader_info info;
   enum amd_gfx_level gfx_level;
   enum radeon_family family;
   DeviceInfo dev;
   unsigned wave_size;
   RegClass lane_mask; /* register class of an exec-sized lane mask (s1 or s2) */
   Stage stage;
   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
   bool needs_wqm = false;   /* there exists a p_wqm instruction */
   bool has_smem_buffer_or_global_loads = false;
   bool has_pops_overlapped_waves_wait = false;
   bool has_color_exports = false;
   bool is_prolog = false;
   bool is_epilog = false;

   std::vector<uint8_t> constant_data;
   Temp private_segment_buffer;
   Temp scratch_offset;

   uint16_t num_waves = 0;
   uint16_t min_waves = 0;
   unsigned workgroup_size; /* if known; otherwise UINT_MAX */
   bool wgp_mode;

   bool needs_vcc = false;

   CompilationProgress progress;

   bool collect_statistics = false;
   uint32_t statistics[aco_num_statistics];

   /* State applied to blocks created via insert_block()/create_and_insert_block(). */
   float_mode next_fp_mode;
   unsigned next_loop_depth = 0;
   unsigned next_divergent_if_logical_depth = 0;
   unsigned next_uniform_if_depth = 0;

   std::vector<Definition> args_pending_vmem;

   /* For shader part with previous shader part that has lds access. */
   bool pending_lds_access = false;

   struct {
      monotonic_buffer_resource memory;
      /* live-in temps per block */
      std::vector<IDSet> live_in;
   } live;

   /* Callback and destination used for compiler diagnostics. */
   struct {
      FILE* output = stderr;
      bool shorten_messages = false;
      void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
      void* private_data;
   } debug;

   /* Allocates a fresh temp id with the given register class. Temp ids are
    * limited to 24 bits (16777215). */
   uint32_t allocateId(RegClass rc)
   {
      assert(allocationID <= 16777215);
      temp_rc.push_back(rc);
      return allocationID++;
   }

   /* Reserves "amount" consecutive temp ids. The new temp_rc entries are
    * default-constructed; callers are expected to fill them in. */
   void allocateRange(unsigned amount)
   {
      assert(allocationID + amount <= 16777216);
      temp_rc.resize(temp_rc.size() + amount);
      allocationID += amount;
   }

   /* Allocates an id and wraps it in a Temp of the given register class. */
   Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); }

   /* The id the next allocation would return, without allocating. */
   uint32_t peekAllocationId() { return allocationID; }

   friend void reindex_ssa(Program* program, bool update_live_out);

   /* Appends a new, empty block inheriting the current next_* state. */
   Block* create_and_insert_block()
   {
      Block block;
      return insert_block(std::move(block));
   }

   /* Appends "block", assigning its index and the current fp mode / nesting
    * depths. Returns a pointer into "blocks" (invalidated by further inserts). */
   Block* insert_block(Block&& block)
   {
      block.index = blocks.size();
      block.fp_mode = next_fp_mode;
      block.loop_nest_depth = next_loop_depth;
      block.divergent_if_logical_depth = next_divergent_if_logical_depth;
      block.uniform_if_depth = next_uniform_if_depth;
      blocks.emplace_back(std::move(block));
      return &blocks.back();
   }

private:
   /* Next temp id to hand out; 0 is reserved. */
   uint32_t allocationID = 1;
};
2162
/* Knobs for exercising register-allocator code paths in tests. */
struct ra_test_policy {
   /* Force RA to always use its pessimistic fallback algorithm */
   bool skip_optimistic_path = false;
};
2167
/* One-time global initialization of the compiler. */
void init();

void init_program(Program* program, Stage stage, const struct aco_shader_info* info,
                  enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
                  ac_shader_config* config);

/* Instruction selection: NIR -> ACO IR, for full shaders and the various
 * prolog/epilog shader parts. */
void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
                    ac_shader_config* config, const struct aco_compiler_options* options,
                    const struct aco_shader_info* info, const struct ac_shader_args* args);
void select_trap_handler_shader(Program* program, struct nir_shader* shader,
                                ac_shader_config* config,
                                const struct aco_compiler_options* options,
                                const struct aco_shader_info* info,
                                const struct ac_shader_args* args);
void select_rt_prolog(Program* program, ac_shader_config* config,
                      const struct aco_compiler_options* options,
                      const struct aco_shader_info* info, const struct ac_shader_args* in_args,
                      const struct ac_shader_args* out_args);
void select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo,
                      ac_shader_config* config, const struct aco_compiler_options* options,
                      const struct aco_shader_info* info, const struct ac_shader_args* args);

void select_ps_epilog(Program* program, void* pinfo, ac_shader_config* config,
                      const struct aco_compiler_options* options,
                      const struct aco_shader_info* info, const struct ac_shader_args* args);

void select_ps_prolog(Program* program, void* pinfo, ac_shader_config* config,
                      const struct aco_compiler_options* options,
                      const struct aco_shader_info* info, const struct ac_shader_args* args);

/* Compiler passes, roughly in pipeline order. Each transforms "program" in place. */
void lower_phis(Program* program);
void lower_subdword(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
void live_var_analysis(Program* program);
std::vector<uint16_t> dead_code_analysis(Program* program);
void dominator_tree(Program* program);
void insert_exec_mask(Program* program);
void value_numbering(Program* program);
void optimize(Program* program);
void optimize_postRA(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program);
void register_allocation(Program* program, ra_test_policy = {});
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program);
void schedule_ilp(Program* program);
void schedule_vopd(Program* program);
void spill(Program* program);
void insert_waitcnt(Program* program);
void insert_delay_alu(Program* program);
void combine_delay_alu(Program* program);
bool dealloc_vgprs(Program* program);
void insert_NOPs(Program* program);
void form_hard_clauses(Program* program);
/* Encodes the program into "code"; returns the exec size (confirm at the definition). */
unsigned emit_program(Program* program, std::vector<uint32_t>& code,
                      std::vector<struct aco_symbol>* symbols = NULL, bool append_endpgm = true);
/**
 * Returns true if print_asm can disassemble the given program for the current build/runtime
 * configuration
 */
bool check_print_asm_support(Program* program);
bool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output);
/* Validation passes; return true on success. Enabled via the DEBUG_VALIDATE_* flags. */
bool validate_ir(Program* program);
bool validate_cfg(Program* program);
bool validate_ra(Program* program);
bool validate_live_vars(Program* program);

/* Statistics collection (see Program::collect_statistics). */
void collect_presched_stats(Program* program);
void collect_preasm_stats(Program* program);
void collect_postasm_stats(Program* program, const std::vector<uint32_t>& code);
2240
/* Timing model of a single instruction, as returned by get_cycle_info(). */
struct Instruction_cycle_info {
   /* Latency until the result is ready (if not needing a waitcnt) */
   unsigned latency;

   /* How many cycles issuing this instruction takes (i.e. cycles till the next instruction can be
    * issued)*/
   unsigned issue_cycles;
};
2249
Instruction_cycle_info get_cycle_info(const Program& program, const Instruction& instr);

/* Flags controlling the IR printers below (combined as a bitmask). */
enum print_flags {
   print_no_ssa = 0x1,
   print_perf_info = 0x2,
   print_kill = 0x4,
   print_live_vars = 0x8,
};
2258
/* IR printers; "flags" is a bitmask of print_flags. */
void aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0);
void aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
                     unsigned flags = 0);
void aco_print_program(const Program* program, FILE* output, unsigned flags = 0);

/* Reports a compiler error through Program::debug; prefer the aco_err macro
 * which supplies file/line automatically. */
void _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...);

#define aco_err(program, ...) _aco_err(program, __FILE__, __LINE__, __VA_ARGS__)

int get_op_fixed_to_def(Instruction* instr);

/* utilities for dealing with register demand */
RegisterDemand get_live_changes(Instruction* instr);
RegisterDemand get_temp_registers(Instruction* instr);

/* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
uint16_t get_extra_sgprs(Program* program);

/* adjust num_waves for workgroup size and LDS limits */
uint16_t max_suitable_waves(Program* program, uint16_t waves);

/* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
uint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs);
uint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs);

/* return number of addressable sgprs/vgprs for max_waves */
uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);

bool uses_scratch(Program* program);
2289
2290 inline bool
dominates_logical(const Block & parent,const Block & child)2291 dominates_logical(const Block& parent, const Block& child)
2292 {
2293 return child.logical_dom_pre_index >= parent.logical_dom_pre_index &&
2294 child.logical_dom_post_index <= parent.logical_dom_post_index;
2295 }
2296
2297 inline bool
dominates_linear(const Block & parent,const Block & child)2298 dominates_linear(const Block& parent, const Block& child)
2299 {
2300 return child.linear_dom_pre_index >= parent.linear_dom_pre_index &&
2301 child.linear_dom_post_index <= parent.linear_dom_post_index;
2302 }
2303
/* Per-opcode tables, indexed by aco_opcode. Generated at build time
 * (presumably by aco_opcodes.py — confirm in the build system). */
typedef struct {
   /* Hardware encodings per generation (per-generation value for each opcode). */
   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx11[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx12[static_cast<int>(aco_opcode::num_opcodes)];
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
   const char* name[static_cast<int>(aco_opcode::num_opcodes)];
   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
   /* sizes used for input/output modifiers and constants */
   const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
   const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
   const uint32_t definitions[static_cast<int>(aco_opcode::num_opcodes)];
   const uint32_t operands[static_cast<int>(aco_opcode::num_opcodes)];
} Info;

extern const Info instr_info;
2323
2324 } // namespace aco
2325
2326 #endif /* ACO_IR_H */
2327