xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/aco_ir.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #ifndef ACO_IR_H
8 #define ACO_IR_H
9 
10 #include "aco_opcodes.h"
11 #include "aco_shader_info.h"
12 #include "aco_util.h"
13 
14 #include "util/compiler.h"
15 
16 #include "ac_binary.h"
17 #include "ac_hw_stage.h"
18 #include "ac_shader_util.h"
19 #include "amd_family.h"
20 #include <algorithm>
21 #include <bitset>
22 #include <memory>
23 #include <vector>
24 
25 typedef struct nir_shader nir_shader;
26 
27 namespace aco {
28 
29 extern uint64_t debug_flags;
30 
/* Bit flags for the ACO_DEBUG environment options (stored in debug_flags). */
enum {
   DEBUG_VALIDATE_IR = 1 << 0,
   DEBUG_VALIDATE_RA = 1 << 1,
   DEBUG_VALIDATE_LIVE_VARS = 1 << 2,
   DEBUG_FORCE_WAITCNT = 1 << 3,
   DEBUG_NO_VN = 1 << 4,
   DEBUG_NO_OPT = 1 << 5,
   DEBUG_NO_SCHED = 1 << 6,
   DEBUG_PERF_INFO = 1 << 7,
   DEBUG_LIVE_INFO = 1 << 8,
   DEBUG_FORCE_WAITDEPS = 1 << 9,
   DEBUG_NO_VALIDATE_IR = 1 << 10,
   DEBUG_NO_SCHED_ILP = 1 << 11,
   DEBUG_NO_SCHED_VOPD = 1 << 12,
};
46 
/* Memory storage classes, used as a bitmask in memory_sync_info. */
enum storage_class : uint8_t {
   storage_none = 0, /* no synchronization and can be reordered around aliasing stores */
   storage_buffer = 1 << 0, /* SSBOs and global memory */
   storage_gds = 1 << 1,
   storage_image = 1 << 2,
   storage_shared = 1 << 3,       /* or TCS output */
   storage_vmem_output = 1 << 4,  /* GS or TCS output stores using VMEM */
   storage_task_payload = 1 << 5, /* Task-Mesh payload */
   storage_scratch = 1 << 6,
   storage_vgpr_spill = 1 << 7,
   storage_count = 8, /* number of flag bits above, not counting storage_none */
};
59 
/* Ordering/behavior flags attached to memory operations and barriers. */
enum memory_semantics : uint8_t {
   semantic_none = 0,
   /* for loads: don't move any access that follows this load to before it (even other loads)
    * for barriers: don't move any access after the barrier to before any
    * atomics/control_barriers/sendmsg_gs_done/position-primitive-export before the barrier */
   semantic_acquire = 1 << 0,
   /* for stores: don't move any access that precedes this store to after it
    * for barriers: don't move any access before the barrier to after any
    * atomics/control_barriers/sendmsg_gs_done/position-primitive-export after the barrier */
   semantic_release = 1 << 1,

   /* the flags below apply to loads/stores/atomics only */
   /* cannot be DCE'd or CSE'd */
   semantic_volatile = 1 << 2,
   /* does not interact with barriers and assumes this lane is the only lane
    * accessing this memory */
   semantic_private = 1 << 3,
   /* this operation can be reordered around operations of the same storage;
    * says nothing about barriers */
   semantic_can_reorder = 1 << 4,
   /* this is an atomic instruction (may only read or write memory) */
   semantic_atomic = 1 << 5,
   /* this instruction both reads and writes memory */
   semantic_rmw = 1 << 6,

   /* common combinations */
   semantic_acqrel = semantic_acquire | semantic_release,
   semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw,
};
88 
/* Synchronization scope, ordered from narrowest (a single invocation) to
 * widest (the whole device), so wider scopes compare greater. */
enum sync_scope : uint8_t {
   scope_invocation = 0,
   scope_subgroup = 1,
   scope_workgroup = 2,
   scope_queuefamily = 3,
   scope_device = 4,
};
96 
97 struct memory_sync_info {
memory_sync_infomemory_sync_info98    memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {}
99    memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation)
100        : storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_)
101    {}
102 
103    storage_class storage : 8;
104    memory_semantics semantics : 8;
105    sync_scope scope : 8;
106 
107    bool operator==(const memory_sync_info& rhs) const
108    {
109       return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope;
110    }
111 
can_reordermemory_sync_info112    bool can_reorder() const
113    {
114       if (semantics & semantic_acqrel)
115          return false;
116       /* Also check storage so that zero-initialized memory_sync_info can be
117        * reordered. */
118       return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
119    }
120 };
121 static_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
122 
/* FP rounding modes, matching the 2-bit encodings of the hardware MODE
 * register fields used by float_mode below. */
enum fp_round {
   fp_round_ne = 0, /* round to nearest even */
   fp_round_pi = 1, /* round towards +infinity */
   fp_round_ni = 2, /* round towards -infinity */
   fp_round_tz = 3, /* round towards zero (truncate) */
};
129 
/* FP denormal handling flags, matching the 2-bit MODE register encoding. */
enum fp_denorm {
   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
   fp_denorm_flush = 0x0,
   fp_denorm_keep_in = 0x1,  /* preserve denormals on source operands */
   fp_denorm_keep_out = 0x2, /* preserve denormals on results */
   fp_denorm_keep = 0x3,     /* = fp_denorm_keep_in | fp_denorm_keep_out */
};
138 
/* Floating-point mode of an instruction/program: the hardware MODE register
 * bits plus compiler-side flags that constrain which optimizations are legal. */
struct float_mode {
   /* matches encoding of the MODE register */
   union {
      struct {
         fp_round round32 : 2;       /* rounding mode for 32-bit FP ops */
         fp_round round16_64 : 2;    /* rounding mode for 16-/64-bit FP ops */
         unsigned denorm32 : 2;      /* fp_denorm flags for 32-bit FP ops */
         unsigned denorm16_64 : 2;   /* fp_denorm flags for 16-/64-bit FP ops */
      };
      struct {
         uint8_t round : 4;  /* both rounding fields as one nibble */
         uint8_t denorm : 4; /* both denorm fields as one nibble */
      };
      uint8_t val = 0; /* all MODE bits as a single byte, for cheap comparison */
   };
   /* if false, optimizations which may remove denormal flushing can be done */
   bool must_flush_denorms32 : 1;
   bool must_flush_denorms16_64 : 1;
   bool care_about_round32 : 1;
   bool care_about_round16_64 : 1;

   /* Returns true if instructions using the mode "other" can safely use the
    * current one instead. */
   bool canReplace(float_mode other) const noexcept
   {
      /* The MODE bits must match exactly, and every requirement flag set on
       * "other" must also be set here ("A || !B" reads as "B implies A"),
       * i.e. this mode's constraints are a superset of other's. */
      return val == other.val && (must_flush_denorms32 || !other.must_flush_denorms32) &&
             (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
             (care_about_round32 || !other.care_about_round32) &&
             (care_about_round16_64 || !other.care_about_round16_64);
   }
};
170 
/* Hardware wait-counter kinds. The values double as indices into wait_imm
 * (enforced by static_asserts below wait_imm). */
enum wait_type {
   wait_type_exp = 0,
   wait_type_lgkm = 1,
   wait_type_vm = 2,
   /* GFX10+ */
   wait_type_vs = 3,
   /* GFX12+ */
   wait_type_sample = 4,
   wait_type_bvh = 5,
   wait_type_km = 6,
   wait_type_num = 7, /* number of counters, not a counter itself */
};
183 
184 struct Instruction;
185 
/* One value per hardware wait counter; used to build/decode wait
 * instruction immediates. Member functions are defined out of line. */
struct wait_imm {
   /* Sentinel meaning "no wait requested" for a counter. */
   static const uint8_t unset_counter = 0xff;

   /* Member order must match the wait_type enum (see the static_asserts
    * below) because operator[] indexes these by byte offset. */
   uint8_t exp;
   uint8_t lgkm;
   uint8_t vm;
   uint8_t vs;
   uint8_t sample;
   uint8_t bvh;
   uint8_t km;

   wait_imm();
   wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);

   /* Encode the counters into the chip-specific s_waitcnt immediate layout. */
   uint16_t pack(enum amd_gfx_level chip) const;

   /* NOTE(review): presumably the maximum representable value of each counter
    * for the given chip — confirm against the out-of-line definition. */
   static wait_imm max(enum amd_gfx_level gfx_level);

   /* Decode counters from a wait instruction. NOTE(review): confirm return
    * value semantics against the out-of-line definition. */
   bool unpack(enum amd_gfx_level gfx_level, const Instruction* instr);

   bool combine(const wait_imm& other);

   bool empty() const;

   void print(FILE* output) const;

   /* Index counters by wait_type; relies on the contiguous member layout. */
   uint8_t& operator[](size_t i)
   {
      assert(i < wait_type_num);
      return *((uint8_t*)this + i);
   }

   const uint8_t& operator[](size_t i) const
   {
      assert(i < wait_type_num);
      return *((uint8_t*)this + i);
   }
};
/* Lock in the member layout that operator[] and wait_type indexing depend on. */
static_assert(offsetof(wait_imm, exp) == wait_type_exp);
static_assert(offsetof(wait_imm, lgkm) == wait_type_lgkm);
static_assert(offsetof(wait_imm, vm) == wait_type_vm);
static_assert(offsetof(wait_imm, vs) == wait_type_vs);
static_assert(offsetof(wait_imm, sample) == wait_type_sample);
static_assert(offsetof(wait_imm, bvh) == wait_type_bvh);
static_assert(offsetof(wait_imm, km) == wait_type_km);
231 
/* s_wait_event immediate bits. Note the polarity differs between GFX11
 * (bit set = do NOT wait) and GFX12 (bit set = DO wait). */
enum wait_event_imm : uint16_t {
   /* If this bit is 0, await that the export buffer space has been allocated.
    * In Primitive Ordered Pixel Shading, export ready means that the overlapped waves have exited
    * their ordered sections (by performing the `done` export), and that the current wave may enter
    * its ordered section.
    */
   wait_event_imm_dont_wait_export_ready_gfx11 = 0x1,
   wait_event_imm_wait_export_ready_gfx12 = 0x2,
};
242 
243 constexpr Format
asVOP3(Format format)244 asVOP3(Format format)
245 {
246    return (Format)((uint32_t)Format::VOP3 | (uint32_t)format);
247 };
248 
249 constexpr Format
asSDWA(Format format)250 asSDWA(Format format)
251 {
252    assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
253    return (Format)((uint32_t)Format::SDWA | (uint32_t)format);
254 }
255 
256 constexpr Format
withoutDPP(Format format)257 withoutDPP(Format format)
258 {
259    return (Format)((uint32_t)format & ~((uint32_t)Format::DPP16 | (uint32_t)Format::DPP8));
260 }
261 
262 constexpr Format
withoutVOP3(Format format)263 withoutVOP3(Format format)
264 {
265    return (Format)((uint32_t)format & ~((uint32_t)Format::VOP3));
266 }
267 
/* Which register file a value lives in: scalar (SGPR) or vector (VGPR). */
enum class RegType {
   sgpr,
   vgpr,
};
272 
/* Register class: the register file and size of a value, packed into one
 * byte. Encoding of RC (derived from the accessors below):
 *   bits 0-4: size, in dwords (or in bytes when the subdword bit is set)
 *   bit 5:    vgpr (otherwise sgpr)
 *   bit 6:    linear vgpr (used for WWM and spills to vgpr)
 *   bit 7:    subdword (byte-sized) class
 */
struct RegClass {

   enum RC : uint8_t {
      s1 = 1,
      s2 = 2,
      s3 = 3,
      s4 = 4,
      s6 = 6,
      s8 = 8,
      s16 = 16,
      v1 = s1 | (1 << 5),
      v2 = s2 | (1 << 5),
      v3 = s3 | (1 << 5),
      v4 = s4 | (1 << 5),
      v5 = 5 | (1 << 5),
      v6 = 6 | (1 << 5),
      v7 = 7 | (1 << 5),
      v8 = 8 | (1 << 5),
      /* byte-sized register class */
      v1b = v1 | (1 << 7),
      v2b = v2 | (1 << 7),
      v3b = v3 | (1 << 7),
      v4b = v4 | (1 << 7),
      v6b = v6 | (1 << 7),
      v8b = v8 | (1 << 7),
      /* these are used for WWM and spills to vgpr */
      v1_linear = v1 | (1 << 6),
      v2_linear = v2 | (1 << 6),
   };

   RegClass() = default;
   constexpr RegClass(RC rc_) : rc(rc_) {}
   constexpr RegClass(RegType type, unsigned size)
       : rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size))
   {}

   constexpr operator RC() const { return rc; }
   /* deleted to avoid accidental truthiness checks on a RegClass */
   explicit operator bool() = delete;

   /* All sgpr RC values are <= s16; every vgpr value has bit 5 set. */
   constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
   constexpr bool is_linear_vgpr() const { return rc & (1 << 6); };
   constexpr bool is_subdword() const { return rc & (1 << 7); }
   /* size field is bytes for subdword classes, dwords otherwise */
   constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); }
   // TODO: use size() less in favor of bytes()
   /* size in dwords, rounded up for subdword classes */
   constexpr unsigned size() const { return (bytes() + 3) >> 2; }
   /* sgprs are always linear; vgprs only when the linear bit is set */
   constexpr bool is_linear() const { return rc <= RC::s16 || is_linear_vgpr(); }
   constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); }
   constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); }

   /* Builds a class from a byte count; sgpr sizes round up to whole dwords,
    * non-multiple-of-4 vgpr sizes become subdword classes. */
   static constexpr RegClass get(RegType type, unsigned bytes)
   {
      if (type == RegType::sgpr) {
         return RegClass(type, DIV_ROUND_UP(bytes, 4u));
      } else {
         return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u);
      }
   }

   /* Same class kind with a different byte size; preserves linearity. */
   constexpr RegClass resize(unsigned bytes) const
   {
      if (is_linear_vgpr()) {
         assert(bytes % 4u == 0);
         return get(RegType::vgpr, bytes).as_linear();
      }
      return get(type(), bytes);
   }

private:
   RC rc;
};
343 
/* transitional helper expressions:
 * sN = N-dword SGPR class, vN = N-dword VGPR class, vNb = N-byte subdword VGPR class */
static constexpr RegClass s1{RegClass::s1};
static constexpr RegClass s2{RegClass::s2};
static constexpr RegClass s3{RegClass::s3};
static constexpr RegClass s4{RegClass::s4};
static constexpr RegClass s8{RegClass::s8};
static constexpr RegClass s16{RegClass::s16};
static constexpr RegClass v1{RegClass::v1};
static constexpr RegClass v2{RegClass::v2};
static constexpr RegClass v3{RegClass::v3};
static constexpr RegClass v4{RegClass::v4};
static constexpr RegClass v5{RegClass::v5};
static constexpr RegClass v6{RegClass::v6};
static constexpr RegClass v7{RegClass::v7};
static constexpr RegClass v8{RegClass::v8};
static constexpr RegClass v1b{RegClass::v1b};
static constexpr RegClass v2b{RegClass::v2b};
static constexpr RegClass v3b{RegClass::v3b};
static constexpr RegClass v4b{RegClass::v4b};
static constexpr RegClass v6b{RegClass::v6b};
static constexpr RegClass v8b{RegClass::v8b};
365 
/**
 * Temp Class
 * Each temporary virtual register has a
 * register class (i.e. size and type)
 * and SSA id.
 */
struct Temp {
   Temp() noexcept : id_(0), reg_class(0) {}
   constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {}

   constexpr uint32_t id() const noexcept { return id_; }
   constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; }

   constexpr unsigned bytes() const noexcept { return regClass().bytes(); }
   constexpr unsigned size() const noexcept { return regClass().size(); }
   constexpr RegType type() const noexcept { return regClass().type(); }
   constexpr bool is_linear() const noexcept { return regClass().is_linear(); }

   /* Comparison is by SSA id only; the register class is ignored. */
   constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); }
   constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
   constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }

private:
   uint32_t id_ : 24;      /* SSA id; 0 is used for "no temporary" (see Operand) */
   uint32_t reg_class : 8; /* RegClass::RC encoding */
};
392 
/**
 * PhysReg
 * Represents the physical register for each
 * Operand and Definition.
 * Stored as a byte address: register index * 4 + byte offset, so subdword
 * positions can be addressed.
 */
struct PhysReg {
   constexpr PhysReg() = default;
   explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}

   /* Register index, at dword granularity. */
   constexpr unsigned reg() const { return reg_b >> 2; }
   /* Byte offset within the register, for subdword access. */
   constexpr unsigned byte() const { return reg_b & 0x3u; }
   constexpr operator unsigned() const { return reg(); }

   constexpr bool operator==(PhysReg rhs) const { return reg_b == rhs.reg_b; }
   constexpr bool operator!=(PhysReg rhs) const { return reg_b != rhs.reg_b; }
   constexpr bool operator<(PhysReg rhs) const { return reg_b < rhs.reg_b; }

   /* Returns a copy advanced by the given number of bytes (may be negative). */
   constexpr PhysReg advance(int bytes) const
   {
      PhysReg next = *this;
      next.reg_b += bytes;
      return next;
   }

   uint16_t reg_b = 0;
};
416 
/* helper expressions for special registers (architectural SGPR indices) */
static constexpr PhysReg m0{124};
static constexpr PhysReg flat_scr_lo{102}; /* GFX8-GFX9, encoded differently on GFX6-7 */
static constexpr PhysReg flat_scr_hi{103}; /* GFX8-GFX9, encoded differently on GFX6-7 */
static constexpr PhysReg vcc{106};
static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg tba{108}; /* GFX6-GFX8 */
static constexpr PhysReg tma{110}; /* GFX6-GFX8 */
static constexpr PhysReg ttmp0{112};
static constexpr PhysReg ttmp1{113};
static constexpr PhysReg ttmp2{114};
static constexpr PhysReg ttmp3{115};
static constexpr PhysReg ttmp4{116};
static constexpr PhysReg ttmp5{117};
static constexpr PhysReg ttmp6{118};
static constexpr PhysReg ttmp7{119};
static constexpr PhysReg ttmp8{120};
static constexpr PhysReg ttmp9{121};
static constexpr PhysReg ttmp10{122};
static constexpr PhysReg ttmp11{123};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126}; /* alias of exec */
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg pops_exiting_wave_id{239}; /* GFX9-GFX10.3 */
static constexpr PhysReg scc{253};
443 
444 /**
445  * Operand Class
446  * Initially, each Operand refers to either
447  * a temporary virtual register
448  * or to a constant value
449  * Temporary registers get mapped to physical register during RA
450  * Constant values are inlined into the instruction sequence.
451  */
452 class Operand final {
453 public:
Operand()454    constexpr Operand()
455        : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false),
456          isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), isClobbered_(false),
457          isCopyKill_(false), is16bit_(false), is24bit_(false), signext(false)
458    {}
459 
Operand(Temp r)460    explicit Operand(Temp r) noexcept
461    {
462       data_.temp = r;
463       if (r.id()) {
464          isTemp_ = true;
465       } else {
466          isUndef_ = true;
467          setFixed(PhysReg{128});
468       }
469    };
Operand(Temp r,PhysReg reg)470    explicit Operand(Temp r, PhysReg reg) noexcept
471    {
472       assert(r.id()); /* Don't allow fixing an undef to a register */
473       data_.temp = r;
474       isTemp_ = true;
475       setFixed(reg);
476    };
477 
478    /* 8-bit constant */
c8(uint8_t v)479    static Operand c8(uint8_t v) noexcept
480    {
481       /* 8-bit constants are only used for copies and copies from any 8-bit
482        * constant can be implemented with a SDWA v_mul_u32_u24. So consider all
483        * to be inline constants. */
484       Operand op;
485       op.control_ = 0;
486       op.data_.i = v;
487       op.isConstant_ = true;
488       op.constSize = 0;
489       op.setFixed(PhysReg{0u});
490       return op;
491    };
492 
493    /* 16-bit constant */
c16(uint16_t v)494    static Operand c16(uint16_t v) noexcept
495    {
496       Operand op;
497       op.control_ = 0;
498       op.data_.i = v;
499       op.isConstant_ = true;
500       op.constSize = 1;
501       if (v <= 64)
502          op.setFixed(PhysReg{128u + v});
503       else if (v >= 0xFFF0) /* [-16 .. -1] */
504          op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)});
505       else if (v == 0x3800) /* 0.5 */
506          op.setFixed(PhysReg{240});
507       else if (v == 0xB800) /* -0.5 */
508          op.setFixed(PhysReg{241});
509       else if (v == 0x3C00) /* 1.0 */
510          op.setFixed(PhysReg{242});
511       else if (v == 0xBC00) /* -1.0 */
512          op.setFixed(PhysReg{243});
513       else if (v == 0x4000) /* 2.0 */
514          op.setFixed(PhysReg{244});
515       else if (v == 0xC000) /* -2.0 */
516          op.setFixed(PhysReg{245});
517       else if (v == 0x4400) /* 4.0 */
518          op.setFixed(PhysReg{246});
519       else if (v == 0xC400) /* -4.0 */
520          op.setFixed(PhysReg{247});
521       else if (v == 0x3118) /* 1/2 PI */
522          op.setFixed(PhysReg{248});
523       else /* Literal Constant */
524          op.setFixed(PhysReg{255});
525       return op;
526    }
527 
528    /* 32-bit constant */
c32(uint32_t v)529    static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); }
530 
531    /* 64-bit constant */
c64(uint64_t v)532    static Operand c64(uint64_t v) noexcept
533    {
534       Operand op;
535       op.control_ = 0;
536       op.isConstant_ = true;
537       op.constSize = 3;
538       if (v <= 64) {
539          op.data_.i = (uint32_t)v;
540          op.setFixed(PhysReg{128 + (uint32_t)v});
541       } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
542          op.data_.i = (uint32_t)v;
543          op.setFixed(PhysReg{192 - (uint32_t)v});
544       } else if (v == 0x3FE0000000000000) { /* 0.5 */
545          op.data_.i = 0x3f000000;
546          op.setFixed(PhysReg{240});
547       } else if (v == 0xBFE0000000000000) { /* -0.5 */
548          op.data_.i = 0xbf000000;
549          op.setFixed(PhysReg{241});
550       } else if (v == 0x3FF0000000000000) { /* 1.0 */
551          op.data_.i = 0x3f800000;
552          op.setFixed(PhysReg{242});
553       } else if (v == 0xBFF0000000000000) { /* -1.0 */
554          op.data_.i = 0xbf800000;
555          op.setFixed(PhysReg{243});
556       } else if (v == 0x4000000000000000) { /* 2.0 */
557          op.data_.i = 0x40000000;
558          op.setFixed(PhysReg{244});
559       } else if (v == 0xC000000000000000) { /* -2.0 */
560          op.data_.i = 0xc0000000;
561          op.setFixed(PhysReg{245});
562       } else if (v == 0x4010000000000000) { /* 4.0 */
563          op.data_.i = 0x40800000;
564          op.setFixed(PhysReg{246});
565       } else if (v == 0xC010000000000000) { /* -4.0 */
566          op.data_.i = 0xc0800000;
567          op.setFixed(PhysReg{247});
568       } else { /* Literal Constant: we don't know if it is a long or double.*/
569          op.signext = v >> 63;
570          op.data_.i = v & 0xffffffffu;
571          op.setFixed(PhysReg{255});
572          assert(op.constantValue64() == v &&
573                 "attempt to create a unrepresentable 64-bit literal constant");
574       }
575       return op;
576    }
577 
578    /* 32-bit constant stored as a 32-bit or 64-bit operand */
c32_or_c64(uint32_t v,bool is64bit)579    static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept
580    {
581       Operand op;
582       op.control_ = 0;
583       op.data_.i = v;
584       op.isConstant_ = true;
585       op.constSize = is64bit ? 3 : 2;
586       if (v <= 64)
587          op.setFixed(PhysReg{128 + v});
588       else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
589          op.setFixed(PhysReg{192 - v});
590       else if (v == 0x3f000000) /* 0.5 */
591          op.setFixed(PhysReg{240});
592       else if (v == 0xbf000000) /* -0.5 */
593          op.setFixed(PhysReg{241});
594       else if (v == 0x3f800000) /* 1.0 */
595          op.setFixed(PhysReg{242});
596       else if (v == 0xbf800000) /* -1.0 */
597          op.setFixed(PhysReg{243});
598       else if (v == 0x40000000) /* 2.0 */
599          op.setFixed(PhysReg{244});
600       else if (v == 0xc0000000) /* -2.0 */
601          op.setFixed(PhysReg{245});
602       else if (v == 0x40800000) /* 4.0 */
603          op.setFixed(PhysReg{246});
604       else if (v == 0xc0800000) /* -4.0 */
605          op.setFixed(PhysReg{247});
606       else { /* Literal Constant */
607          assert(!is64bit && "attempt to create a 64-bit literal constant");
608          op.setFixed(PhysReg{255});
609       }
610       return op;
611    }
612 
literal32(uint32_t v)613    static Operand literal32(uint32_t v) noexcept
614    {
615       Operand op;
616       op.control_ = 0;
617       op.data_.i = v;
618       op.isConstant_ = true;
619       op.constSize = 2;
620       op.setFixed(PhysReg{255});
621       return op;
622    }
623 
Operand(RegClass type)624    explicit Operand(RegClass type) noexcept
625    {
626       isUndef_ = true;
627       data_.temp = Temp(0, type);
628       setFixed(PhysReg{128});
629    };
Operand(PhysReg reg,RegClass type)630    explicit Operand(PhysReg reg, RegClass type) noexcept
631    {
632       data_.temp = Temp(0, type);
633       setFixed(reg);
634    }
635 
636    static Operand zero(unsigned bytes = 4) noexcept
637    {
638       if (bytes == 8)
639          return Operand::c64(0);
640       else if (bytes == 4)
641          return Operand::c32(0);
642       else if (bytes == 2)
643          return Operand::c16(0);
644       assert(bytes == 1);
645       return Operand::c8(0);
646    }
647 
648    /* This is useful over the constructors when you want to take a gfx level
649     * for 1/2 PI or an unknown operand size.
650     */
get_const(enum amd_gfx_level chip,uint64_t val,unsigned bytes)651    static Operand get_const(enum amd_gfx_level chip, uint64_t val, unsigned bytes)
652    {
653       if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
654          /* 1/2 PI can be an inline constant on GFX8+ */
655          Operand op = Operand::c32(val);
656          op.setFixed(PhysReg{248});
657          return op;
658       }
659 
660       if (bytes == 8)
661          return Operand::c64(val);
662       else if (bytes == 4)
663          return Operand::c32(val);
664       else if (bytes == 2)
665          return Operand::c16(val);
666       assert(bytes == 1);
667       return Operand::c8(val);
668    }
669 
670    static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false,
671                                          bool sext = false)
672    {
673       if (bytes <= 4)
674          return true;
675 
676       if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000)
677          return true;
678       uint64_t upper33 = val & 0xFFFFFFFF80000000;
679       if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0))
680          return true;
681 
682       return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 || /* [-16 .. 64] */
683              val == 0x3FE0000000000000 ||              /* 0.5 */
684              val == 0xBFE0000000000000 ||              /* -0.5 */
685              val == 0x3FF0000000000000 ||              /* 1.0 */
686              val == 0xBFF0000000000000 ||              /* -1.0 */
687              val == 0x4000000000000000 ||              /* 2.0 */
688              val == 0xC000000000000000 ||              /* -2.0 */
689              val == 0x4010000000000000 ||              /* 4.0 */
690              val == 0xC010000000000000;                /* -4.0 */
691    }
692 
isTemp()693    constexpr bool isTemp() const noexcept { return isTemp_; }
694 
setTemp(Temp t)695    constexpr void setTemp(Temp t) noexcept
696    {
697       assert(!isConstant_);
698       if (t.id() != 0)
699          isTemp_ = true;
700       data_.temp = t;
701    }
702 
getTemp()703    constexpr Temp getTemp() const noexcept { return data_.temp; }
704 
tempId()705    constexpr uint32_t tempId() const noexcept { return data_.temp.id(); }
706 
hasRegClass()707    constexpr bool hasRegClass() const noexcept { return !isConstant(); }
708 
regClass()709    constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); }
710 
bytes()711    constexpr unsigned bytes() const noexcept
712    {
713       if (isConstant())
714          return 1 << constSize;
715       else
716          return data_.temp.bytes();
717    }
718 
size()719    constexpr unsigned size() const noexcept
720    {
721       if (isConstant())
722          return constSize > 2 ? 2 : 1;
723       else
724          return data_.temp.size();
725    }
726 
isFixed()727    constexpr bool isFixed() const noexcept { return isFixed_; }
728 
physReg()729    constexpr PhysReg physReg() const noexcept { return reg_; }
730 
setFixed(PhysReg reg)731    constexpr void setFixed(PhysReg reg) noexcept
732    {
733       isFixed_ = reg != unsigned(-1);
734       reg_ = reg;
735    }
736 
isConstant()737    constexpr bool isConstant() const noexcept { return isConstant_; }
738 
isLiteral()739    constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; }
740 
isUndefined()741    constexpr bool isUndefined() const noexcept { return isUndef_; }
742 
constantValue()743    constexpr uint32_t constantValue() const noexcept { return data_.i; }
744 
constantEquals(uint32_t cmp)745    constexpr bool constantEquals(uint32_t cmp) const noexcept
746    {
747       return isConstant() && constantValue() == cmp;
748    }
749 
constantValue64()750    constexpr uint64_t constantValue64() const noexcept
751    {
752       if (constSize == 3) {
753          if (reg_ <= 192)
754             return reg_ - 128;
755          else if (reg_ <= 208)
756             return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
757 
758          switch (reg_) {
759          case 240: return 0x3FE0000000000000;
760          case 241: return 0xBFE0000000000000;
761          case 242: return 0x3FF0000000000000;
762          case 243: return 0xBFF0000000000000;
763          case 244: return 0x4000000000000000;
764          case 245: return 0xC000000000000000;
765          case 246: return 0x4010000000000000;
766          case 247: return 0xC010000000000000;
767          case 255:
768             return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
769          }
770          unreachable("invalid register for 64-bit constant");
771       } else {
772          return data_.i;
773       }
774    }
775 
776    /* Value if this were used with vop3/opsel or vop3p. */
constantValue16(bool opsel)777    constexpr uint16_t constantValue16(bool opsel) const noexcept
778    {
779       assert(bytes() == 2 || bytes() == 4);
780       if (opsel) {
781          if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
782             return int16_t(data_.i) >>
783                    16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
784          else
785             return data_.i >> 16;
786       }
787       return data_.i;
788    }
789 
isOfType(RegType type)790    constexpr bool isOfType(RegType type) const noexcept
791    {
792       return hasRegClass() && regClass().type() == type;
793    }
794 
795    /* Indicates that the killed operand's live range intersects with the
796     * instruction's definitions. Unlike isKill() and isFirstKill(), this is
797     * not set by liveness analysis. */
setLateKill(bool flag)798    constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; }
799 
isLateKill()800    constexpr bool isLateKill() const noexcept { return isLateKill_; }
801 
802    /* Indicates that the Operand's register gets clobbered by the instruction. */
setClobbered(bool flag)803    constexpr void setClobbered(bool flag) noexcept { isClobbered_ = flag; }
isClobbered()804    constexpr bool isClobbered() const noexcept { return isClobbered_; }
805 
806    /* Indicates that the Operand must be copied in order to satisfy register
807     * constraints. The copy is immediately killed by the instruction.
808     */
setCopyKill(bool flag)809    constexpr void setCopyKill(bool flag) noexcept
810    {
811       isCopyKill_ = flag;
812       if (flag)
813          setKill(flag);
814    }
isCopyKill()815    constexpr bool isCopyKill() const noexcept { return isCopyKill_; }
816 
setKill(bool flag)817    constexpr void setKill(bool flag) noexcept
818    {
819       isKill_ = flag;
820       if (!flag) {
821          setFirstKill(false);
822          setCopyKill(false);
823       }
824    }
825 
isKill()826    constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); }
827 
setFirstKill(bool flag)828    constexpr void setFirstKill(bool flag) noexcept
829    {
830       isFirstKill_ = flag;
831       if (flag)
832          setKill(flag);
833    }
834 
835    /* When there are multiple operands killing the same temporary,
836     * isFirstKill() is only returns true for the first one. */
isFirstKill()837    constexpr bool isFirstKill() const noexcept { return isFirstKill_; }
838 
isKillBeforeDef()839    constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); }
840 
isFirstKillBeforeDef()841    constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); }
842 
   /* Structural equality: size, fixed state and kill-before-def state must
    * match, then the payload is compared depending on the operand kind
    * (literal -> constant value, constant -> physReg, undef -> reg class,
    * otherwise -> temporary). NOTE(review): for non-literal constants the
    * value is compared via physReg() — presumably the inline-constant
    * encoding is stored there; confirm against the constant constructors
    * defined earlier in this class. */
   constexpr bool operator==(Operand other) const noexcept
   {
      if (other.size() != size())
         return false;
      if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
         return false;
      if (isFixed() && other.isFixed() && physReg() != other.physReg())
         return false;
      if (isLiteral())
         return other.isLiteral() && other.constantValue() == constantValue();
      else if (isConstant())
         return other.isConstant() && other.physReg() == physReg();
      else if (isUndefined())
         return other.isUndefined() && other.regClass() == regClass();
      else
         return other.isTemp() && other.getTemp() == getTemp();
   }

   constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); }
862 
   /* Width hints for this operand; they are plain flags here and do not affect
    * any other operand state. */
   constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; }

   constexpr bool is16bit() const noexcept { return is16bit_; }

   constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; }

   constexpr bool is24bit() const noexcept { return is24bit_; }
870 
private:
   /* Payload: a temporary, or the raw bits of a constant (viewed as integer or
    * float). Defaults to a null temporary. */
   union {
      Temp temp;
      uint32_t i;
      float f;
   } data_ = {Temp(0, s1)};
   PhysReg reg_;
   /* All state flags packed into one 16-bit word; control_ aliases the
    * bit-fields so the whole set can be zero-initialized at once. */
   union {
      struct {
         uint8_t isTemp_ : 1;
         uint8_t isFixed_ : 1;
         uint8_t isConstant_ : 1;
         uint8_t isKill_ : 1;
         uint8_t isUndef_ : 1;
         uint8_t isFirstKill_ : 1;
         uint8_t constSize : 2;
         uint8_t isLateKill_ : 1;
         uint8_t isClobbered_ : 1;
         uint8_t isCopyKill_ : 1;
         uint8_t is16bit_ : 1;
         uint8_t is24bit_ : 1;
         uint8_t signext : 1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint16_t control_ = 0;
   };
};
898 
899 /**
900  * Definition Class
901  * Definitions are the results of Instructions
902  * and refer to temporary virtual registers
903  * which are later mapped to physical registers
904  */
905 class Definition final {
906 public:
Definition()907    constexpr Definition()
908        : temp(Temp(0, s1)), reg_(0), isFixed_(0), isKill_(0), isPrecise_(0), isInfPreserve_(0),
909          isNaNPreserve_(0), isSZPreserve_(0), isNUW_(0), isNoCSE_(0)
910    {}
Definition(uint32_t index,RegClass type)911    Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {}
Definition(Temp tmp)912    explicit Definition(Temp tmp) noexcept : temp(tmp) {}
Definition(PhysReg reg,RegClass type)913    Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); }
Definition(uint32_t tmpId,PhysReg reg,RegClass type)914    Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type))
915    {
916       setFixed(reg);
917    }
918 
isTemp()919    constexpr bool isTemp() const noexcept { return tempId() > 0; }
920 
getTemp()921    constexpr Temp getTemp() const noexcept { return temp; }
922 
tempId()923    constexpr uint32_t tempId() const noexcept { return temp.id(); }
924 
setTemp(Temp t)925    constexpr void setTemp(Temp t) noexcept { temp = t; }
926 
swapTemp(Definition & other)927    void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); }
928 
regClass()929    constexpr RegClass regClass() const noexcept { return temp.regClass(); }
930 
bytes()931    constexpr unsigned bytes() const noexcept { return temp.bytes(); }
932 
size()933    constexpr unsigned size() const noexcept { return temp.size(); }
934 
isFixed()935    constexpr bool isFixed() const noexcept { return isFixed_; }
936 
physReg()937    constexpr PhysReg physReg() const noexcept { return reg_; }
938 
setFixed(PhysReg reg)939    constexpr void setFixed(PhysReg reg) noexcept
940    {
941       isFixed_ = 1;
942       reg_ = reg;
943    }
944 
setKill(bool flag)945    constexpr void setKill(bool flag) noexcept { isKill_ = flag; }
946 
isKill()947    constexpr bool isKill() const noexcept { return isKill_; }
948 
setPrecise(bool precise)949    constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; }
950 
isPrecise()951    constexpr bool isPrecise() const noexcept { return isPrecise_; }
952 
setInfPreserve(bool inf_preserve)953    constexpr void setInfPreserve(bool inf_preserve) noexcept { isInfPreserve_ = inf_preserve; }
954 
isInfPreserve()955    constexpr bool isInfPreserve() const noexcept { return isInfPreserve_; }
956 
setNaNPreserve(bool nan_preserve)957    constexpr void setNaNPreserve(bool nan_preserve) noexcept { isNaNPreserve_ = nan_preserve; }
958 
isNaNPreserve()959    constexpr bool isNaNPreserve() const noexcept { return isNaNPreserve_; }
960 
setSZPreserve(bool sz_preserve)961    constexpr void setSZPreserve(bool sz_preserve) noexcept { isSZPreserve_ = sz_preserve; }
962 
isSZPreserve()963    constexpr bool isSZPreserve() const noexcept { return isSZPreserve_; }
964 
965    /* No Unsigned Wrap */
setNUW(bool nuw)966    constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; }
967 
isNUW()968    constexpr bool isNUW() const noexcept { return isNUW_; }
969 
setNoCSE(bool noCSE)970    constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; }
971 
isNoCSE()972    constexpr bool isNoCSE() const noexcept { return isNoCSE_; }
973 
974 private:
975    Temp temp = Temp(0, s1);
976    PhysReg reg_;
977    union {
978       struct {
979          uint8_t isFixed_ : 1;
980          uint8_t isKill_ : 1;
981          uint8_t isPrecise_ : 1;
982          uint8_t isInfPreserve_ : 1;
983          uint8_t isNaNPreserve_ : 1;
984          uint8_t isSZPreserve_ : 1;
985          uint8_t isNUW_ : 1;
986          uint8_t isNoCSE_ : 1;
987       };
988       /* can't initialize bit-fields in c++11, so work around using a union */
989       uint8_t control_ = 0;
990    };
991 };
992 
993 struct RegisterDemand {
994    constexpr RegisterDemand() = default;
RegisterDemandRegisterDemand995    constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {}
996    int16_t vgpr = 0;
997    int16_t sgpr = 0;
998 
999    constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept
1000    {
1001       return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1002    }
1003 
exceedsRegisterDemand1004    constexpr bool exceeds(const RegisterDemand other) const noexcept
1005    {
1006       return vgpr > other.vgpr || sgpr > other.sgpr;
1007    }
1008 
1009    constexpr RegisterDemand operator+(const Temp t) const noexcept
1010    {
1011       if (t.type() == RegType::sgpr)
1012          return RegisterDemand(vgpr, sgpr + t.size());
1013       else
1014          return RegisterDemand(vgpr + t.size(), sgpr);
1015    }
1016 
1017    constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept
1018    {
1019       return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1020    }
1021 
1022    constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept
1023    {
1024       return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1025    }
1026 
1027    constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept
1028    {
1029       vgpr += other.vgpr;
1030       sgpr += other.sgpr;
1031       return *this;
1032    }
1033 
1034    constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept
1035    {
1036       vgpr -= other.vgpr;
1037       sgpr -= other.sgpr;
1038       return *this;
1039    }
1040 
1041    constexpr RegisterDemand& operator+=(const Temp t) noexcept
1042    {
1043       if (t.type() == RegType::sgpr)
1044          sgpr += t.size();
1045       else
1046          vgpr += t.size();
1047       return *this;
1048    }
1049 
1050    constexpr RegisterDemand& operator-=(const Temp t) noexcept
1051    {
1052       if (t.type() == RegType::sgpr)
1053          sgpr -= t.size();
1054       else
1055          vgpr -= t.size();
1056       return *this;
1057    }
1058 
updateRegisterDemand1059    constexpr void update(const RegisterDemand other) noexcept
1060    {
1061       vgpr = std::max(vgpr, other.vgpr);
1062       sgpr = std::max(sgpr, other.sgpr);
1063    }
1064 };
1065 
/* Forward declarations of the format-specific instruction structs defined
 * further down; Instruction's downcast accessors reference them before their
 * definitions. */
struct Block;
struct Instruction;
struct Pseudo_instruction;
struct SALU_instruction;
struct SMEM_instruction;
struct DS_instruction;
struct LDSDIR_instruction;
struct MTBUF_instruction;
struct MUBUF_instruction;
struct MIMG_instruction;
struct Export_instruction;
struct FLAT_instruction;
struct Pseudo_branch_instruction;
struct Pseudo_barrier_instruction;
struct Pseudo_reduction_instruction;
struct VALU_instruction;
struct VINTERP_inreg_instruction;
struct VINTRP_instruction;
struct VOPD_instruction;
struct DPP16_instruction;
struct DPP8_instruction;
1088 
/* Base class of all IR instructions. The format member selects which of the
 * derived structs below this actually is; the checked accessors downcast
 * accordingly. */
struct Instruction {
   aco_opcode opcode;
   Format format;
   union {
      uint32_t pass_flags;            /* scratch storage for individual passes */
      RegisterDemand register_demand; /* aliases pass_flags; only one is valid at a time */
   };

   aco::span<Operand> operands;
   aco::span<Definition> definitions;

   constexpr bool usesModifiers() const noexcept;

   /* Whether any operand is fixed to a half of the exec mask. */
   constexpr bool reads_exec() const noexcept
   {
      for (const Operand& op : operands) {
         if (op.isFixed() && (op.physReg() == exec_lo || op.physReg() == exec_hi))
            return true;
      }
      return false;
   }

   /* Whether any definition is fixed to a half of the exec mask. */
   constexpr bool writes_exec() const noexcept
   {
      for (const Definition& def : definitions) {
         if (def.isFixed() && (def.physReg() == exec_lo || def.physReg() == exec_hi))
            return true;
      }
      return false;
   }

   /* Checked downcast accessors. C-style casts are used because the target
    * structs are only forward-declared at this point. */
   Pseudo_instruction& pseudo() noexcept
   {
      assert(isPseudo());
      return *(Pseudo_instruction*)this;
   }
   const Pseudo_instruction& pseudo() const noexcept
   {
      assert(isPseudo());
      return *(Pseudo_instruction*)this;
   }
   constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; }

   constexpr bool isSOP1() const noexcept { return format == Format::SOP1; }
   constexpr bool isSOP2() const noexcept { return format == Format::SOP2; }
   constexpr bool isSOPK() const noexcept { return format == Format::SOPK; }
   constexpr bool isSOPP() const noexcept { return format == Format::SOPP; }
   constexpr bool isSOPC() const noexcept { return format == Format::SOPC; }

   SMEM_instruction& smem() noexcept
   {
      assert(isSMEM());
      return *(SMEM_instruction*)this;
   }
   const SMEM_instruction& smem() const noexcept
   {
      assert(isSMEM());
      return *(SMEM_instruction*)this;
   }
   constexpr bool isSMEM() const noexcept { return format == Format::SMEM; }
   DS_instruction& ds() noexcept
   {
      assert(isDS());
      return *(DS_instruction*)this;
   }
   const DS_instruction& ds() const noexcept
   {
      assert(isDS());
      return *(DS_instruction*)this;
   }
   constexpr bool isDS() const noexcept { return format == Format::DS; }
   LDSDIR_instruction& ldsdir() noexcept
   {
      assert(isLDSDIR());
      return *(LDSDIR_instruction*)this;
   }
   const LDSDIR_instruction& ldsdir() const noexcept
   {
      assert(isLDSDIR());
      return *(LDSDIR_instruction*)this;
   }
   constexpr bool isLDSDIR() const noexcept { return format == Format::LDSDIR; }
   MTBUF_instruction& mtbuf() noexcept
   {
      assert(isMTBUF());
      return *(MTBUF_instruction*)this;
   }
   const MTBUF_instruction& mtbuf() const noexcept
   {
      assert(isMTBUF());
      return *(MTBUF_instruction*)this;
   }
   constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; }
   MUBUF_instruction& mubuf() noexcept
   {
      assert(isMUBUF());
      return *(MUBUF_instruction*)this;
   }
   const MUBUF_instruction& mubuf() const noexcept
   {
      assert(isMUBUF());
      return *(MUBUF_instruction*)this;
   }
   constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; }
   MIMG_instruction& mimg() noexcept
   {
      assert(isMIMG());
      return *(MIMG_instruction*)this;
   }
   const MIMG_instruction& mimg() const noexcept
   {
      assert(isMIMG());
      return *(MIMG_instruction*)this;
   }
   constexpr bool isMIMG() const noexcept { return format == Format::MIMG; }
   Export_instruction& exp() noexcept
   {
      assert(isEXP());
      return *(Export_instruction*)this;
   }
   const Export_instruction& exp() const noexcept
   {
      assert(isEXP());
      return *(Export_instruction*)this;
   }
   constexpr bool isEXP() const noexcept { return format == Format::EXP; }
   FLAT_instruction& flat() noexcept
   {
      assert(isFlat());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& flat() const noexcept
   {
      assert(isFlat());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isFlat() const noexcept { return format == Format::FLAT; }
   /* GLOBAL and SCRATCH reuse the FLAT_instruction layout. */
   FLAT_instruction& global() noexcept
   {
      assert(isGlobal());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& global() const noexcept
   {
      assert(isGlobal());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; }
   FLAT_instruction& scratch() noexcept
   {
      assert(isScratch());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& scratch() const noexcept
   {
      assert(isScratch());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; }
   Pseudo_branch_instruction& branch() noexcept
   {
      assert(isBranch());
      return *(Pseudo_branch_instruction*)this;
   }
   const Pseudo_branch_instruction& branch() const noexcept
   {
      assert(isBranch());
      return *(Pseudo_branch_instruction*)this;
   }
   constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; }
   Pseudo_barrier_instruction& barrier() noexcept
   {
      assert(isBarrier());
      return *(Pseudo_barrier_instruction*)this;
   }
   const Pseudo_barrier_instruction& barrier() const noexcept
   {
      assert(isBarrier());
      return *(Pseudo_barrier_instruction*)this;
   }
   constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; }
   Pseudo_reduction_instruction& reduction() noexcept
   {
      assert(isReduction());
      return *(Pseudo_reduction_instruction*)this;
   }
   const Pseudo_reduction_instruction& reduction() const noexcept
   {
      assert(isReduction());
      return *(Pseudo_reduction_instruction*)this;
   }
   constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
   /* VOP1/VOP2/VOPC/VOP3/VOP3P/DPP/SDWA use a bit test rather than equality:
    * the format value can carry these bits in combination. */
   constexpr bool isVOP3P() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3P; }
   VINTERP_inreg_instruction& vinterp_inreg() noexcept
   {
      assert(isVINTERP_INREG());
      return *(VINTERP_inreg_instruction*)this;
   }
   const VINTERP_inreg_instruction& vinterp_inreg() const noexcept
   {
      assert(isVINTERP_INREG());
      return *(VINTERP_inreg_instruction*)this;
   }
   constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
   VOPD_instruction& vopd() noexcept
   {
      assert(isVOPD());
      return *(VOPD_instruction*)this;
   }
   const VOPD_instruction& vopd() const noexcept
   {
      assert(isVOPD());
      return *(VOPD_instruction*)this;
   }
   constexpr bool isVOPD() const noexcept { return format == Format::VOPD; }
   constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
   constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
   constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
   constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
   VINTRP_instruction& vintrp() noexcept
   {
      assert(isVINTRP());
      return *(VINTRP_instruction*)this;
   }
   const VINTRP_instruction& vintrp() const noexcept
   {
      assert(isVINTRP());
      return *(VINTRP_instruction*)this;
   }
   constexpr bool isVINTRP() const noexcept { return format == Format::VINTRP; }
   DPP16_instruction& dpp16() noexcept
   {
      assert(isDPP16());
      return *(DPP16_instruction*)this;
   }
   const DPP16_instruction& dpp16() const noexcept
   {
      assert(isDPP16());
      return *(DPP16_instruction*)this;
   }
   constexpr bool isDPP16() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP16; }
   DPP8_instruction& dpp8() noexcept
   {
      assert(isDPP8());
      return *(DPP8_instruction*)this;
   }
   const DPP8_instruction& dpp8() const noexcept
   {
      assert(isDPP8());
      return *(DPP8_instruction*)this;
   }
   constexpr bool isDPP8() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP8; }
   constexpr bool isDPP() const noexcept { return isDPP16() || isDPP8(); }
   SDWA_instruction& sdwa() noexcept
   {
      assert(isSDWA());
      return *(SDWA_instruction*)this;
   }
   const SDWA_instruction& sdwa() const noexcept
   {
      assert(isSDWA());
      return *(SDWA_instruction*)this;
   }
   constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; }

   /* Unchecked downcast — caller must ensure isFlatLike(). */
   FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; }

   const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; }

   constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); }

   VALU_instruction& valu() noexcept
   {
      assert(isVALU());
      return *(VALU_instruction*)this;
   }
   const VALU_instruction& valu() const noexcept
   {
      assert(isVALU());
      return *(VALU_instruction*)this;
   }
   /* Any vector-ALU encoding. */
   constexpr bool isVALU() const noexcept
   {
      return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG() ||
             isVOPD();
   }

   SALU_instruction& salu() noexcept
   {
      assert(isSALU());
      return *(SALU_instruction*)this;
   }
   const SALU_instruction& salu() const noexcept
   {
      assert(isSALU());
      return *(SALU_instruction*)this;
   }
   /* Any scalar-ALU encoding. */
   constexpr bool isSALU() const noexcept
   {
      return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP();
   }

   /* Buffer/image vector-memory encodings; FLAT-like formats are not
    * included here. */
   constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); }

   bool accessesLDS() const noexcept;
   bool isTrans() const noexcept;
};
static_assert(sizeof(Instruction) == 16, "Unexpected padding");
1397 
1398 struct SALU_instruction : public Instruction {
1399    /* In case of SOPP branch instructions, contains the Block index,
1400     * and otherwise, for SOPP and SOPK the 16-bit signed immediate.
1401     */
1402    uint32_t imm;
1403 };
1404 static_assert(sizeof(SALU_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1405 
1406 /**
1407  * Scalar Memory Format:
1408  * For s_(buffer_)load_dword*:
1409  * Operand(0): SBASE - SGPR-pair which provides base address
1410  * Operand(1): Offset - immediate (un)signed offset or SGPR
1411  * Operand(2) / Definition(0): SDATA - SGPR for read / write result
1412  * Operand(n-1): SOffset - SGPR offset (Vega only)
1413  *
1414  * Having no operands is also valid for instructions such as s_dcache_inv.
1415  *
1416  */
1417 struct SMEM_instruction : public Instruction {
1418    memory_sync_info sync;
1419    ac_hw_cache_flags cache;
1420 };
1421 static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1422 
1423 struct VALU_instruction : public Instruction {
1424    union {
1425       bitfield_array8<uint32_t, 0, 3> neg;    /* VOP3, SDWA, DPP16, v_fma_mix, VINTERP_inreg */
1426       bitfield_array8<uint32_t, 0, 3> neg_lo; /* VOP3P */
1427 
1428       bitfield_array8<uint32_t, 3, 3> abs;    /* VOP3, SDWA, DPP16, v_fma_mix */
1429       bitfield_array8<uint32_t, 3, 3> neg_hi; /* VOP3P */
1430 
1431       bitfield_array8<uint32_t, 6, 4> opsel;     /* VOP3, VOPC12(GFX11+), VINTERP_inreg */
1432       bitfield_uint8<uint32_t, 10, 2> omod;      /* VOP3, SDWA(GFX9+) */
1433       bitfield_array8<uint32_t, 12, 3> opsel_lo; /* VOP3P */
1434       bitfield_array8<uint32_t, 15, 3> opsel_hi; /* VOP3P */
1435       bitfield_bool<uint32_t, 18> clamp;         /* VOP3, VOP3P, SDWA, VINTERP_inreg */
1436    };
1437 
1438    void swapOperands(unsigned idx0, unsigned idx1);
1439 };
1440 static_assert(sizeof(VALU_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1441 
1442 struct VINTERP_inreg_instruction : public VALU_instruction {
1443    uint8_t wait_exp : 3;
1444    uint8_t padding3 : 5;
1445    uint8_t padding4;
1446    uint8_t padding5;
1447    uint8_t padding6;
1448 };
1449 static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
1450               "Unexpected padding");
1451 
1452 struct VOPD_instruction : public VALU_instruction {
1453    aco_opcode opy;
1454    uint16_t padding;
1455 };
1456 static_assert(sizeof(VOPD_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1457 
1458 /**
1459  * Data Parallel Primitives Format:
1460  * This format can be used for VOP1, VOP2 or VOPC instructions.
1461  * The swizzle applies to the src0 operand.
1462  *
1463  */
1464 struct DPP16_instruction : public VALU_instruction {
1465    uint16_t dpp_ctrl;
1466    uint8_t row_mask : 4;
1467    uint8_t bank_mask : 4;
1468    bool bound_ctrl : 1;
1469    uint8_t fetch_inactive : 1;
1470    uint8_t padding3 : 6;
1471 };
1472 static_assert(sizeof(DPP16_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1473 
1474 struct DPP8_instruction : public VALU_instruction {
1475    uint32_t lane_sel : 24;
1476    uint32_t fetch_inactive : 1;
1477    uint32_t padding : 7;
1478 };
1479 static_assert(sizeof(DPP8_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1480 
/* Compact description of a sub-dword selection: an access size (1/2/4 bytes),
 * a byte offset within the dword and a sign-extension flag, all packed into a
 * single byte (offset in bits 0-1, size in bits 2-4, sign-extend in bit 5). */
struct SubdwordSel {
   enum sdwa_sel : uint8_t {
      ubyte = 0x4,
      uword = 0x8,
      dword = 0x10,
      sext = 0x20,
      sbyte = ubyte | sext,
      sword = uword | sext,

      ubyte0 = ubyte,
      ubyte1 = ubyte | 1,
      ubyte2 = ubyte | 2,
      ubyte3 = ubyte | 3,
      sbyte0 = sbyte,
      sbyte1 = sbyte | 1,
      sbyte2 = sbyte | 2,
      sbyte3 = sbyte | 3,
      uword0 = uword,
      uword1 = uword | 2,
      sword0 = sword,
      sword1 = sword | 2,
   };

   SubdwordSel() : sel((sdwa_sel)0) {}
   constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {}
   /* Pack size/offset/sign-extend into the single-byte encoding. */
   constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend)
       : sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset))
   {}
   constexpr operator sdwa_sel() const { return sel; }
   /* A zero encoding means "no selection". */
   explicit operator bool() const { return sel != 0; }

   constexpr unsigned size() const { return (sel >> 2) & 0x7; }
   constexpr unsigned offset() const { return sel & 0x3; }
   constexpr bool sign_extend() const { return sel & sext; }
   /* Translate to the hardware SDWA sel field for a register that itself
    * starts at reg_byte_offset: bytes map to BYTE_0..3 (0-3), words to
    * WORD_0..1 (4-5) and anything larger to DWORD (6). */
   constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const
   {
      const unsigned byte = reg_byte_offset + offset();
      switch (size()) {
      case 1: return byte;
      case 2: return 4 + (byte >> 1);
      default: return 6;
      }
   }

private:
   sdwa_sel sel;
};
1529 
1530 /**
1531  * Sub-Dword Addressing Format:
1532  * This format can be used for VOP1, VOP2 or VOPC instructions.
1533  *
1534  * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
1535  * the definition doesn't have to be VCC on GFX9+.
1536  *
1537  */
1538 struct SDWA_instruction : public VALU_instruction {
1539    /* these destination modifiers aren't available with VOPC except for
1540     * clamp on GFX8 */
1541    SubdwordSel sel[2];
1542    SubdwordSel dst_sel;
1543    uint8_t padding3;
1544 };
1545 static_assert(sizeof(SDWA_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1546 
1547 struct VINTRP_instruction : public Instruction {
1548    uint8_t attribute;
1549    uint8_t component;
1550    bool high_16bits;
1551    uint8_t padding;
1552 };
1553 static_assert(sizeof(VINTRP_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1554 
1555 /**
1556  * Local and Global Data Sharing instructions
1557  * Operand(0): ADDR - VGPR which supplies the address.
1558  * Operand(1): DATA0 - First data VGPR.
1559  * Operand(2): DATA1 - Second data VGPR.
1560  * Operand(n-1): M0 - LDS size.
1561  * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
1562  *
1563  */
1564 struct DS_instruction : public Instruction {
1565    memory_sync_info sync;
1566    bool gds;
1567    uint16_t offset0;
1568    uint8_t offset1;
1569    uint8_t padding;
1570 };
1571 static_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1572 
1573 /**
1574  * LDS Direct instructions
1575  * Operand(0): M0
1576  * Definition(0): VDST - Destination VGPR
1577  */
1578 struct LDSDIR_instruction : public Instruction {
1579    memory_sync_info sync;
1580    uint8_t attr : 6;
1581    uint8_t attr_chan : 2;
1582    uint32_t wait_vdst : 4;
1583    uint32_t wait_vsrc : 1;
1584    uint32_t padding : 27;
1585 };
1586 static_assert(sizeof(LDSDIR_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1587 
1588 /**
1589  * Vector Memory Untyped-buffer Instructions
1590  * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
1591  * Operand(1): VADDR - Address source. Can carry an index and/or offset
1592  * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
1593  * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
1594  *
1595  */
1596 struct MUBUF_instruction : public Instruction {
1597    memory_sync_info sync;
1598    ac_hw_cache_flags cache;
1599    bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
1600    bool idxen : 1;           /* Supply an index from VGPR (VADDR) */
1601    bool addr64 : 1;          /* SI, CIK: Address size is 64-bit */
1602    bool tfe : 1;             /* texture fail enable */
1603    bool lds : 1;             /* Return read-data to LDS instead of VGPRs */
1604    bool disable_wqm : 1;     /* Require an exec mask without helper invocations */
1605    uint8_t padding0 : 2;
1606    uint8_t padding1;
1607    uint16_t offset; /* Unsigned byte offset - 12 bit */
1608 };
1609 static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1610 
1611 /**
1612  * Vector Memory Typed-buffer Instructions
1613  * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
1614  * Operand(1): VADDR - Address source. Can carry an index and/or offset
1615  * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
1616  * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
1617  *
1618  */
1619 struct MTBUF_instruction : public Instruction {
1620    memory_sync_info sync;
1621    ac_hw_cache_flags cache;
1622    uint8_t dfmt : 4;         /* Data Format of data in memory buffer */
1623    uint8_t nfmt : 3;         /* Numeric format of data in memory */
1624    bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
1625    bool idxen : 1;           /* Supply an index from VGPR (VADDR) */
1626    bool tfe : 1;             /* texture fail enable */
1627    bool disable_wqm : 1;     /* Require an exec mask without helper invocations */
1628    uint8_t padding : 5;
1629    uint16_t offset; /* Unsigned byte offset - 12 bit */
1630 };
1631 static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1632 
1633 /**
1634  * Vector Memory Image Instructions
1635  * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
1636  * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
1637  * Operand(2): VDATA - Vector GPR for write data or zero if TFE/LWE=1.
1638  * Operand(3): VADDR - Address source. Can carry an offset or an index.
1639  * Definition(0): VDATA - Vector GPR for read result.
1640  *
1641  */
1642 struct MIMG_instruction : public Instruction {
1643    memory_sync_info sync;
1644    ac_hw_cache_flags cache;
1645    uint8_t dmask;        /* Data VGPR enable mask */
1646    uint8_t dim : 3;      /* NAVI: dimensionality */
1647    bool unrm : 1;        /* Force address to be un-normalized */
1648    bool tfe : 1;         /* texture fail enable */
1649    bool da : 1;          /* declare an array */
1650    bool lwe : 1;         /* LOD warning enable */
1651    bool r128 : 1;        /* NAVI: Texture resource size */
1652    bool a16 : 1;         /* VEGA, NAVI: Address components are 16-bits */
1653    bool d16 : 1;         /* Convert 32-bit data to 16-bit data */
1654    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
1655    bool strict_wqm : 1;  /* VADDR is a linear VGPR and additional VGPRs may be copied into it */
1656    uint8_t padding0 : 4;
1657    uint8_t padding1;
1658 };
1659 static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1660 
1661 /**
1662  * Flat/Scratch/Global Instructions
1663  * Operand(0): ADDR
1664  * Operand(1): SADDR
1665  * Operand(2) / Definition(0): DATA/VDST
1666  *
1667  */
1668 struct FLAT_instruction : public Instruction {
1669    memory_sync_info sync;
1670    ac_hw_cache_flags cache;
1671    bool lds : 1;
1672    bool nv : 1;
1673    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
1674    uint8_t padding0 : 5;
1675    uint8_t padding1;
1676    int16_t offset; /* Vega/Navi only */
1677 };
1678 static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1679 
/* Export of shader results (positions, parameters, color/MRT). */
struct Export_instruction : public Instruction {
   uint8_t enabled_mask;  /* per-component write-enable mask (hardware EN field — confirm) */
   uint8_t dest;          /* export target (hardware TGT field — confirm) */
   bool compressed : 1;   /* components are packed as 16-bit pairs */
   bool done : 1;         /* marks the final export of this kind */
   bool valid_mask : 1;
   bool row_en : 1;
   uint8_t padding0 : 4;  /* unused bits; keep the bitfield byte-aligned */
   uint8_t padding1;      /* explicit padding so the size assert below holds */
};
static_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1691 
/* Generic compiler-internal (non-hardware) instruction; lowered to real
 * instructions in lower_to_hw_instr(). */
struct Pseudo_instruction : public Instruction {
   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
   bool tmp_in_scc;
   bool needs_scratch_reg; /* if scratch_sgpr/scc can be written, initialized by RA. */
};
static_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1698 
struct Pseudo_branch_instruction : public Instruction {
   /* target[0] is the block index of the branch target.
    * For conditional branches, target[1] contains the fall-through alternative.
    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
    */
   uint32_t target[2];

   /* Indicates that this rarely or never jumps to target[0]. */
   bool rarely_taken;
   bool never_taken;

   uint16_t padding; /* explicit padding so the size assert below holds */
};
static_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 12, "Unexpected padding");
1713 
/* Memory/execution barrier; lowered to the appropriate hardware waits. */
struct Pseudo_barrier_instruction : public Instruction {
   memory_sync_info sync; /* which storage classes/semantics the barrier orders */
   sync_scope exec_scope; /* scope of execution synchronization (e.g. workgroup) */
};
static_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1719 
/* Reduction operations, named <operation><bit-width>. Float ops exist only
 * for 16/32/64 bits. Used by Pseudo_reduction_instruction::reduce_op. */
enum ReduceOp : uint16_t {
   // clang-format off
   iadd8, iadd16, iadd32, iadd64,
   imul8, imul16, imul32, imul64,
          fadd16, fadd32, fadd64,
          fmul16, fmul32, fmul64,
   imin8, imin16, imin32, imin64,
   imax8, imax16, imax32, imax64,
   umin8, umin16, umin32, umin64,
   umax8, umax16, umax32, umax64,
          fmin16, fmin32, fmin64,
          fmax16, fmax32, fmax64,
   iand8, iand16, iand32, iand64,
   ior8, ior16, ior32, ior64,
   ixor8, ixor16, ixor32, ixor64,
   num_reduce_ops,
   // clang-format on
};
1738 
1739 /**
1740  * Subgroup Reduction Instructions, everything except for the data to be
1741  * reduced and the result as inserted by setup_reduce_temp().
1742  * Operand(0): data to be reduced
1743  * Operand(1): reduce temporary
1744  * Operand(2): vector temporary
1745  * Definition(0): result
1746  * Definition(1): scalar temporary
1747  * Definition(2): scalar identity temporary (not used to store identity on GFX10)
1748  * Definition(3): scc clobber
1749  * Definition(4): vcc clobber
1750  *
1751  */
struct Pseudo_reduction_instruction : public Instruction {
   ReduceOp reduce_op;    /* which reduction to perform (see ReduceOp) */
   uint16_t cluster_size; // must be 0 for scans
};
static_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4,
              "Unexpected padding");
1758 
1759 inline bool
accessesLDS()1760 Instruction::accessesLDS() const noexcept
1761 {
1762    return (isDS() && !ds().gds) || isLDSDIR() || isVINTRP();
1763 }
1764 
1765 inline void
swapOperands(unsigned idx0,unsigned idx1)1766 VALU_instruction::swapOperands(unsigned idx0, unsigned idx1)
1767 {
1768    if (this->isSDWA() && idx0 != idx1) {
1769       assert(idx0 < 2 && idx1 < 2);
1770       std::swap(this->sdwa().sel[0], this->sdwa().sel[1]);
1771    }
1772    assert(idx0 < 3 && idx1 < 3);
1773    std::swap(this->operands[idx0], this->operands[idx1]);
1774    this->neg[idx0].swap(this->neg[idx1]);
1775    this->abs[idx0].swap(this->abs[idx1]);
1776    this->opsel[idx0].swap(this->opsel[idx1]);
1777    this->opsel_lo[idx0].swap(this->opsel_lo[idx1]);
1778    this->opsel_hi[idx0].swap(this->opsel_hi[idx1]);
1779 }
1780 
/* No-op deleter for aco_ptr: instructions live in a bulk allocator and are
 * released all at once when compilation finishes, never individually. */
struct instr_deleter_functor {
   void operator()(void*) {}
};
1787 
1788 template <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
1789 
1790 size_t get_instr_data_size(Format format);
1791 
1792 Instruction* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
1793                                 uint32_t num_definitions);
1794 
1795 constexpr bool
usesModifiers()1796 Instruction::usesModifiers() const noexcept
1797 {
1798    if (isDPP() || isSDWA())
1799       return true;
1800 
1801    if (isVOP3P()) {
1802       const VALU_instruction& vop3p = this->valu();
1803       /* opsel_hi must be 1 to not be considered a modifier - even for constants */
1804       return vop3p.opsel_lo || vop3p.clamp || vop3p.neg_lo || vop3p.neg_hi ||
1805              (vop3p.opsel_hi & BITFIELD_MASK(operands.size())) != BITFIELD_MASK(operands.size());
1806    } else if (isVALU()) {
1807       const VALU_instruction& vop3 = this->valu();
1808       return vop3.opsel || vop3.clamp || vop3.omod || vop3.abs || vop3.neg;
1809    }
1810    return false;
1811 }
1812 
1813 constexpr bool
is_phi(Instruction * instr)1814 is_phi(Instruction* instr)
1815 {
1816    return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
1817 }
1818 
1819 static inline bool
is_phi(aco_ptr<Instruction> & instr)1820 is_phi(aco_ptr<Instruction>& instr)
1821 {
1822    return is_phi(instr.get());
1823 }
1824 
1825 bool is_wait_export_ready(amd_gfx_level gfx_level, const Instruction* instr);
1826 memory_sync_info get_sync_info(const Instruction* instr);
1827 
1828 inline bool
is_dead(const std::vector<uint16_t> & uses,const Instruction * instr)1829 is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
1830 {
1831    if (instr->definitions.empty() || instr->isBranch() || instr->opcode == aco_opcode::p_startpgm ||
1832        instr->opcode == aco_opcode::p_init_scratch ||
1833        instr->opcode == aco_opcode::p_dual_src_export_gfx11)
1834       return false;
1835 
1836    if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
1837                    [&uses](const Definition& def) { return !def.isTemp() || uses[def.tempId()]; }))
1838       return false;
1839 
1840    return !(get_sync_info(instr).semantics & (semantic_volatile | semantic_acqrel));
1841 }
1842 
1843 bool can_use_input_modifiers(amd_gfx_level gfx_level, aco_opcode op, int idx);
1844 bool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx);
1845 bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op);
1846 uint8_t get_gfx11_true16_mask(aco_opcode op);
1847 bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra);
1848 bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8);
1849 bool can_write_m0(const aco_ptr<Instruction>& instr);
1850 /* updates "instr" and returns the old instruction (or NULL if no update was needed) */
1851 aco_ptr<Instruction> convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr);
1852 aco_ptr<Instruction> convert_to_DPP(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr,
1853                                     bool dpp8);
1854 bool needs_exec_mask(const Instruction* instr);
1855 
1856 aco_opcode get_vcmp_inverse(aco_opcode op);
1857 aco_opcode get_vcmp_swapped(aco_opcode op);
1858 aco_opcode get_vcmpx(aco_opcode op);
1859 bool is_cmpx(aco_opcode op);
1860 
1861 bool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op, unsigned idx0 = 0,
1862                        unsigned idx1 = 1);
1863 
1864 uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
1865 
1866 unsigned get_mimg_nsa_dwords(const Instruction* instr);
1867 
1868 unsigned get_vopd_opy_start(const Instruction* instr);
1869 
1870 unsigned get_operand_size(aco_ptr<Instruction>& instr, unsigned index);
1871 
1872 bool should_form_clause(const Instruction* a, const Instruction* b);
1873 
enum vmem_type : uint8_t {
   vmem_nosampler = 1 << 0, /* VMEM access without a sampler (buffer/global-style) */
   vmem_sampler = 1 << 1,   /* sampled image access */
   vmem_bvh = 1 << 2,       /* ray-tracing BVH intersection access */
};
1879 
1880 /* VMEM instructions of the same type return in-order. For GFX12+, this determines which counter
1881  * is used.
1882  */
1883 uint8_t get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr);
1884 
1885 unsigned parse_vdst_wait(Instruction* instr);
1886 
/* Flags describing the role of a basic block in the CFG; a mask of these is
 * stored in Block::kind. */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all actives lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_branch = 1 << 8,
   block_kind_merge = 1 << 9,
   block_kind_invert = 1 << 10,
   block_kind_discard_early_exit = 1 << 11,
   block_kind_uses_discard = 1 << 12,
   block_kind_resume = 1 << 13,
   block_kind_export_end = 1 << 14,
   block_kind_end_with_regs = 1 << 15,
};
1907 
1908 /* CFG */
1909 struct Block {
1910    using edge_vec = small_vec<uint32_t, 2>;
1911 
1912    float_mode fp_mode;
1913    unsigned index;
1914    unsigned offset = 0;
1915    std::vector<aco_ptr<Instruction>> instructions;
1916    edge_vec logical_preds;
1917    edge_vec linear_preds;
1918    edge_vec logical_succs;
1919    edge_vec linear_succs;
1920    RegisterDemand register_demand = RegisterDemand();
1921    RegisterDemand live_in_demand = RegisterDemand();
1922    uint32_t kind = 0;
1923    int32_t logical_idom = -1;
1924    int32_t linear_idom = -1;
1925 
1926    /* Preorder and postorder traversal indices of the dominance tree. Because a program can have
1927     * several dominance trees (because of block_kind_resume), these start at the block index of the
1928     * root node. */
1929    uint32_t logical_dom_pre_index = 0;
1930    uint32_t logical_dom_post_index = 0;
1931    uint32_t linear_dom_pre_index = 0;
1932    uint32_t linear_dom_post_index = 0;
1933 
1934    uint16_t loop_nest_depth = 0;
1935    uint16_t divergent_if_logical_depth = 0;
1936    uint16_t uniform_if_depth = 0;
1937 
1938    /* this information is needed for predecessors to blocks with phis when
1939     * moving out of ssa */
1940    bool scc_live_out = false;
1941 
BlockBlock1942    Block() : index(0) {}
1943 };
1944 
1945 /*
1946  * Shader stages as provided in Vulkan by the application. Contrast this to HWStage.
1947  */
/* Bitmask of software stages, so merged stages can be expressed as unions
 * (see operator| below). */
enum class SWStage : uint16_t {
   None = 0,
   VS = 1 << 0,  /* Vertex Shader */
   GS = 1 << 1,  /* Geometry Shader */
   TCS = 1 << 2, /* Tessellation Control aka Hull Shader */
   TES = 1 << 3, /* Tessellation Evaluation aka Domain Shader */
   FS = 1 << 4,  /* Fragment aka Pixel Shader */
   CS = 1 << 5,  /* Compute Shader */
   TS = 1 << 6,  /* Task Shader */
   MS = 1 << 7,  /* Mesh Shader */
   RT = 1 << 8,  /* Raytracing Shader */

   /* Stage combinations merged to run on a single HWStage */
   VS_GS = VS | GS,
   VS_TCS = VS | TCS,
   TES_GS = TES | GS,
};
1965 
1966 constexpr SWStage
1967 operator|(SWStage a, SWStage b)
1968 {
1969    return static_cast<SWStage>(static_cast<uint16_t>(a) | static_cast<uint16_t>(b));
1970 }
1971 
1972 /*
1973  * Set of SWStages to be merged into a single shader paired with the
1974  * HWStage it will run on.
1975  */
struct Stage {
   constexpr Stage() = default;

   explicit constexpr Stage(ac_hw_stage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}

   /* Check if the given SWStage is included */
   constexpr bool has(SWStage stage) const
   {
      return (static_cast<uint16_t>(sw) & static_cast<uint16_t>(stage));
   }

   /* Number of software stages merged into this shader */
   unsigned num_sw_stages() const { return util_bitcount(static_cast<uint16_t>(sw)); }

   constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; }

   constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; }

   /* Mask of merged software stages */
   SWStage sw = SWStage::None;

   /* Active hardware stage */
   ac_hw_stage hw{};
};
1999 
/* possible settings of Program::stage */
/* Monolithic stages: one SW stage running on its natural HW stage. */
static constexpr Stage vertex_vs(AC_HW_VERTEX_SHADER, SWStage::VS);
static constexpr Stage fragment_fs(AC_HW_PIXEL_SHADER, SWStage::FS);
static constexpr Stage compute_cs(AC_HW_COMPUTE_SHADER, SWStage::CS);
static constexpr Stage tess_eval_vs(AC_HW_VERTEX_SHADER, SWStage::TES);
/* Mesh shading pipeline */
static constexpr Stage task_cs(AC_HW_COMPUTE_SHADER, SWStage::TS);
static constexpr Stage mesh_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::MS);
/* GFX10/NGG */
static constexpr Stage vertex_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS);
static constexpr Stage vertex_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS_GS);
static constexpr Stage tess_eval_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES);
static constexpr Stage tess_eval_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES_GS);
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::VS_GS);
static constexpr Stage vertex_tess_control_hs(AC_HW_HULL_SHADER, SWStage::VS_TCS);
static constexpr Stage tess_eval_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::TES_GS);
/* pre-GFX9 */
static constexpr Stage vertex_ls(AC_HW_LOCAL_SHADER,
                                 SWStage::VS); /* vertex before tessellation control */
static constexpr Stage vertex_es(AC_HW_EXPORT_SHADER, SWStage::VS); /* vertex before geometry */
static constexpr Stage tess_control_hs(AC_HW_HULL_SHADER, SWStage::TCS);
static constexpr Stage tess_eval_es(AC_HW_EXPORT_SHADER,
                                    SWStage::TES); /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::GS);
/* Raytracing */
static constexpr Stage raytracing_cs(AC_HW_COMPUTE_SHADER, SWStage::RT);
2027 
/* Hardware limits and capabilities of the target device (presumably filled in
 * by init_program() from gfx_level/family — confirm). */
struct DeviceInfo {
   uint16_t lds_encoding_granule;
   uint16_t lds_alloc_granule;
   uint32_t lds_limit; /* in bytes */
   bool has_16bank_lds;
   uint16_t physical_sgprs;
   uint16_t physical_vgprs;
   uint16_t vgpr_limit; /* maximum addressable VGPRs */
   uint16_t sgpr_limit; /* maximum addressable SGPRs */
   uint16_t sgpr_alloc_granule;
   uint16_t vgpr_alloc_granule;
   unsigned scratch_alloc_granule;
   uint16_t max_waves_per_simd;
   unsigned simd_per_cu;
   bool has_fast_fma32 = false;
   bool has_mac_legacy32 = false;
   bool has_fmac_legacy32 = false;
   bool fused_mad_mix = false;
   bool xnack_enabled = false;
   bool sram_ecc_enabled = false;

   /* valid immediate offset range for scratch/global instructions */
   int16_t scratch_global_offset_min;
   int16_t scratch_global_offset_max;
   unsigned max_nsa_vgprs;
};
2053 
/* How far compilation has progressed; stored in Program::progress. */
enum class CompilationProgress {
   after_isel,        /* instruction selection done */
   after_spilling,    /* spilling done */
   after_ra,          /* register allocation done */
   after_lower_to_hw, /* pseudo instructions lowered to HW instructions */
};
2060 
/* Top-level compilation unit: the CFG, target description and all state
 * shared between compilation passes. */
class Program final {
public:
   /* Arena backing IR allocations; memory is released in bulk when the
    * Program is destroyed (see instr_deleter_functor). */
   aco::monotonic_buffer_resource m{65536};
   std::vector<Block> blocks;
   /* Register class of each temp, indexed by temp id. Index 0 is a dummy
    * entry since temp ids start at 1 (allocationID below). */
   std::vector<RegClass> temp_rc = {s1};
   RegisterDemand max_reg_demand = RegisterDemand();
   ac_shader_config* config;
   struct aco_shader_info info;
   enum amd_gfx_level gfx_level;
   enum radeon_family family;
   DeviceInfo dev;
   unsigned wave_size;
   RegClass lane_mask; /* register class of exec-sized masks (s1 or s2) — confirm */
   Stage stage;
   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
   bool needs_wqm = false;   /* there exists a p_wqm instruction */
   bool has_smem_buffer_or_global_loads = false;
   bool has_pops_overlapped_waves_wait = false;
   bool has_color_exports = false;
   bool is_prolog = false;
   bool is_epilog = false;

   std::vector<uint8_t> constant_data;
   Temp private_segment_buffer;
   Temp scratch_offset;

   uint16_t num_waves = 0; /* waves per SIMD the program can run with */
   uint16_t min_waves = 0; /* lower bound computed by calc_min_waves() */
   unsigned workgroup_size; /* if known; otherwise UINT_MAX */
   bool wgp_mode;

   bool needs_vcc = false;

   CompilationProgress progress;

   bool collect_statistics = false;
   uint32_t statistics[aco_num_statistics];

   /* Defaults applied to blocks created by insert_block() below. */
   float_mode next_fp_mode;
   unsigned next_loop_depth = 0;
   unsigned next_divergent_if_logical_depth = 0;
   unsigned next_uniform_if_depth = 0;

   std::vector<Definition> args_pending_vmem;

   /* For shader part with previous shader part that has lds access. */
   bool pending_lds_access = false;

   struct {
      monotonic_buffer_resource memory;
      /* live-in temps per block */
      std::vector<IDSet> live_in;
   } live;

   /* Destination and callback for compiler diagnostics. */
   struct {
      FILE* output = stderr;
      bool shorten_messages = false;
      void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
      void* private_data;
   } debug;

   /* Allocates a fresh temp id for a temp of class "rc". Ids are limited to
    * 24 bits (see the assert). */
   uint32_t allocateId(RegClass rc)
   {
      assert(allocationID <= 16777215);
      temp_rc.push_back(rc);
      return allocationID++;
   }

   /* Reserves "amount" consecutive temp ids; their classes in temp_rc are
    * default-constructed and must be filled in by the caller. */
   void allocateRange(unsigned amount)
   {
      assert(allocationID + amount <= 16777216);
      temp_rc.resize(temp_rc.size() + amount);
      allocationID += amount;
   }

   /* Allocates a fresh id and wraps it in a Temp of class "rc". */
   Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); }

   /* The id the next allocateId() call would return. */
   uint32_t peekAllocationId() { return allocationID; }

   friend void reindex_ssa(Program* program, bool update_live_out);

   /* Appends a fresh block to the CFG and returns it. */
   Block* create_and_insert_block()
   {
      Block block;
      return insert_block(std::move(block));
   }

   /* Appends "block" to the CFG, assigning its index and inheriting the
    * current next_* defaults. The returned pointer is only valid until the
    * next insertion. */
   Block* insert_block(Block&& block)
   {
      block.index = blocks.size();
      block.fp_mode = next_fp_mode;
      block.loop_nest_depth = next_loop_depth;
      block.divergent_if_logical_depth = next_divergent_if_logical_depth;
      block.uniform_if_depth = next_uniform_if_depth;
      blocks.emplace_back(std::move(block));
      return &blocks.back();
   }

private:
   uint32_t allocationID = 1; /* next temp id; 0 is reserved */
};
2162 
/* Test-only knobs for register_allocation(). */
struct ra_test_policy {
   /* Force RA to always use its pessimistic fallback algorithm */
   bool skip_optimistic_path = false;
};
2167 
2168 void init();
2169 
2170 void init_program(Program* program, Stage stage, const struct aco_shader_info* info,
2171                   enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
2172                   ac_shader_config* config);
2173 
2174 void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
2175                     ac_shader_config* config, const struct aco_compiler_options* options,
2176                     const struct aco_shader_info* info, const struct ac_shader_args* args);
2177 void select_trap_handler_shader(Program* program, struct nir_shader* shader,
2178                                 ac_shader_config* config,
2179                                 const struct aco_compiler_options* options,
2180                                 const struct aco_shader_info* info,
2181                                 const struct ac_shader_args* args);
2182 void select_rt_prolog(Program* program, ac_shader_config* config,
2183                       const struct aco_compiler_options* options,
2184                       const struct aco_shader_info* info, const struct ac_shader_args* in_args,
2185                       const struct ac_shader_args* out_args);
2186 void select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo,
2187                       ac_shader_config* config, const struct aco_compiler_options* options,
2188                       const struct aco_shader_info* info, const struct ac_shader_args* args);
2189 
2190 void select_ps_epilog(Program* program, void* pinfo, ac_shader_config* config,
2191                       const struct aco_compiler_options* options,
2192                       const struct aco_shader_info* info, const struct ac_shader_args* args);
2193 
2194 void select_ps_prolog(Program* program, void* pinfo, ac_shader_config* config,
2195                       const struct aco_compiler_options* options,
2196                       const struct aco_shader_info* info, const struct ac_shader_args* args);
2197 
2198 void lower_phis(Program* program);
2199 void lower_subdword(Program* program);
2200 void calc_min_waves(Program* program);
2201 void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
2202 void live_var_analysis(Program* program);
2203 std::vector<uint16_t> dead_code_analysis(Program* program);
2204 void dominator_tree(Program* program);
2205 void insert_exec_mask(Program* program);
2206 void value_numbering(Program* program);
2207 void optimize(Program* program);
2208 void optimize_postRA(Program* program);
2209 void setup_reduce_temp(Program* program);
2210 void lower_to_cssa(Program* program);
2211 void register_allocation(Program* program, ra_test_policy = {});
2212 void ssa_elimination(Program* program);
2213 void lower_to_hw_instr(Program* program);
2214 void schedule_program(Program* program);
2215 void schedule_ilp(Program* program);
2216 void schedule_vopd(Program* program);
2217 void spill(Program* program);
2218 void insert_waitcnt(Program* program);
2219 void insert_delay_alu(Program* program);
2220 void combine_delay_alu(Program* program);
2221 bool dealloc_vgprs(Program* program);
2222 void insert_NOPs(Program* program);
2223 void form_hard_clauses(Program* program);
2224 unsigned emit_program(Program* program, std::vector<uint32_t>& code,
2225                       std::vector<struct aco_symbol>* symbols = NULL, bool append_endpgm = true);
2226 /**
2227  * Returns true if print_asm can disassemble the given program for the current build/runtime
2228  * configuration
2229  */
2230 bool check_print_asm_support(Program* program);
2231 bool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output);
2232 bool validate_ir(Program* program);
2233 bool validate_cfg(Program* program);
2234 bool validate_ra(Program* program);
2235 bool validate_live_vars(Program* program);
2236 
2237 void collect_presched_stats(Program* program);
2238 void collect_preasm_stats(Program* program);
2239 void collect_postasm_stats(Program* program, const std::vector<uint32_t>& code);
2240 
/* Timing information for a single instruction, returned by get_cycle_info(). */
struct Instruction_cycle_info {
   /* Latency until the result is ready (if not needing a waitcnt) */
   unsigned latency;

   /* How many cycles issuing this instruction takes (i.e. cycles till the next instruction can be
    * issued)*/
   unsigned issue_cycles;
};
2249 
2250 Instruction_cycle_info get_cycle_info(const Program& program, const Instruction& instr);
2251 
/* Bitmask flags for the aco_print_* functions below. */
enum print_flags {
   print_no_ssa = 0x1,    /* print physical registers instead of SSA temp ids */
   print_perf_info = 0x2, /* include performance information */
   print_kill = 0x4,      /* mark killed (last-use) operands */
   print_live_vars = 0x8, /* include live-variable information */
};
2258 
2259 void aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0);
2260 void aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
2261                      unsigned flags = 0);
2262 void aco_print_program(const Program* program, FILE* output, unsigned flags = 0);
2263 
2264 void _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...);
2265 
2266 #define aco_err(program, ...)      _aco_err(program, __FILE__, __LINE__, __VA_ARGS__)
2267 
2268 int get_op_fixed_to_def(Instruction* instr);
2269 
2270 /* utilities for dealing with register demand */
2271 RegisterDemand get_live_changes(Instruction* instr);
2272 RegisterDemand get_temp_registers(Instruction* instr);
2273 
/* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
2275 uint16_t get_extra_sgprs(Program* program);
2276 
2277 /* adjust num_waves for workgroup size and LDS limits */
2278 uint16_t max_suitable_waves(Program* program, uint16_t waves);
2279 
2280 /* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
2281 uint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs);
2282 uint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs);
2283 
2284 /* return number of addressable sgprs/vgprs for max_waves */
2285 uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
2286 uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);
2287 
2288 bool uses_scratch(Program* program);
2289 
2290 inline bool
dominates_logical(const Block & parent,const Block & child)2291 dominates_logical(const Block& parent, const Block& child)
2292 {
2293    return child.logical_dom_pre_index >= parent.logical_dom_pre_index &&
2294           child.logical_dom_post_index <= parent.logical_dom_post_index;
2295 }
2296 
2297 inline bool
dominates_linear(const Block & parent,const Block & child)2298 dominates_linear(const Block& parent, const Block& child)
2299 {
2300    return child.linear_dom_pre_index >= parent.linear_dom_pre_index &&
2301           child.linear_dom_post_index <= parent.linear_dom_post_index;
2302 }
2303 
/* Static per-opcode information tables, all indexed by aco_opcode
 * (see the instr_info singleton below). */
typedef struct {
   /* hardware encodings per gfx generation (presumably -1 when the opcode is
    * unsupported on that generation — confirm against the generated tables) */
   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx11[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx12[static_cast<int>(aco_opcode::num_opcodes)];
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
   const char* name[static_cast<int>(aco_opcode::num_opcodes)];
   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
   /* sizes used for input/output modifiers and constants */
   const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
   const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
   const uint32_t definitions[static_cast<int>(aco_opcode::num_opcodes)];
   const uint32_t operands[static_cast<int>(aco_opcode::num_opcodes)];
} Info;
2321 
2322 extern const Info instr_info;
2323 
2324 } // namespace aco
2325 
2326 #endif /* ACO_IR_H */
2327