/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2020 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "compiler/nir/nir.h"
#include "util/half_float.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "util/u_worklist.h"
#include "agx_compile.h"
#include "agx_minifloat.h"
#include "agx_opcodes.h"

#ifdef __cplusplus
extern "C" {
#endif

/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
#define AGX_NUM_REGS (256)

/* u0-u255 inclusive, as pairs of 16-bits */
#define AGX_NUM_UNIFORMS (512)

/* Semi-arbitrary limit for spill slot allocation */
#define AGX_NUM_MODELED_REGS (2048)

/* Limit on number of sources for non-phi instructions */
#define AGX_MAX_NORMAL_SOURCES (16)

enum agx_index_type {
   AGX_INDEX_NULL = 0,
   AGX_INDEX_NORMAL = 1,
   AGX_INDEX_IMMEDIATE = 2,
   AGX_INDEX_UNIFORM = 3,
   AGX_INDEX_REGISTER = 4,
   AGX_INDEX_UNDEF = 5,
};

enum agx_size { AGX_SIZE_16 = 0, AGX_SIZE_32 = 1, AGX_SIZE_64 = 2 };

static inline unsigned
agx_size_align_16(enum agx_size size)
{
   switch (size) {
   case AGX_SIZE_16:
      return 1;
   case AGX_SIZE_32:
      return 2;
   case AGX_SIZE_64:
      return 4;
   }

   unreachable("Invalid size");
}
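
/* For example, agx_size_align_16(AGX_SIZE_32) == 2: a 32-bit value occupies
 * two 16-bit register halves, so sizes and alignments throughout the compiler
 * are measured in 16-bit units, matching the half-word register numbering.
 */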

/* Keep synced with hash_index */
typedef struct {
   /* Sufficient for as many SSA values, immediates, and uniforms as we need. */
   uint32_t value;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis.
    */
   bool kill : 1;

   /* Cache hints */
   bool cache   : 1;
   bool discard : 1;

   /* src - float modifiers */
   bool abs : 1;
   bool neg : 1;

   /* Register class */
   bool memory : 1;

   unsigned channels_m1     : 3;
   enum agx_size size       : 2;
   enum agx_index_type type : 3;
   unsigned padding         : 18;
} agx_index;
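
/* The bitfields above total 14 bits of flags plus 18 bits of padding, packing
 * into a single 32-bit word alongside `value`, so an agx_index fits in 64
 * bits and is cheap to pass by value, as the helpers below do.
 */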

static inline unsigned
agx_channels(agx_index idx)
{
   return idx.channels_m1 + 1;
}

static inline unsigned
agx_index_size_16(agx_index idx)
{
   return agx_size_align_16(idx.size) * agx_channels(idx);
}

static inline agx_index
agx_get_vec_index(unsigned value, enum agx_size size, unsigned channels)
{
   return (agx_index){
      .value = value,
      .channels_m1 = channels - 1,
      .size = size,
      .type = AGX_INDEX_NORMAL,
   };
}

static inline agx_index
agx_get_index(unsigned value, enum agx_size size)
{
   return agx_get_vec_index(value, size, 1);
}

static inline agx_index
agx_immediate(uint32_t imm)
{
   assert(imm < (1 << 16) && "overflowed immediate");

   return (agx_index){
      .value = imm,
      .size = AGX_SIZE_16,
      .type = AGX_INDEX_IMMEDIATE,
   };
}

static inline agx_index
agx_immediate_f(float f)
{
   assert(agx_minifloat_exact(f));
   return agx_immediate(agx_minifloat_encode(f));
}
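
/* For example, agx_immediate_f(1.0f) yields the 8-bit AGX minifloat encoding
 * of 1.0 as a 16-bit immediate; the assert above guarantees the value is
 * exactly representable in the minifloat format.
 */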

/* in half-words, specify r0h as 1, r1 as 2... */
static inline agx_index
agx_register(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_REGS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}
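
/* Under that numbering, agx_register(0, AGX_SIZE_16) is r0l,
 * agx_register(1, AGX_SIZE_16) is r0h, and agx_register(2, AGX_SIZE_32) is
 * the full 32-bit r1.
 */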

static inline agx_index
agx_memory_register(uint32_t imm, enum agx_size size)
{
   return (agx_index){
      .value = imm,
      .memory = true,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_register_like(uint32_t imm, agx_index like)
{
   return (agx_index){
      .value = imm,
      .memory = like.memory,
      .channels_m1 = like.channels_m1,
      .size = like.size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_undef(enum agx_size size)
{
   return (agx_index){
      .size = size,
      .type = AGX_INDEX_UNDEF,
   };
}

/* Also in half-words */
static inline agx_index
agx_uniform(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_UNIFORMS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_UNIFORM,
   };
}

static inline agx_index
agx_null()
{
   return (agx_index){.type = AGX_INDEX_NULL};
}

static inline agx_index
agx_zero()
{
   return agx_immediate(0);
}

/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
 * = exponent = 0, sign bit set */

static inline agx_index
agx_negzero()
{
   return agx_immediate(0x80);
}

static inline agx_index
agx_abs(agx_index idx)
{
   idx.abs = true;
   idx.neg = false;
   return idx;
}

static inline agx_index
agx_neg(agx_index idx)
{
   idx.neg ^= true;
   return idx;
}
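
/* Note agx_neg toggles rather than sets, so negating twice restores the
 * original source, while agx_abs clears neg because |-x| == |x|, folding
 * abs-of-neg into a plain abs.
 */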

/* Replaces an index, preserving any modifiers */

static inline agx_index
agx_replace_index(agx_index old, agx_index replacement)
{
   replacement.abs = old.abs;
   replacement.neg = old.neg;
   return replacement;
}

static inline bool
agx_is_null(agx_index idx)
{
   return idx.type == AGX_INDEX_NULL;
}

/* Compares equivalence as references */

static inline bool
agx_is_equiv(agx_index left, agx_index right)
{
   return (left.type == right.type) && (left.value == right.value);
}

enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};

enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};

enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};

enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};

enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_AUTO_LOD_BIAS_UNIFORM = 1,
   AGX_LOD_MODE_LOD_MIN_UNIFORM = 2,
   AGX_LOD_MODE_LOD_GRAD = 4,
   AGX_LOD_MODE_AUTO_LOD_BIAS = 5,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_MODE_AUTO_LOD_BIAS_MIN_UNIFORM = 9,
   AGX_LOD_MODE_LOD_GRAD_MIN = 12,
   AGX_LOD_MODE_AUTO_LOD_BIAS_MIN = 13,
};

/* Forward declare for branch target */
struct agx_block;

/* Keep synced with hash_instr */
typedef struct {
   /* Must be first */
   struct list_head link;

   /* The sources list. */
   agx_index *src;

   /* Data flow */
   agx_index *dest;

   enum agx_opcode op;

   uint8_t nr_dests;
   uint8_t nr_srcs;

   /* TODO: More efficient */
   union {
      enum agx_icond icond;
      enum agx_fcond fcond;
   };

   union {
      uint64_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      uint16_t pixel_offset;
      uint16_t zs;
      int16_t stack_size;
      enum agx_sr sr;
      enum agx_round round;
      enum agx_atomic_opc atomic_opc;
      enum agx_lod_mode lod_mode;
      enum agx_simd_op simd_op;
      struct agx_block *target;

      /* As a special case to work around ordering issues when translating
       * phis: if nr_srcs == 0 and the opcode is PHI, points to the NIR phi.
       */
      nir_phi_instr *phi;
   };

   /* For local access */
   enum agx_format format;

   /* Number of nested control flow layers to jump by. TODO: Optimize */
   uint32_t nest;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficiently */
   enum agx_dim dim       : 4;
   bool offset            : 1;
   bool shadow            : 1;
   bool query_lod         : 1;
   enum agx_gather gather : 3;

   /* TODO: Handle tilebuffer ops more efficiently */
   bool explicit_coords : 1;

   /* TODO: Handle iter ops more efficiently */
   enum agx_interpolation interpolation : 2;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;

   unsigned padding : 8;
} agx_instr;
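
/* Which member of the large union is meaningful depends on op: for example,
 * branch-like instructions use `target`, while a PHI whose sources have not
 * yet been filled in stashes the NIR phi in `phi`, per the comment above.
 */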

static inline void
agx_replace_src(agx_instr *I, unsigned src_index, agx_index replacement)
{
   I->src[src_index] = agx_replace_index(I->src[src_index], replacement);
}

struct agx_block;

typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph */
   struct agx_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* For visited blocks during register assignment and live-out registers, the
    * mapping of registers to SSA names at the end of the block. This is dense,
    * unlike its inverse.
    */
   uint32_t *reg_to_ssa_out[2];

   /* Is this block a loop header? If not, all of its predecessors precede it in
    * source order.
    */
   bool loop_header;

   /* Offset of the block in the emitted binary */
   off_t offset, last_offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;

typedef struct {
   nir_shader *nir;
   gl_shader_stage stage;
   bool is_preamble;
   unsigned scratch_size;

   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out;
   struct agx_shader_key *key;

   /* Maximum block index */
   unsigned num_blocks;

   /* For creating temporaries */
   unsigned alloc;

   /* Does the shader statically use scratch memory? */
   bool any_scratch;

   /* I don't really understand how writeout ops work yet */
   bool did_writeout;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /* Do we need r0h zeroed throughout the program to handle quad-divergent
    * shuffle?
    */
   bool any_quad_divergent_shuffle;

   /* Number of nested control flow structures within the innermost loop. Since
    * NIR has only loops and if-else, this is the number of nested if-else
    * statements in the loop.
    */
   unsigned loop_nesting;

   /* Total nesting across all loops, to determine if we need push_exec */
   unsigned total_nesting;

   /* Whether the loop being emitted uses any `continue` jumps */
   bool loop_continues;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;
   agx_block **indexed_nir_blocks;

   /* During instruction selection, map from vector agx_index to its scalar
    * components, populated by a split. */
   struct hash_table_u64 *allocated_vec;

   /* During instruction selection, preloaded values, or null if a given
    * register has not been preloaded.
    */
   agx_index preloaded[AGX_NUM_REGS];

   /* Beginning of our stack allocation used for spilling; below that is
    * NIR-level scratch.
    */
   unsigned spill_base;

   /* Beginning of stack allocation used for parallel copy lowering */
   bool has_spill_pcopy_reserved;
   unsigned spill_pcopy_base;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned max_reg;
} agx_context;

static inline void
agx_remove_instruction(agx_instr *ins)
{
   list_del(&ins->link);
}

static inline agx_index
agx_vec_temp(agx_context *ctx, enum agx_size size, unsigned channels)
{
   return agx_get_vec_index(ctx->alloc++, size, channels);
}

static inline agx_index
agx_temp(agx_context *ctx, enum agx_size size)
{
   return agx_get_index(ctx->alloc++, size);
}

static inline agx_index
agx_temp_like(agx_context *ctx, agx_index idx)
{
   idx.value = ctx->alloc++;
   return idx;
}

static inline enum agx_size
agx_size_for_bits(unsigned bits)
{
   switch (bits) {
   case 1:
   case 8:
   case 16:
      return AGX_SIZE_16;
   case 32:
      return AGX_SIZE_32;
   case 64:
      return AGX_SIZE_64;
   default:
      unreachable("Invalid bitsize");
   }
}

static inline agx_index
agx_def_index(nir_def *ssa)
{
   return agx_get_vec_index(ssa->index, agx_size_for_bits(ssa->bit_size),
                            ssa->num_components);
}

static inline agx_index
agx_src_index(nir_src *src)
{
   return agx_def_index(src->ssa);
}

static inline agx_index
agx_vec_for_def(agx_context *ctx, nir_def *def)
{
   return agx_vec_temp(ctx, agx_size_for_bits(def->bit_size),
                       def->num_components);
}

static inline agx_index
agx_vec_for_intr(agx_context *ctx, nir_intrinsic_instr *instr)
{
   return agx_vec_for_def(ctx, &instr->def);
}

static inline unsigned
agx_num_predecessors(agx_block *block)
{
   return util_dynarray_num_elements(&block->predecessors, agx_block *);
}

static inline unsigned
agx_num_successors(agx_block *block)
{
   STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2);
   return (block->successors[0] ? 1 : 0) + (block->successors[1] ? 1 : 0);
}

static inline agx_block *
agx_start_block(agx_context *ctx)
{
   agx_block *first = list_first_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_predecessors(first) == 0);
   return first;
}

static inline agx_block *
agx_end_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_successors(last) == 0);
   return last;
}

void agx_block_add_successor(agx_block *block, agx_block *successor);

/* Iterators for AGX IR */

#define agx_foreach_block(ctx, v)                                              \
   list_for_each_entry(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_safe(ctx, v)                                         \
   list_for_each_entry_safe(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_rev(ctx, v)                                          \
   list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_from(ctx, from, v)                                   \
   list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_block_from_rev(ctx, from, v)                               \
   list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_instr_in_block(block, v)                                   \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_rev(block, v)                               \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe(block, v)                              \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe_rev(block, v)                          \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from(block, v, from)                        \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from_rev(block, v, from)                    \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions,    \
                                link)

#define agx_foreach_instr_global(ctx, v)                                       \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block(v_block, v)

#define agx_foreach_instr_global_rev(ctx, v)                                   \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_rev(v_block, v)

#define agx_foreach_instr_global_safe(ctx, v)                                  \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block_safe(v_block, v)

#define agx_foreach_instr_global_safe_rev(ctx, v)                              \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_safe_rev(v_block, v)

/* Based on set_foreach, expanded with automatic type casts */

#define agx_foreach_successor(blk, v)                                          \
   agx_block *v;                                                               \
   agx_block **_v;                                                             \
   for (_v = (agx_block **)&blk->successors[0], v = *_v;                       \
        v != NULL && _v < (agx_block **)&blk->successors[2]; _v++, v = *_v)

#define agx_foreach_predecessor(blk, v)                                        \
   util_dynarray_foreach(&blk->predecessors, agx_block *, v)

#define agx_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)

#define agx_foreach_src_rev(ins, v)                                            \
   for (signed v = ins->nr_srcs - 1; v >= 0; --v)

#define agx_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)

#define agx_foreach_dest_rev(ins, v)                                           \
   for (signed v = ins->nr_dests - 1; v >= 0; --v)

#define agx_foreach_ssa_src(ins, v)                                            \
   agx_foreach_src(ins, v)                                                     \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_src_rev(ins, v)                                        \
   agx_foreach_src_rev(ins, v)                                                 \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest(ins, v)                                           \
   agx_foreach_dest(ins, v)                                                    \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest_rev(ins, v)                                       \
   agx_foreach_dest_rev(ins, v)                                                \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)
/* Phis only come at the start of a block (after else instructions), so we
 * stop as soon as we hit a non-phi.
 */
#define agx_foreach_phi_in_block(block, v)                                     \
   agx_foreach_instr_in_block(block, v)                                        \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else

#define agx_foreach_phi_in_block_safe(block, v)                                \
   agx_foreach_instr_in_block_safe(block, v)                                   \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else
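
/* A typical use iterates the phis at the top of a successor block, e.g.:
 *
 *    agx_foreach_phi_in_block(succ, phi) {
 *       agx_replace_src(phi, agx_predecessor_index(succ, pred), repl);
 *    }
 *
 * relying on phi source order matching predecessor order as described below.
 */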

/*
 * Find the index of a predecessor, used as the implicit order of phi sources.
 */
static inline unsigned
agx_predecessor_index(agx_block *succ, agx_block *pred)
{
   unsigned index = 0;

   agx_foreach_predecessor(succ, x) {
      if (*x == pred)
         return index;

      index++;
   }

   unreachable("Invalid predecessor");
}

static inline agx_block *
agx_prev_block(agx_block *ins)
{
   return list_last_entry(&(ins->link), agx_block, link);
}

static inline agx_instr *
agx_prev_op(agx_instr *ins)
{
   return list_last_entry(&(ins->link), agx_instr, link);
}

static inline agx_instr *
agx_first_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_first_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_last_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_last_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_next_op(agx_instr *ins)
{
   return list_first_entry(&(ins->link), agx_instr, link);
}

static inline agx_block *
agx_next_block(agx_block *block)
{
   return list_first_entry(&(block->link), agx_block, link);
}

static inline agx_block *
agx_exit_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(!last->successors[0] && !last->successors[1]);
   return last;
}

#define agx_worklist_init(ctx, w)        u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define agx_worklist_peek_head(w)        u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w)         u_worklist_pop_head(w, agx_block, index)
#define agx_worklist_peek_tail(w)        u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w)         u_worklist_pop_tail(w, agx_block, index)

/* Like in NIR, for use with the builder */

enum agx_cursor_option {
   agx_cursor_after_block,
   agx_cursor_before_instr,
   agx_cursor_after_instr
};

typedef struct {
   enum agx_cursor_option option;

   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;

static inline bool
agx_cursors_equal(agx_cursor a, agx_cursor b)
{
   if (a.option != b.option)
      return false;

   if (a.option == agx_cursor_after_block)
      return a.block == b.block;
   else
      return a.instr == b.instr;
}

static inline agx_cursor
agx_after_block(agx_block *block)
{
   return (agx_cursor){
      .option = agx_cursor_after_block,
      .block = block,
   };
}

static inline agx_cursor
agx_before_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_before_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_after_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_after_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_before_nonempty_block(agx_block *block)
{
   agx_instr *I = list_first_entry(&block->instructions, agx_instr, link);
   assert(I != NULL);

   return agx_before_instr(I);
}

static inline agx_cursor
agx_before_block(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return agx_after_block(block);
   else
      return agx_before_nonempty_block(block);
}

static inline bool
instr_after_logical_end(const agx_instr *I)
{
   switch (I->op) {
   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   case AGX_OPCODE_POP_EXEC:
   case AGX_OPCODE_BREAK:
   case AGX_OPCODE_IF_ICMP:
   case AGX_OPCODE_WHILE_ICMP:
   case AGX_OPCODE_IF_FCMP:
   case AGX_OPCODE_WHILE_FCMP:
   case AGX_OPCODE_STOP:
   case AGX_OPCODE_EXPORT:
      return true;
   default:
      return false;
   }
}

/*
 * Get a cursor inserting at the logical end of the block. In particular, this
 * is before branches or control flow instructions, which occur after the
 * logical end but before the physical end.
 */
static inline agx_cursor
agx_after_block_logical(agx_block *block)
{
   /* Search for the first instruction that's not past the logical end */
   agx_foreach_instr_in_block_rev(block, I) {
      if (!instr_after_logical_end(I))
         return agx_after_instr(I);
   }

   /* If we got here, the block is either empty or entirely control flow */
   return agx_before_block(block);
}

/* Get a cursor at the start of a function, after any preloads */
static inline agx_cursor
agx_before_function(agx_context *ctx)
{
   agx_block *block = agx_start_block(ctx);

   agx_foreach_instr_in_block(block, I) {
      if (I->op != AGX_OPCODE_PRELOAD)
         return agx_before_instr(I);
   }

   /* The whole block is preloads, so insert at the end */
   return agx_after_block(block);
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
   agx_context *shader;
   agx_cursor cursor;
} agx_builder;

static inline agx_builder
agx_init_builder(agx_context *ctx, agx_cursor cursor)
{
   return (agx_builder){
      .shader = ctx,
      .cursor = cursor,
   };
}
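
/* A minimal sketch of builder use: point a builder at the logical end of a
 * block, e.g.
 *
 *    agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
 *
 * and then append instructions through b.cursor via agx_builder_insert below.
 */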

/* Insert an instruction at the cursor and move the cursor */

static inline void
agx_builder_insert(agx_cursor *cursor, agx_instr *I)
{
   switch (cursor->option) {
   case agx_cursor_after_instr:
      list_add(&I->link, &cursor->instr->link);
      cursor->instr = I;
      return;

   case agx_cursor_after_block:
      list_addtail(&I->link, &cursor->block->instructions);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;

   case agx_cursor_before_instr:
      list_addtail(&I->link, &cursor->instr->link);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;
   }

   unreachable("Invalid cursor option");
}

bool agx_instr_accepts_uniform(enum agx_opcode op, unsigned src_index,
                               unsigned value, enum agx_size size);

/* Routines defined for AIR */
void agx_print_index(agx_index index, bool is_float, FILE *fp);
void agx_print_instr(const agx_instr *I, FILE *fp);
void agx_print_block(const agx_block *block, FILE *fp);
void agx_print_shader(const agx_context *ctx, FILE *fp);
void agx_optimizer(agx_context *ctx);
void agx_lower_divergent_shuffle(agx_context *ctx);
void agx_lower_pseudo(agx_context *ctx);
void agx_lower_spill(agx_context *ctx);
void agx_lower_uniform_sources(agx_context *ctx);
void agx_opt_cse(agx_context *ctx);
void agx_opt_compact_constants(agx_context *ctx);
void agx_opt_promote_constants(agx_context *ctx);
void agx_dce(agx_context *ctx, bool partial);
void agx_pressure_schedule(agx_context *ctx);
void agx_spill(agx_context *ctx, unsigned k);
void agx_repair_ssa(agx_context *ctx);
void agx_reindex_ssa(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_lower_64bit_postra(agx_context *ctx);
void agx_insert_waits(agx_context *ctx);
void agx_opt_empty_else(agx_context *ctx);
void agx_opt_break_if(agx_context *ctx);
void agx_opt_jmp_none(agx_context *ctx);
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);

#ifndef NDEBUG
void agx_validate(agx_context *ctx, const char *after_str);
#else
static inline void
agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str)
{
   return;
}
#endif

enum agx_size agx_split_width(const agx_instr *I);
bool agx_allows_16bit_immediate(agx_instr *I);

static inline bool
agx_is_float_src(const agx_instr *I, unsigned s)
{
   struct agx_opcode_info info = agx_opcodes_info[I->op];
   bool fcmp = (I->op == AGX_OPCODE_FCMPSEL || I->op == AGX_OPCODE_FCMP);

   /* fcmp takes its first two sources as floats but returns an integer */
   return info.is_float || (s < 2 && fcmp);
}

struct agx_copy {
   /* Base register destination of the copy */
   unsigned dest;

   /* Destination is memory */
   bool dest_mem;

   /* Source of the copy */
   agx_index src;

   /* Whether the copy has been handled. Callers must leave this false. */
   bool done;
};

void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
                              unsigned n);

void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);

bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
bool agx_nir_lower_load_mask(nir_shader *shader);
bool agx_nir_lower_address(nir_shader *shader);
bool agx_nir_lower_ubo(nir_shader *shader);
bool agx_nir_lower_shared_bitsize(nir_shader *shader);
bool agx_nir_lower_frag_sidefx(nir_shader *s);

struct agx_cycle_estimate {
   /* ALU throughput */
   unsigned alu;

   /* Floating point and SCIB (select, conditional, integer, and boolean)
    * throughput.
    */
   unsigned f_scib;

   /* IC (integer and complex) throughput */
   unsigned ic;
};

struct agx_cycle_estimate agx_estimate_cycles(agx_context *ctx);

extern int agx_compiler_debug;

#ifdef __cplusplus
} /* extern "C" */
#endif