/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2020 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "compiler/nir/nir.h"
#include "util/half_float.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "util/u_worklist.h"
#include "agx_compile.h"
#include "agx_minifloat.h"
#include "agx_opcodes.h"

#ifdef __cplusplus
extern "C" {
#endif

/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
#define AGX_NUM_REGS (256)

/* u0-u255 inclusive, as pairs of 16-bits */
#define AGX_NUM_UNIFORMS (512)

/* Semi-arbitrary limit for spill slot allocation */
#define AGX_NUM_MODELED_REGS (2048)

/* Limit on number of sources for non-phi instructions */
#define AGX_MAX_NORMAL_SOURCES (16)

enum agx_index_type {
   AGX_INDEX_NULL = 0,
   AGX_INDEX_NORMAL = 1,
   AGX_INDEX_IMMEDIATE = 2,
   AGX_INDEX_UNIFORM = 3,
   AGX_INDEX_REGISTER = 4,
   AGX_INDEX_UNDEF = 5,
};

enum agx_size { AGX_SIZE_16 = 0, AGX_SIZE_32 = 1, AGX_SIZE_64 = 2 };

static inline unsigned
agx_size_align_16(enum agx_size size)
{
   switch (size) {
   case AGX_SIZE_16:
      return 1;
   case AGX_SIZE_32:
      return 2;
   case AGX_SIZE_64:
      return 4;
   }

   unreachable("Invalid size");
}

/* Keep synced with hash_index */
typedef struct {
   /* Sufficient for as many SSA values, immediates, and uniforms as we need. */
   uint32_t value;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis.
    */
   bool kill : 1;

   /* Cache hints */
   bool cache : 1;
   bool discard : 1;

   /* src - float modifiers */
   bool abs : 1;
   bool neg : 1;

   /* Register class */
   bool memory : 1;

   unsigned channels_m1 : 3;
   enum agx_size size : 2;
   enum agx_index_type type : 3;
   unsigned padding : 18;
} agx_index;

static inline unsigned
agx_channels(agx_index idx)
{
   return idx.channels_m1 + 1;
}
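
/* Size of an index in 16-bit register units. For example, a 4-channel 32-bit
 * value occupies agx_size_align_16(AGX_SIZE_32) * 4 = 8 half-words.
 */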
static inline unsigned
agx_index_size_16(agx_index idx)
{
   return agx_size_align_16(idx.size) * agx_channels(idx);
}

static inline agx_index
agx_get_vec_index(unsigned value, enum agx_size size, unsigned channels)
{
   return (agx_index){
      .value = value,
      .channels_m1 = channels - 1,
      .size = size,
      .type = AGX_INDEX_NORMAL,
   };
}

static inline agx_index
agx_get_index(unsigned value, enum agx_size size)
{
   return agx_get_vec_index(value, size, 1);
}

static inline agx_index
agx_immediate(uint32_t imm)
{
   assert(imm < (1 << 16) && "overflowed immediate");

   return (agx_index){
      .value = imm,
      .size = AGX_SIZE_16,
      .type = AGX_INDEX_IMMEDIATE,
   };
}

static inline agx_index
agx_immediate_f(float f)
{
   assert(agx_minifloat_exact(f));
   return agx_immediate(agx_minifloat_encode(f));
}

/* in half-words, specify r0h as 1, r1 as 2... */
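/* For example, agx_register(2, AGX_SIZE_32) names the 32-bit register r1,
 * which spans half-words 2 and 3. */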
static inline agx_index
agx_register(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_REGS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_memory_register(uint32_t imm, enum agx_size size)
{
   return (agx_index){
      .value = imm,
      .memory = true,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_register_like(uint32_t imm, agx_index like)
{
   return (agx_index){
      .value = imm,
      .memory = like.memory,
      .channels_m1 = like.channels_m1,
      .size = like.size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_undef(enum agx_size size)
{
   return (agx_index){
      .size = size,
      .type = AGX_INDEX_UNDEF,
   };
}

/* Also in half-words */
static inline agx_index
agx_uniform(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_UNIFORMS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_UNIFORM,
   };
}

static inline agx_index
agx_null()
{
   return (agx_index){.type = AGX_INDEX_NULL};
}

static inline agx_index
agx_zero()
{
   return agx_immediate(0);
}

/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
 * = exponent = 0, sign bit set */

static inline agx_index
agx_negzero()
{
   return agx_immediate(0x80);
}

static inline agx_index
agx_abs(agx_index idx)
{
   idx.abs = true;
   idx.neg = false;
   return idx;
}

static inline agx_index
agx_neg(agx_index idx)
{
   idx.neg ^= true;
   return idx;
}

/* Replaces an index, preserving any modifiers */

static inline agx_index
agx_replace_index(agx_index old, agx_index replacement)
{
   replacement.abs = old.abs;
   replacement.neg = old.neg;
   return replacement;
}

static inline bool
agx_is_null(agx_index idx)
{
   return idx.type == AGX_INDEX_NULL;
}

/* Compares equivalence as references */

static inline bool
agx_is_equiv(agx_index left, agx_index right)
{
   return (left.type == right.type) && (left.value == right.value);
}

enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};

enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};

enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};

enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};

enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_AUTO_LOD_BIAS_UNIFORM = 1,
   AGX_LOD_MODE_LOD_MIN_UNIFORM = 2,
   AGX_LOD_MODE_AUTO_LOD_BIAS = 5,
   AGX_LOD_MODE_LOD_GRAD = 4,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_MODE_AUTO_LOD_BIAS_MIN_UNIFORM = 9,
   AGX_LOD_MODE_LOD_GRAD_MIN = 12,
   AGX_LOD_MODE_AUTO_LOD_BIAS_MIN = 13,
};

/* Forward declare for branch target */
struct agx_block;

/* Keep synced with hash_instr */
typedef struct {
   /* Must be first */
   struct list_head link;

   /* The sources list. */
   agx_index *src;

   /* Data flow */
   agx_index *dest;

   enum agx_opcode op;

   uint8_t nr_dests;
   uint8_t nr_srcs;

   /* TODO: More efficient */
   union {
      enum agx_icond icond;
      enum agx_fcond fcond;
   };

   union {
      uint64_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      uint16_t pixel_offset;
      uint16_t zs;
      int16_t stack_size;
      enum agx_sr sr;
      enum agx_round round;
      enum agx_atomic_opc atomic_opc;
      enum agx_lod_mode lod_mode;
      enum agx_simd_op simd_op;
      struct agx_block *target;

      /* As a special case to work around ordering issues when translating
       * phis, if nr_srcs == 0 and the opcode is PHI, points to the NIR phi.
       */
      nir_phi_instr *phi;
   };

   /* For local access */
   enum agx_format format;

   /* Number of nested control flow layers to jump by. TODO: Optimize */
   uint32_t nest;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficiently */
   enum agx_dim dim : 4;
   bool offset : 1;
   bool shadow : 1;
   bool query_lod : 1;
   enum agx_gather gather : 3;

   /* TODO: Handle tilebuffer ops more efficiently */
   bool explicit_coords : 1;

   /* TODO: Handle iter ops more efficiently */
   enum agx_interpolation interpolation : 2;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;

   unsigned padding : 8;
} agx_instr;

static inline void
agx_replace_src(agx_instr *I, unsigned src_index, agx_index replacement)
{
   I->src[src_index] = agx_replace_index(I->src[src_index], replacement);
}

struct agx_block;

typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph */
   struct agx_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* For blocks visited during register assignment, the mapping of registers
    * to live-out SSA names at the end of the block. This is dense, unlike its
    * inverse.
    */
   uint32_t *reg_to_ssa_out[2];

   /* Is this block a loop header? If not, all of its predecessors precede it
    * in source order.
    */
   bool loop_header;

   /* Offset of the block in the emitted binary */
   off_t offset, last_offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;

typedef struct {
   nir_shader *nir;
   gl_shader_stage stage;
   bool is_preamble;
   unsigned scratch_size;

   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out;
   struct agx_shader_key *key;

   /* Maximum block index */
   unsigned num_blocks;

   /* For creating temporaries */
   unsigned alloc;

   /* Does the shader statically use scratch memory? */
   bool any_scratch;

   /* I don't really understand how writeout ops work yet */
   bool did_writeout;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /* Do we need r0h zero throughout the program to handle quad-divergent
    * shuffle?
    */
   bool any_quad_divergent_shuffle;

   /* Number of nested control flow structures within the innermost loop.
    * Since NIR is just loop and if-else, this is the number of nested if-else
    * statements in the loop.
    */
   unsigned loop_nesting;

   /* Total nesting across all loops, to determine if we need push_exec */
   unsigned total_nesting;

   /* Whether the loop being emitted used any `continue` jumps */
   bool loop_continues;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;
   agx_block **indexed_nir_blocks;

   /* During instruction selection, map from vector agx_index to its scalar
    * components, populated by a split. */
   struct hash_table_u64 *allocated_vec;

   /* During instruction selection, preloaded values, or a null index if the
    * value has not been preloaded.
    */
   agx_index preloaded[AGX_NUM_REGS];

   /* Beginning of our stack allocation used for spilling; below that is
    * NIR-level scratch.
    */
   unsigned spill_base;

   /* Beginning of stack allocation used for parallel copy lowering */
   bool has_spill_pcopy_reserved;
   unsigned spill_pcopy_base;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned max_reg;
} agx_context;

static inline void
agx_remove_instruction(agx_instr *ins)
{
   list_del(&ins->link);
}

static inline agx_index
agx_vec_temp(agx_context *ctx, enum agx_size size, unsigned channels)
{
   return agx_get_vec_index(ctx->alloc++, size, channels);
}

static inline agx_index
agx_temp(agx_context *ctx, enum agx_size size)
{
   return agx_get_index(ctx->alloc++, size);
}

static inline agx_index
agx_temp_like(agx_context *ctx, agx_index idx)
{
   idx.value = ctx->alloc++;
   return idx;
}

static enum agx_size
agx_size_for_bits(unsigned bits)
{
   switch (bits) {
   case 1:
   case 8:
   case 16:
      return AGX_SIZE_16;
   case 32:
      return AGX_SIZE_32;
   case 64:
      return AGX_SIZE_64;
   default:
      unreachable("Invalid bitsize");
   }
}

static inline agx_index
agx_def_index(nir_def *ssa)
{
   return agx_get_vec_index(ssa->index, agx_size_for_bits(ssa->bit_size),
                            ssa->num_components);
}

static inline agx_index
agx_src_index(nir_src *src)
{
   return agx_def_index(src->ssa);
}

static inline agx_index
agx_vec_for_def(agx_context *ctx, nir_def *def)
{
   return agx_vec_temp(ctx, agx_size_for_bits(def->bit_size),
                       def->num_components);
}

static inline agx_index
agx_vec_for_intr(agx_context *ctx, nir_intrinsic_instr *instr)
{
   return agx_vec_for_def(ctx, &instr->def);
}

static inline unsigned
agx_num_predecessors(agx_block *block)
{
   return util_dynarray_num_elements(&block->predecessors, agx_block *);
}

static inline unsigned
agx_num_successors(agx_block *block)
{
   STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2);
   return (block->successors[0] ? 1 : 0) + (block->successors[1] ? 1 : 0);
}

static inline agx_block *
agx_start_block(agx_context *ctx)
{
   agx_block *first = list_first_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_predecessors(first) == 0);
   return first;
}

static inline agx_block *
agx_end_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_successors(last) == 0);
   return last;
}

void agx_block_add_successor(agx_block *block, agx_block *successor);

/* Iterators for AGX IR */

#define agx_foreach_block(ctx, v)                                              \
   list_for_each_entry(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_safe(ctx, v)                                         \
   list_for_each_entry_safe(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_rev(ctx, v)                                          \
   list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_from(ctx, from, v)                                   \
   list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_block_from_rev(ctx, from, v)                               \
   list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_instr_in_block(block, v)                                   \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_rev(block, v)                               \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe(block, v)                              \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe_rev(block, v)                          \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from(block, v, from)                        \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from_rev(block, v, from)                    \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions,    \
                                link)

#define agx_foreach_instr_global(ctx, v)                                       \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block(v_block, v)

#define agx_foreach_instr_global_rev(ctx, v)                                   \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_rev(v_block, v)

#define agx_foreach_instr_global_safe(ctx, v)                                  \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block_safe(v_block, v)

#define agx_foreach_instr_global_safe_rev(ctx, v)                              \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_safe_rev(v_block, v)

/* Based on set_foreach, expanded with automatic type casts */

#define agx_foreach_successor(blk, v)                                          \
   agx_block *v;                                                               \
   agx_block **_v;                                                             \
   for (_v = (agx_block **)&blk->successors[0], v = *_v;                       \
        v != NULL && _v < (agx_block **)&blk->successors[2]; _v++, v = *_v)

#define agx_foreach_predecessor(blk, v)                                        \
   util_dynarray_foreach(&blk->predecessors, agx_block *, v)

#define agx_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)

#define agx_foreach_src_rev(ins, v)                                            \
   for (signed v = ins->nr_srcs - 1; v >= 0; --v)

#define agx_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)

#define agx_foreach_dest_rev(ins, v)                                           \
   for (signed v = ins->nr_dests - 1; v >= 0; --v)

#define agx_foreach_ssa_src(ins, v)                                            \
   agx_foreach_src(ins, v)                                                     \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_src_rev(ins, v)                                        \
   agx_foreach_src_rev(ins, v)                                                 \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest(ins, v)                                           \
   agx_foreach_dest(ins, v)                                                    \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest_rev(ins, v)                                       \
   agx_foreach_dest_rev(ins, v)                                                \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)

/* Phis only come at the start (after else instructions) so we stop as soon as
 * we hit a non-phi
 */
#define agx_foreach_phi_in_block(block, v)                                     \
   agx_foreach_instr_in_block(block, v)                                        \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else

#define agx_foreach_phi_in_block_safe(block, v)                                \
   agx_foreach_instr_in_block_safe(block, v)                                   \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else

/*
 * Find the index of a predecessor, used as the implicit order of phi sources.
 */
static inline unsigned
agx_predecessor_index(agx_block *succ, agx_block *pred)
{
   unsigned index = 0;

   agx_foreach_predecessor(succ, x) {
      if (*x == pred)
         return index;

      index++;
   }

   unreachable("Invalid predecessor");
}

static inline agx_block *
agx_prev_block(agx_block *ins)
{
   return list_last_entry(&(ins->link), agx_block, link);
}

static inline agx_instr *
agx_prev_op(agx_instr *ins)
{
   return list_last_entry(&(ins->link), agx_instr, link);
}

static inline agx_instr *
agx_first_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_first_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_last_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_last_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_next_op(agx_instr *ins)
{
   return list_first_entry(&(ins->link), agx_instr, link);
}

static inline agx_block *
agx_next_block(agx_block *block)
{
   return list_first_entry(&(block->link), agx_block, link);
}

static inline agx_block *
agx_exit_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(!last->successors[0] && !last->successors[1]);
   return last;
}

#define agx_worklist_init(ctx, w)        u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define agx_worklist_peek_head(w)        u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w)         u_worklist_pop_head(w, agx_block, index)
#define agx_worklist_peek_tail(w)        u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w)         u_worklist_pop_tail(w, agx_block, index)
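
/* Example (sketch) of driving a dataflow pass with the worklist wrappers
 * above, in the style of liveness analysis. Assumes the u_worklist type and
 * helpers (u_worklist_is_empty, u_worklist_fini) from util/u_worklist.h.
 *
 *    u_worklist worklist;
 *    agx_worklist_init(ctx, &worklist);
 *
 *    agx_foreach_block(ctx, block)
 *       agx_worklist_push_head(&worklist, block);
 *
 *    while (!u_worklist_is_empty(&worklist)) {
 *       agx_block *blk = agx_worklist_pop_head(&worklist);
 *       ... update blk, pushing predecessors whose results changed ...
 *    }
 *
 *    u_worklist_fini(&worklist);
 */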

/* Like in NIR, for use with the builder */

enum agx_cursor_option {
   agx_cursor_after_block,
   agx_cursor_before_instr,
   agx_cursor_after_instr
};

typedef struct {
   enum agx_cursor_option option;

   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;

static inline bool
agx_cursors_equal(agx_cursor a, agx_cursor b)
{
   if (a.option != b.option)
      return false;

   if (a.option == agx_cursor_after_block)
      return a.block == b.block;
   else
      return a.instr == b.instr;
}

static inline agx_cursor
agx_after_block(agx_block *block)
{
   return (agx_cursor){
      .option = agx_cursor_after_block,
      .block = block,
   };
}

static inline agx_cursor
agx_before_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_before_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_after_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_after_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_before_nonempty_block(agx_block *block)
{
   agx_instr *I = list_first_entry(&block->instructions, agx_instr, link);
   assert(I != NULL);

   return agx_before_instr(I);
}

static inline agx_cursor
agx_before_block(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return agx_after_block(block);
   else
      return agx_before_nonempty_block(block);
}

static inline bool
instr_after_logical_end(const agx_instr *I)
{
   switch (I->op) {
   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   case AGX_OPCODE_POP_EXEC:
   case AGX_OPCODE_BREAK:
   case AGX_OPCODE_IF_ICMP:
   case AGX_OPCODE_WHILE_ICMP:
   case AGX_OPCODE_IF_FCMP:
   case AGX_OPCODE_WHILE_FCMP:
   case AGX_OPCODE_STOP:
   case AGX_OPCODE_EXPORT:
      return true;
   default:
      return false;
   }
}

/*
 * Get a cursor inserting at the logical end of the block. In particular, this
 * is before branches or control flow instructions, which occur after the
 * logical end but before the physical end.
 */
static inline agx_cursor
agx_after_block_logical(agx_block *block)
{
   /* Search for the first instruction that's not past the logical end */
   agx_foreach_instr_in_block_rev(block, I) {
      if (!instr_after_logical_end(I))
         return agx_after_instr(I);
   }

   /* If we got here, the block is either empty or entirely control flow */
   return agx_before_block(block);
}

/* Get a cursor at the start of a function, after any preloads */
static inline agx_cursor
agx_before_function(agx_context *ctx)
{
   agx_block *block = agx_start_block(ctx);

   agx_foreach_instr_in_block(block, I) {
      if (I->op != AGX_OPCODE_PRELOAD)
         return agx_before_instr(I);
   }

   /* The whole block is preloads, so insert at the end */
   return agx_after_block(block);
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
   agx_context *shader;
   agx_cursor cursor;
} agx_builder;

static inline agx_builder
agx_init_builder(agx_context *ctx, agx_cursor cursor)
{
   return (agx_builder){
      .shader = ctx,
      .cursor = cursor,
   };
}
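
/* Example (sketch): point a builder at the logical end of a block and emit an
 * instruction there. agx_mov_imm_to is assumed to be one of the builder
 * helpers generated alongside agx_opcodes.h; the exact name depends on the
 * opcode definitions.
 *
 *    agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
 *    agx_index tmp = agx_temp(ctx, AGX_SIZE_32);
 *    agx_mov_imm_to(&b, tmp, 42);
 */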

/* Insert an instruction at the cursor and move the cursor */

static inline void
agx_builder_insert(agx_cursor *cursor, agx_instr *I)
{
   switch (cursor->option) {
   case agx_cursor_after_instr:
      list_add(&I->link, &cursor->instr->link);
      cursor->instr = I;
      return;

   case agx_cursor_after_block:
      list_addtail(&I->link, &cursor->block->instructions);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;

   case agx_cursor_before_instr:
      list_addtail(&I->link, &cursor->instr->link);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;
   }

   unreachable("Invalid cursor option");
}

bool agx_instr_accepts_uniform(enum agx_opcode op, unsigned src_index,
                               unsigned value, enum agx_size size);

/* Routines defined for AIR */
void agx_print_index(agx_index index, bool is_float, FILE *fp);
void agx_print_instr(const agx_instr *I, FILE *fp);
void agx_print_block(const agx_block *block, FILE *fp);
void agx_print_shader(const agx_context *ctx, FILE *fp);
void agx_optimizer(agx_context *ctx);
void agx_lower_divergent_shuffle(agx_context *ctx);
void agx_lower_pseudo(agx_context *ctx);
void agx_lower_spill(agx_context *ctx);
void agx_lower_uniform_sources(agx_context *ctx);
void agx_opt_cse(agx_context *ctx);
void agx_opt_compact_constants(agx_context *ctx);
void agx_opt_promote_constants(agx_context *ctx);
void agx_dce(agx_context *ctx, bool partial);
void agx_pressure_schedule(agx_context *ctx);
void agx_spill(agx_context *ctx, unsigned k);
void agx_repair_ssa(agx_context *ctx);
void agx_reindex_ssa(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_lower_64bit_postra(agx_context *ctx);
void agx_insert_waits(agx_context *ctx);
void agx_opt_empty_else(agx_context *ctx);
void agx_opt_break_if(agx_context *ctx);
void agx_opt_jmp_none(agx_context *ctx);
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);

#ifndef NDEBUG
void agx_validate(agx_context *ctx, const char *after_str);
#else
static inline void
agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str)
{
   return;
}
#endif

enum agx_size agx_split_width(const agx_instr *I);
bool agx_allows_16bit_immediate(agx_instr *I);

static inline bool
agx_is_float_src(const agx_instr *I, unsigned s)
{
   struct agx_opcode_info info = agx_opcodes_info[I->op];
   bool fcmp = (I->op == AGX_OPCODE_FCMPSEL || I->op == AGX_OPCODE_FCMP);

   /* fcmp takes its first two sources as floats but returns an integer */
   return info.is_float || (s < 2 && fcmp);
}

struct agx_copy {
   /* Base register destination of the copy */
   unsigned dest;

   /* Destination is memory */
   bool dest_mem;

   /* Source of the copy */
   agx_index src;

   /* Whether the copy has been handled. Callers must leave this false. */
   bool done;
};

void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
                              unsigned n);
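
/* Example (sketch): emit a single register-to-register copy through the
 * parallel copy lowering, given an agx_builder b. This moves the 32-bit value
 * in r3 (half-words 6-7) into r2 (half-words 4-5); done is left false as
 * required.
 *
 *    struct agx_copy copy = {
 *       .dest = 4,
 *       .src = agx_register(6, AGX_SIZE_32),
 *    };
 *    agx_emit_parallel_copies(&b, &copy, 1);
 */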

void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);

bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
bool agx_nir_lower_load_mask(nir_shader *shader);
bool agx_nir_lower_address(nir_shader *shader);
bool agx_nir_lower_ubo(nir_shader *shader);
bool agx_nir_lower_shared_bitsize(nir_shader *shader);
bool agx_nir_lower_frag_sidefx(nir_shader *s);

struct agx_cycle_estimate {
   /* ALU throughput */
   unsigned alu;

   /* Floating point and SCIB (select, conditional, integer, and boolean)
    * throughput.
    */
   unsigned f_scib;

   /* IC (integer and complex) throughput */
   unsigned ic;
};

struct agx_cycle_estimate agx_estimate_cycles(agx_context *ctx);

extern int agx_compiler_debug;

#ifdef __cplusplus
} /* extern C */
#endif