xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_ra_predicates.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2024 Igalia S.L.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "ir3.h"
7 #include "ir3_ra.h"
8 #include "ir3_shader.h"
9 
10 /* Represents a def that is currently live. We keep track of both the pre-RA def
11  * a register refers to and, in case of spilling and reloading, the def of the
12  * reloaded instruction. This allows us to assign reloaded defs to sources and
13  * prevents additional reloads.
14  */
15 struct live_def {
16    /* The pre-RA def. */
17    struct ir3_register *def;
18 
19    /* The reloaded def. NULL if def was not reloaded. */
20    struct ir3_register *reloaded_def;
21 
22    /* Set when used for a src marked first-kill. We cannot immediately free the
23     * register because then it might get reused for another src in the same
24     * instruction. Instead, we free it after an instruction's sources have been
25     * processed.
26     */
27    bool killed;
28 };
29 
30 /* Per-block liveness information. Stores live defs per supported register,
31  * indexed by register component.
32  */
33 struct block_liveness {
34    /* Live-in defs taken from the intersections the block's predecessors
35     * live-out defs.
36     */
37    struct live_def *live_in_defs;
38 
39    /* Currently live defs. Starts from live-in and is updated while processing
40     * the instructions in a block. Contains the live-out defs after the whole
41     * block has been processed.
42     */
43    struct live_def *live_defs;
44 };
45 
46 struct ra_predicates_ctx {
47    struct ir3 *ir;
48    unsigned num_regs;
49    struct ir3_liveness *liveness;
50    struct block_liveness *blocks_liveness;
51 
52    /* Number of precolored defs that have not been processed yet. When this
53     * drops to zero, we can stop trying to avoid allocating p0.x (the only
54     * register currently used for precoloring).
55     */
56    unsigned outstanding_precolored_defs;
57 };
58 
59 static bool
has_free_regs(struct ra_predicates_ctx * ctx,struct block_liveness * live)60 has_free_regs(struct ra_predicates_ctx *ctx, struct block_liveness *live)
61 {
62    for (unsigned i = 0; i < ctx->num_regs; ++i) {
63       if (live->live_defs[i].def == NULL)
64          return true;
65    }
66 
67    return false;
68 }
69 
70 static bool
try_avoid_comp(struct ra_predicates_ctx * ctx,struct block_liveness * live,unsigned comp)71 try_avoid_comp(struct ra_predicates_ctx *ctx, struct block_liveness *live,
72                unsigned comp)
73 {
74    /* Currently, only p0.x is ever used for a precolored register so just try to
75     * avoid that one if we have any precolored defs.
76     */
77    return comp == 0 && ctx->outstanding_precolored_defs > 0;
78 }
79 
80 static bool
reg_is_free(struct ra_predicates_ctx * ctx,struct block_liveness * live,unsigned comp)81 reg_is_free(struct ra_predicates_ctx *ctx, struct block_liveness *live,
82             unsigned comp)
83 {
84    assert(comp < ctx->num_regs);
85 
86    return live->live_defs[comp].def == NULL;
87 }
88 
89 static unsigned
alloc_reg_comp(struct ra_predicates_ctx * ctx,struct block_liveness * live)90 alloc_reg_comp(struct ra_predicates_ctx *ctx, struct block_liveness *live)
91 {
92    for (unsigned i = 0; i < ctx->num_regs; ++i) {
93       if (live->live_defs[i].def == NULL && !try_avoid_comp(ctx, live, i))
94          return i;
95    }
96 
97    for (unsigned i = 0; i < ctx->num_regs; ++i) {
98       if (live->live_defs[i].def == NULL)
99          return i;
100    }
101 
102    unreachable("Reg availability should have been checked before");
103 }
104 
105 static struct live_def *
assign_reg(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * def,struct ir3_register * reloaded_def,unsigned comp)106 assign_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live,
107            struct ir3_register *def, struct ir3_register *reloaded_def,
108            unsigned comp)
109 {
110    assert(comp < ctx->num_regs);
111 
112    struct ir3_register *current_def =
113       (reloaded_def == NULL) ? def : reloaded_def;
114 
115    current_def->num = regid(REG_P0, comp);
116 
117    struct live_def *live_def = &live->live_defs[comp];
118    assert((live_def->def == NULL) && (live_def->reloaded_def == NULL));
119 
120    live_def->def = def;
121    live_def->reloaded_def = reloaded_def;
122    return live_def;
123 }
124 
125 static struct live_def *
alloc_reg(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * def,struct ir3_register * reloaded_def)126 alloc_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live,
127           struct ir3_register *def, struct ir3_register *reloaded_def)
128 {
129    /* Try to assign the precolored register if it's free. If not, use normal
130     * allocation and reload whenever a precolored source needs it.
131     * NOTE: this means we currently only support precolored sources, not dests.
132     */
133    if (def->num != INVALID_REG) {
134       assert(ctx->outstanding_precolored_defs > 0);
135       ctx->outstanding_precolored_defs--;
136       unsigned comp = reg_comp(def);
137 
138       if (reg_is_free(ctx, live, comp))
139          return assign_reg(ctx, live, def, reloaded_def, comp);
140    }
141 
142    unsigned comp = alloc_reg_comp(ctx, live);
143    return assign_reg(ctx, live, def, reloaded_def, comp);
144 }
145 
146 static void
free_reg(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * reg)147 free_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live,
148          struct ir3_register *reg)
149 {
150    assert((reg->flags & IR3_REG_PREDICATE) && (reg->num != INVALID_REG));
151 
152    unsigned comp = reg_comp(reg);
153    assert(comp < ctx->num_regs);
154 
155    struct live_def *reg_live_def = &live->live_defs[comp];
156    assert((reg_live_def->def == reg) || (reg_live_def->reloaded_def == reg));
157 
158    reg_live_def->def = NULL;
159    reg_live_def->reloaded_def = NULL;
160    reg_live_def->killed = false;
161 }
162 
163 static struct ir3_instruction *
first_non_allocated_use_after(struct ir3_register * def,struct ir3_instruction * after)164 first_non_allocated_use_after(struct ir3_register *def,
165                               struct ir3_instruction *after)
166 {
167    uint32_t first_ip = UINT32_MAX;
168    struct ir3_instruction *first = NULL;
169 
170    foreach_ssa_use (use, def->instr) {
171       if (!ir3_block_dominates(after->block, use->block))
172          continue;
173 
174       /* Do not filter-out after itself. This ensures that if after is a use of
175        * def, def will not get selected to get spilled because there must be
176        * another register with a further first use. We have to ensure that def
177        * doesn't get spilled in this case because otherwise, we might spill a
178        * register used by an earlier source of after.
179        */
180       if (use->ip < after->ip)
181          continue;
182       if (use->ip >= first_ip)
183          continue;
184 
185       first_ip = use->ip;
186       first = use;
187    }
188 
189    return first;
190 }
191 
192 static bool
is_predicate_use(struct ir3_instruction * instr,unsigned src_n)193 is_predicate_use(struct ir3_instruction *instr, unsigned src_n)
194 {
195    if (__is_false_dep(instr, src_n))
196       return false;
197    return ra_reg_is_predicate(instr->srcs[src_n]);
198 }
199 
200 /* Spill a register by simply removing one from the live defs. We don't need to
201  * store its value anywhere since it can be rematerialized (see reload). We
202  * chose the register whose def's first use is the furthest.
203  */
204 static void
spill(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_instruction * spill_location)205 spill(struct ra_predicates_ctx *ctx, struct block_liveness *live,
206       struct ir3_instruction *spill_location)
207 {
208    unsigned furthest_first_use = 0;
209    unsigned spill_reg = ~0;
210 
211    for (unsigned i = 0; i < ctx->num_regs; ++i) {
212       struct ir3_register *candidate = live->live_defs[i].def;
213       assert(candidate != NULL);
214 
215       struct ir3_instruction *first_use =
216          first_non_allocated_use_after(candidate, spill_location);
217 
218       if (first_use == NULL) {
219          spill_reg = i;
220          break;
221       }
222 
223       if (first_use->ip > furthest_first_use) {
224          furthest_first_use = first_use->ip;
225          spill_reg = i;
226       }
227    }
228 
229    assert(spill_reg != ~0);
230 
231    live->live_defs[spill_reg].def = NULL;
232    live->live_defs[spill_reg].reloaded_def = NULL;
233 }
234 
235 static struct live_def *
find_live_def(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * def)236 find_live_def(struct ra_predicates_ctx *ctx, struct block_liveness *live,
237               struct ir3_register *def)
238 {
239    for (unsigned i = 0; i < ctx->num_regs; ++i) {
240       struct live_def *live_def = &live->live_defs[i];
241       if (live_def->def == def)
242          return live_def;
243    }
244 
245    return NULL;
246 }
247 
248 static struct ir3_register *
get_def(struct live_def * live_def)249 get_def(struct live_def *live_def)
250 {
251    return live_def->reloaded_def == NULL ? live_def->def
252                                          : live_def->reloaded_def;
253 }
254 
255 /* Reload a def into s specific register, which must be free. Reloading is
256  * implemented by cloning the instruction that produced the def and moving it in
257  * front of the use.
258  */
259 static struct live_def *
reload_into(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * def,struct ir3_instruction * use,unsigned comp)260 reload_into(struct ra_predicates_ctx *ctx, struct block_liveness *live,
261             struct ir3_register *def, struct ir3_instruction *use,
262             unsigned comp)
263 {
264    struct ir3_instruction *reloaded_instr = NULL;
265    bool def_is_allocated = !(def->flags & IR3_REG_UNUSED);
266 
267    if (!def_is_allocated && use->block == def->instr->block) {
268       /* If def has not been allocated a register yet, no source is currently
269        * using it. If it's in the same block as the current use, just move it in
270        * front of it.
271        */
272       reloaded_instr = def->instr;
273    } else {
274       /* If the def is either 1) already allocated or 2) in a different block
275        * than the current use, we have to clone it. For 1) because its allocated
276        * register isn't currently live (we wouldn't be reloading it otherwise).
277        * For 2) because it might have other uses in blocks that aren't
278        * successors of the use.
279        */
280       reloaded_instr = ir3_instr_clone(def->instr);
281    }
282 
283    reloaded_instr->block = use->block;
284 
285    /* Keep track of the original def for validation. */
286    reloaded_instr->data = def;
287 
288    ir3_instr_move_before(reloaded_instr, use);
289    struct ir3_register *reloaded_def = reloaded_instr->dsts[0];
290    return assign_reg(ctx, live, def, reloaded_def, comp);
291 }
292 
293 /* Reload a def into a register, spilling one if necessary. */
294 static struct live_def *
reload(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * def,struct ir3_instruction * use)295 reload(struct ra_predicates_ctx *ctx, struct block_liveness *live,
296        struct ir3_register *def, struct ir3_instruction *use)
297 {
298    if (!has_free_regs(ctx, live))
299       spill(ctx, live, use);
300 
301    unsigned comp = alloc_reg_comp(ctx, live);
302    return reload_into(ctx, live, def, use, comp);
303 }
304 
305 static int
ra_block(struct ra_predicates_ctx * ctx,struct ir3_block * block)306 ra_block(struct ra_predicates_ctx *ctx, struct ir3_block *block)
307 {
308    struct block_liveness *live = &ctx->blocks_liveness[block->index];
309 
310    foreach_instr (instr, &block->instr_list) {
311       /* Assign registers to sources based on their defs. */
312       foreach_src (src, instr) {
313          if (!ra_reg_is_predicate(src))
314             continue;
315 
316          struct live_def *live_def = find_live_def(ctx, live, src->def);
317          if (src->num != INVALID_REG &&
318              (!live_def || get_def(live_def)->num != src->num)) {
319             /* If src is precolored and its def is either not live or is live in
320              * the wrong register, reload it into the correct one.
321              */
322             unsigned comp = reg_comp(src);
323 
324             if (!reg_is_free(ctx, live, comp))
325                free_reg(ctx, live, get_def(&live->live_defs[comp]));
326             if (live_def)
327                free_reg(ctx, live, get_def(live_def));
328 
329             live_def = reload_into(ctx, live, src->def, instr, comp);
330          } else if (!live_def) {
331             live_def = reload(ctx, live, src->def, instr);
332          }
333 
334          assert(live_def != NULL);
335 
336          struct ir3_register *def = get_def(live_def);
337 
338          assert((src->num == INVALID_REG) || (src->num == def->num));
339          src->num = def->num;
340          src->def = def;
341 
342          /* Mark the def as used to make sure we won't move it anymore. */
343          def->flags &= ~IR3_REG_UNUSED;
344 
345          /* If this source kills the def, don't free the register right away to
346           * prevent it being reused for another source of this instruction. We
347           * can free it after all sources of this instruction have been
348           * processed.
349           */
350          if (src->flags & IR3_REG_FIRST_KILL)
351             live_def->killed = true;
352       }
353 
354       /* After all sources of an instruction have been processed, we can  free
355        * the registers that were killed by a source.
356        */
357       for (unsigned reg = 0; reg < ctx->num_regs; ++reg) {
358          struct live_def *live_def = &live->live_defs[reg];
359          if (live_def->def == NULL)
360             continue;
361 
362          if (live_def->killed)
363             free_reg(ctx, live, get_def(live_def));
364       }
365 
366       /* Allocate registers for new defs. */
367       foreach_dst (dst, instr) {
368          if (!ra_reg_is_predicate(dst))
369             continue;
370 
371          /* Mark it as unused until we encounter the first use. This allows us
372           * to know when it is legal to move the instruction.
373           */
374          dst->flags |= IR3_REG_UNUSED;
375 
376          /* For validation, we keep track of which def an instruction produces.
377           * Normally, this will be the instruction's dst but in case of
378           * reloading, it will point to the original instruction's dst.
379           */
380          dst->instr->data = dst;
381 
382          /* If we don't have any free registers, ignore the def for now. If we
383           * start spilling right away, we might end-up with a cascade of spills
384           * when there are a lot of defs before their first uses.
385           */
386          if (!has_free_regs(ctx, live))
387             continue;
388 
389          alloc_reg(ctx, live, dst, NULL);
390       }
391    }
392 
393    /* Process loop back edges. Since we ignore them while calculating a block's
394     * live-in defs in init_block_liveness, we now make sure that we satisfy our
395     * successor's live-in requirements by producing the correct defs in the
396     * required registers.
397     */
398    for (unsigned i = 0; i < 2; ++i) {
399       struct ir3_block *succ = block->successors[i];
400       if (!succ)
401          continue;
402 
403       struct live_def *succ_live_in =
404          ctx->blocks_liveness[succ->index].live_in_defs;
405 
406       /* If live_in_defs has not been set yet, it's not a back edge. */
407       if (!succ_live_in)
408          continue;
409 
410       for (unsigned reg = 0; reg < ctx->num_regs; ++reg) {
411          struct live_def *succ_def = &succ_live_in[reg];
412          if (!succ_def->def)
413             continue;
414 
415          struct live_def *cur_def = &live->live_defs[reg];
416 
417          /* Same def in the same register, nothing to be done. */
418          if (cur_def->def == succ_def->def)
419             continue;
420 
421          /* Different def in the same register, free it first. */
422          if (cur_def->def)
423             free_reg(ctx, live, get_def(cur_def));
424 
425          /* Reload the def in the required register right before the block's
426           * terminator.
427           */
428          struct ir3_instruction *use = ir3_block_get_terminator(block);
429          reload_into(ctx, live, succ_def->def, use, reg);
430       }
431    }
432 
433    return 0;
434 }
435 
436 /* Propagate live-out defs of a block's predecessors to the block's live-in
437  * defs. This takes the intersection of all predecessors live-out defs. That is,
438  * a def will be live-in if it's live-out in the same register in all
439  * predecessors.
440  */
441 static void
init_block_liveness(struct ra_predicates_ctx * ctx,struct ir3_block * block)442 init_block_liveness(struct ra_predicates_ctx *ctx, struct ir3_block *block)
443 {
444    struct block_liveness *live = &ctx->blocks_liveness[block->index];
445    live->live_defs = rzalloc_array(ctx, struct live_def, ctx->num_regs);
446    BITSET_WORD *live_in = ctx->liveness->live_in[block->index];
447 
448    for (unsigned i = 0; i < block->predecessors_count; ++i) {
449       struct ir3_block *pred = block->predecessors[i];
450       assert(pred != NULL);
451 
452       struct block_liveness *pred_live = &ctx->blocks_liveness[pred->index];
453 
454       /* If the predecessor has not been processed yet it means it's the back
455        * edge of a loop. We ignore it now, take the live-out defs of the block's
456        * other predecessors, and make sure the live-out defs of the back edge
457        * match this block's live-in defs after processing the back edge.
458        */
459       if (pred_live->live_defs == NULL)
460          continue;
461 
462       for (unsigned reg = 0; reg < ctx->num_regs; ++reg) {
463          struct live_def *cur_def = &live->live_defs[reg];
464          struct live_def *pred_def = &pred_live->live_defs[reg];
465 
466          if (i == 0 && pred_def->def != NULL) {
467             /* If the first predecessor has a def in reg, use it if it's live-in
468              * in this block.
469              */
470             if (BITSET_TEST(live_in, pred_def->def->name))
471                *cur_def = *pred_def;
472          } else if (cur_def->def != pred_def->def) {
473             /* Different predecessors have different live-out defs in reg so we
474              * cannot use it as live-in.
475              */
476             cur_def->def = NULL;
477             cur_def->reloaded_def = NULL;
478          }
479       }
480    }
481 
482    live->live_in_defs = rzalloc_array(ctx, struct live_def, ctx->num_regs);
483    memcpy(live->live_in_defs, live->live_defs,
484           sizeof(struct live_def) * ctx->num_regs);
485 }
486 
487 static void
precolor_def(struct ra_predicates_ctx * ctx,struct ir3_register * def)488 precolor_def(struct ra_predicates_ctx *ctx, struct ir3_register *def)
489 {
490    foreach_ssa_use (use, def->instr) {
491       foreach_src (src, use) {
492          if (src->def != def)
493             continue;
494          if (src->num == INVALID_REG)
495             continue;
496 
497          def->num = src->num;
498          ctx->outstanding_precolored_defs++;
499 
500          /* We can only precolor a def once. */
501          return;
502       }
503    }
504 }
505 
506 /* Precolor the defs of precolored sources so that we can try to assign the
507  * correct register immediately.
508  */
509 static void
precolor_defs(struct ra_predicates_ctx * ctx)510 precolor_defs(struct ra_predicates_ctx *ctx)
511 {
512    for (unsigned i = 1; i < ctx->liveness->definitions_count; ++i) {
513       struct ir3_register *def = ctx->liveness->definitions[i];
514       precolor_def(ctx, def);
515    }
516 }
517 
518 void
ir3_ra_predicates(struct ir3_shader_variant * v)519 ir3_ra_predicates(struct ir3_shader_variant *v)
520 {
521    struct ra_predicates_ctx *ctx = rzalloc(NULL, struct ra_predicates_ctx);
522    ctx->ir = v->ir;
523    ctx->num_regs = v->compiler->num_predicates;
524    ctx->liveness = ir3_calc_liveness_for(ctx, v->ir, ra_reg_is_predicate,
525                                          ra_reg_is_predicate);
526    ctx->blocks_liveness =
527       rzalloc_array(ctx, struct block_liveness, ctx->liveness->block_count);
528    ir3_count_instructions_ra(ctx->ir);
529    ir3_find_ssa_uses_for(ctx->ir, ctx, is_predicate_use);
530    precolor_defs(ctx);
531 
532    foreach_block (block, &v->ir->block_list) {
533       init_block_liveness(ctx, block);
534       ra_block(ctx, block);
535    }
536 
537    /* Remove instructions that became unused. This happens when a def was never
538     * used directly but only through its reloaded clones.
539     * Note that index 0 in the liveness definitions is always NULL.
540     */
541    for (unsigned i = 1; i < ctx->liveness->definitions_count; ++i) {
542       struct ir3_register *def = ctx->liveness->definitions[i];
543 
544       if (def->flags & IR3_REG_UNUSED)
545          list_delinit(&def->instr->node);
546    }
547 
548    ralloc_free(ctx);
549 }
550