1 /*
2 * Copyright 2024 Igalia S.L.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "ir3.h"
7 #include "ir3_ra.h"
8 #include "ir3_shader.h"
9
/* Represents a def that is currently live. We keep track of both the pre-RA def
 * a register refers to and, in case of spilling and reloading, the def of the
 * reloaded instruction. This allows us to assign reloaded defs to sources and
 * prevents additional reloads.
 */
struct live_def {
   /* The pre-RA def. */
   struct ir3_register *def;

   /* The dst of the reloaded (cloned or moved) instruction. NULL if def was
    * not reloaded.
    */
   struct ir3_register *reloaded_def;

   /* Set when used for a src marked first-kill. We cannot immediately free the
    * register because then it might get reused for another src in the same
    * instruction. Instead, we free it after an instruction's sources have been
    * processed.
    */
   bool killed;
};
29
/* Per-block liveness information. Stores live defs per supported register,
 * indexed by register component.
 */
struct block_liveness {
   /* Live-in defs taken from the intersection of the block's predecessors'
    * live-out defs (see init_block_liveness).
    */
   struct live_def *live_in_defs;

   /* Currently live defs. Starts from live-in and is updated while processing
    * the instructions in a block. Contains the live-out defs after the whole
    * block has been processed.
    */
   struct live_def *live_defs;
};
45
/* Top-level state for the predicate register allocation pass. */
struct ra_predicates_ctx {
   struct ir3 *ir;

   /* Number of hardware predicate register components available (p0.x..). */
   unsigned num_regs;

   /* SSA liveness (live-in bitsets and the table of all predicate defs). */
   struct ir3_liveness *liveness;

   /* Per-block live defs, indexed by block index. */
   struct block_liveness *blocks_liveness;

   /* Number of precolored defs that have not been processed yet. When this
    * drops to zero, we can stop trying to avoid allocating p0.x (the only
    * register currently used for precoloring).
    */
   unsigned outstanding_precolored_defs;
};
58
59 static bool
has_free_regs(struct ra_predicates_ctx * ctx,struct block_liveness * live)60 has_free_regs(struct ra_predicates_ctx *ctx, struct block_liveness *live)
61 {
62 for (unsigned i = 0; i < ctx->num_regs; ++i) {
63 if (live->live_defs[i].def == NULL)
64 return true;
65 }
66
67 return false;
68 }
69
70 static bool
try_avoid_comp(struct ra_predicates_ctx * ctx,struct block_liveness * live,unsigned comp)71 try_avoid_comp(struct ra_predicates_ctx *ctx, struct block_liveness *live,
72 unsigned comp)
73 {
74 /* Currently, only p0.x is ever used for a precolored register so just try to
75 * avoid that one if we have any precolored defs.
76 */
77 return comp == 0 && ctx->outstanding_precolored_defs > 0;
78 }
79
80 static bool
reg_is_free(struct ra_predicates_ctx * ctx,struct block_liveness * live,unsigned comp)81 reg_is_free(struct ra_predicates_ctx *ctx, struct block_liveness *live,
82 unsigned comp)
83 {
84 assert(comp < ctx->num_regs);
85
86 return live->live_defs[comp].def == NULL;
87 }
88
89 static unsigned
alloc_reg_comp(struct ra_predicates_ctx * ctx,struct block_liveness * live)90 alloc_reg_comp(struct ra_predicates_ctx *ctx, struct block_liveness *live)
91 {
92 for (unsigned i = 0; i < ctx->num_regs; ++i) {
93 if (live->live_defs[i].def == NULL && !try_avoid_comp(ctx, live, i))
94 return i;
95 }
96
97 for (unsigned i = 0; i < ctx->num_regs; ++i) {
98 if (live->live_defs[i].def == NULL)
99 return i;
100 }
101
102 unreachable("Reg availability should have been checked before");
103 }
104
105 static struct live_def *
assign_reg(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * def,struct ir3_register * reloaded_def,unsigned comp)106 assign_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live,
107 struct ir3_register *def, struct ir3_register *reloaded_def,
108 unsigned comp)
109 {
110 assert(comp < ctx->num_regs);
111
112 struct ir3_register *current_def =
113 (reloaded_def == NULL) ? def : reloaded_def;
114
115 current_def->num = regid(REG_P0, comp);
116
117 struct live_def *live_def = &live->live_defs[comp];
118 assert((live_def->def == NULL) && (live_def->reloaded_def == NULL));
119
120 live_def->def = def;
121 live_def->reloaded_def = reloaded_def;
122 return live_def;
123 }
124
125 static struct live_def *
alloc_reg(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * def,struct ir3_register * reloaded_def)126 alloc_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live,
127 struct ir3_register *def, struct ir3_register *reloaded_def)
128 {
129 /* Try to assign the precolored register if it's free. If not, use normal
130 * allocation and reload whenever a precolored source needs it.
131 * NOTE: this means we currently only support precolored sources, not dests.
132 */
133 if (def->num != INVALID_REG) {
134 assert(ctx->outstanding_precolored_defs > 0);
135 ctx->outstanding_precolored_defs--;
136 unsigned comp = reg_comp(def);
137
138 if (reg_is_free(ctx, live, comp))
139 return assign_reg(ctx, live, def, reloaded_def, comp);
140 }
141
142 unsigned comp = alloc_reg_comp(ctx, live);
143 return assign_reg(ctx, live, def, reloaded_def, comp);
144 }
145
146 static void
free_reg(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * reg)147 free_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live,
148 struct ir3_register *reg)
149 {
150 assert((reg->flags & IR3_REG_PREDICATE) && (reg->num != INVALID_REG));
151
152 unsigned comp = reg_comp(reg);
153 assert(comp < ctx->num_regs);
154
155 struct live_def *reg_live_def = &live->live_defs[comp];
156 assert((reg_live_def->def == reg) || (reg_live_def->reloaded_def == reg));
157
158 reg_live_def->def = NULL;
159 reg_live_def->reloaded_def = NULL;
160 reg_live_def->killed = false;
161 }
162
163 static struct ir3_instruction *
first_non_allocated_use_after(struct ir3_register * def,struct ir3_instruction * after)164 first_non_allocated_use_after(struct ir3_register *def,
165 struct ir3_instruction *after)
166 {
167 uint32_t first_ip = UINT32_MAX;
168 struct ir3_instruction *first = NULL;
169
170 foreach_ssa_use (use, def->instr) {
171 if (!ir3_block_dominates(after->block, use->block))
172 continue;
173
174 /* Do not filter-out after itself. This ensures that if after is a use of
175 * def, def will not get selected to get spilled because there must be
176 * another register with a further first use. We have to ensure that def
177 * doesn't get spilled in this case because otherwise, we might spill a
178 * register used by an earlier source of after.
179 */
180 if (use->ip < after->ip)
181 continue;
182 if (use->ip >= first_ip)
183 continue;
184
185 first_ip = use->ip;
186 first = use;
187 }
188
189 return first;
190 }
191
192 static bool
is_predicate_use(struct ir3_instruction * instr,unsigned src_n)193 is_predicate_use(struct ir3_instruction *instr, unsigned src_n)
194 {
195 if (__is_false_dep(instr, src_n))
196 return false;
197 return ra_reg_is_predicate(instr->srcs[src_n]);
198 }
199
200 /* Spill a register by simply removing one from the live defs. We don't need to
201 * store its value anywhere since it can be rematerialized (see reload). We
202 * chose the register whose def's first use is the furthest.
203 */
204 static void
spill(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_instruction * spill_location)205 spill(struct ra_predicates_ctx *ctx, struct block_liveness *live,
206 struct ir3_instruction *spill_location)
207 {
208 unsigned furthest_first_use = 0;
209 unsigned spill_reg = ~0;
210
211 for (unsigned i = 0; i < ctx->num_regs; ++i) {
212 struct ir3_register *candidate = live->live_defs[i].def;
213 assert(candidate != NULL);
214
215 struct ir3_instruction *first_use =
216 first_non_allocated_use_after(candidate, spill_location);
217
218 if (first_use == NULL) {
219 spill_reg = i;
220 break;
221 }
222
223 if (first_use->ip > furthest_first_use) {
224 furthest_first_use = first_use->ip;
225 spill_reg = i;
226 }
227 }
228
229 assert(spill_reg != ~0);
230
231 live->live_defs[spill_reg].def = NULL;
232 live->live_defs[spill_reg].reloaded_def = NULL;
233 }
234
235 static struct live_def *
find_live_def(struct ra_predicates_ctx * ctx,struct block_liveness * live,struct ir3_register * def)236 find_live_def(struct ra_predicates_ctx *ctx, struct block_liveness *live,
237 struct ir3_register *def)
238 {
239 for (unsigned i = 0; i < ctx->num_regs; ++i) {
240 struct live_def *live_def = &live->live_defs[i];
241 if (live_def->def == def)
242 return live_def;
243 }
244
245 return NULL;
246 }
247
248 static struct ir3_register *
get_def(struct live_def * live_def)249 get_def(struct live_def *live_def)
250 {
251 return live_def->reloaded_def == NULL ? live_def->def
252 : live_def->reloaded_def;
253 }
254
/* Reload a def into a specific register, which must be free. Reloading is
 * implemented by cloning the instruction that produced the def and moving it in
 * front of the use (or, when safe, moving the original instruction itself).
 */
static struct live_def *
reload_into(struct ra_predicates_ctx *ctx, struct block_liveness *live,
            struct ir3_register *def, struct ir3_instruction *use,
            unsigned comp)
{
   struct ir3_instruction *reloaded_instr = NULL;
   /* IR3_REG_UNUSED is set on a dst until its first allocated use (see
    * ra_block), so its absence means some source already references it.
    */
   bool def_is_allocated = !(def->flags & IR3_REG_UNUSED);

   if (!def_is_allocated && use->block == def->instr->block) {
      /* If def has not been allocated a register yet, no source is currently
       * using it. If it's in the same block as the current use, just move it in
       * front of it.
       */
      reloaded_instr = def->instr;
   } else {
      /* If the def is either 1) already allocated or 2) in a different block
       * than the current use, we have to clone it. For 1) because its allocated
       * register isn't currently live (we wouldn't be reloading it otherwise).
       * For 2) because it might have other uses in blocks that aren't
       * successors of the use.
       */
      reloaded_instr = ir3_instr_clone(def->instr);
   }

   reloaded_instr->block = use->block;

   /* Keep track of the original def for validation. */
   reloaded_instr->data = def;

   ir3_instr_move_before(reloaded_instr, use);
   struct ir3_register *reloaded_def = reloaded_instr->dsts[0];
   return assign_reg(ctx, live, def, reloaded_def, comp);
}
292
/* Reload a def into any free register, spilling one first if necessary. */
static struct live_def *
reload(struct ra_predicates_ctx *ctx, struct block_liveness *live,
       struct ir3_register *def, struct ir3_instruction *use)
{
   if (!has_free_regs(ctx, live))
      spill(ctx, live, use);

   return reload_into(ctx, live, def, use, alloc_reg_comp(ctx, live));
}
304
/* Allocate predicate registers for a single block: walk its instructions in
 * order, assigning registers to predicate sources (reloading defs when
 * needed), freeing killed registers, and allocating registers for new defs.
 * Finally, satisfy the live-in requirements of back-edge successors.
 * Always returns 0.
 */
static int
ra_block(struct ra_predicates_ctx *ctx, struct ir3_block *block)
{
   struct block_liveness *live = &ctx->blocks_liveness[block->index];

   foreach_instr (instr, &block->instr_list) {
      /* Assign registers to sources based on their defs. */
      foreach_src (src, instr) {
         if (!ra_reg_is_predicate(src))
            continue;

         struct live_def *live_def = find_live_def(ctx, live, src->def);
         if (src->num != INVALID_REG &&
             (!live_def || get_def(live_def)->num != src->num)) {
            /* If src is precolored and its def is either not live or is live in
             * the wrong register, reload it into the correct one.
             */
            unsigned comp = reg_comp(src);

            /* Evict whatever currently occupies the precolored component, and
             * drop the def's old location so assign_reg sees a clean slot.
             */
            if (!reg_is_free(ctx, live, comp))
               free_reg(ctx, live, get_def(&live->live_defs[comp]));
            if (live_def)
               free_reg(ctx, live, get_def(live_def));

            live_def = reload_into(ctx, live, src->def, instr, comp);
         } else if (!live_def) {
            /* Not precolored and not live: reload into any register. */
            live_def = reload(ctx, live, src->def, instr);
         }

         assert(live_def != NULL);

         struct ir3_register *def = get_def(live_def);

         assert((src->num == INVALID_REG) || (src->num == def->num));
         src->num = def->num;
         src->def = def;

         /* Mark the def as used to make sure we won't move it anymore. */
         def->flags &= ~IR3_REG_UNUSED;

         /* If this source kills the def, don't free the register right away to
          * prevent it being reused for another source of this instruction. We
          * can free it after all sources of this instruction have been
          * processed.
          */
         if (src->flags & IR3_REG_FIRST_KILL)
            live_def->killed = true;
      }

      /* After all sources of an instruction have been processed, we can free
       * the registers that were killed by a source.
       */
      for (unsigned reg = 0; reg < ctx->num_regs; ++reg) {
         struct live_def *live_def = &live->live_defs[reg];
         if (live_def->def == NULL)
            continue;

         if (live_def->killed)
            free_reg(ctx, live, get_def(live_def));
      }

      /* Allocate registers for new defs. */
      foreach_dst (dst, instr) {
         if (!ra_reg_is_predicate(dst))
            continue;

         /* Mark it as unused until we encounter the first use. This allows us
          * to know when it is legal to move the instruction.
          */
         dst->flags |= IR3_REG_UNUSED;

         /* For validation, we keep track of which def an instruction produces.
          * Normally, this will be the instruction's dst but in case of
          * reloading, it will point to the original instruction's dst.
          */
         dst->instr->data = dst;

         /* If we don't have any free registers, ignore the def for now. If we
          * start spilling right away, we might end-up with a cascade of spills
          * when there are a lot of defs before their first uses.
          */
         if (!has_free_regs(ctx, live))
            continue;

         alloc_reg(ctx, live, dst, NULL);
      }
   }

   /* Process loop back edges. Since we ignore them while calculating a block's
    * live-in defs in init_block_liveness, we now make sure that we satisfy our
    * successor's live-in requirements by producing the correct defs in the
    * required registers.
    */
   for (unsigned i = 0; i < 2; ++i) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      struct live_def *succ_live_in =
         ctx->blocks_liveness[succ->index].live_in_defs;

      /* If live_in_defs has not been set yet, it's not a back edge. */
      if (!succ_live_in)
         continue;

      for (unsigned reg = 0; reg < ctx->num_regs; ++reg) {
         struct live_def *succ_def = &succ_live_in[reg];
         if (!succ_def->def)
            continue;

         struct live_def *cur_def = &live->live_defs[reg];

         /* Same def in the same register, nothing to be done. */
         if (cur_def->def == succ_def->def)
            continue;

         /* Different def in the same register, free it first. */
         if (cur_def->def)
            free_reg(ctx, live, get_def(cur_def));

         /* Reload the def in the required register right before the block's
          * terminator.
          */
         struct ir3_instruction *use = ir3_block_get_terminator(block);
         reload_into(ctx, live, succ_def->def, use, reg);
      }
   }

   return 0;
}
435
/* Propagate live-out defs of a block's predecessors to the block's live-in
 * defs. This takes the intersection of all predecessors' live-out defs. That
 * is, a def will be live-in if it's live-out in the same register in all
 * predecessors.
 */
static void
init_block_liveness(struct ra_predicates_ctx *ctx, struct ir3_block *block)
{
   struct block_liveness *live = &ctx->blocks_liveness[block->index];
   live->live_defs = rzalloc_array(ctx, struct live_def, ctx->num_regs);
   BITSET_WORD *live_in = ctx->liveness->live_in[block->index];

   for (unsigned i = 0; i < block->predecessors_count; ++i) {
      struct ir3_block *pred = block->predecessors[i];
      assert(pred != NULL);

      struct block_liveness *pred_live = &ctx->blocks_liveness[pred->index];

      /* If the predecessor has not been processed yet it means it's the back
       * edge of a loop. We ignore it now, take the live-out defs of the block's
       * other predecessors, and make sure the live-out defs of the back edge
       * match this block's live-in defs after processing the back edge.
       */
      if (pred_live->live_defs == NULL)
         continue;

      for (unsigned reg = 0; reg < ctx->num_regs; ++reg) {
         struct live_def *cur_def = &live->live_defs[reg];
         struct live_def *pred_def = &pred_live->live_defs[reg];

         if (i == 0 && pred_def->def != NULL) {
            /* If the first predecessor has a def in reg, use it if it's live-in
             * in this block.
             */
            if (BITSET_TEST(live_in, pred_def->def->name))
               *cur_def = *pred_def;
         } else if (cur_def->def != pred_def->def) {
            /* Different predecessors have different live-out defs in reg so we
             * cannot use it as live-in.
             */
            cur_def->def = NULL;
            cur_def->reloaded_def = NULL;
         }
      }
   }

   /* Snapshot the computed live-in set; ra_block keeps mutating live_defs and
    * back-edge processing needs the original live-in requirements.
    */
   live->live_in_defs = rzalloc_array(ctx, struct live_def, ctx->num_regs);
   memcpy(live->live_in_defs, live->live_defs,
          sizeof(struct live_def) * ctx->num_regs);
}
486
487 static void
precolor_def(struct ra_predicates_ctx * ctx,struct ir3_register * def)488 precolor_def(struct ra_predicates_ctx *ctx, struct ir3_register *def)
489 {
490 foreach_ssa_use (use, def->instr) {
491 foreach_src (src, use) {
492 if (src->def != def)
493 continue;
494 if (src->num == INVALID_REG)
495 continue;
496
497 def->num = src->num;
498 ctx->outstanding_precolored_defs++;
499
500 /* We can only precolor a def once. */
501 return;
502 }
503 }
504 }
505
506 /* Precolor the defs of precolored sources so that we can try to assign the
507 * correct register immediately.
508 */
509 static void
precolor_defs(struct ra_predicates_ctx * ctx)510 precolor_defs(struct ra_predicates_ctx *ctx)
511 {
512 for (unsigned i = 1; i < ctx->liveness->definitions_count; ++i) {
513 struct ir3_register *def = ctx->liveness->definitions[i];
514 precolor_def(ctx, def);
515 }
516 }
517
/* Entry point: allocate hardware predicate registers (p0.x..) for all
 * predicate defs/sources in the shader, rematerializing (reloading) defs
 * instead of storing spilled values. Blocks are processed in list order;
 * loop back edges are handled by ra_block.
 */
void
ir3_ra_predicates(struct ir3_shader_variant *v)
{
   struct ra_predicates_ctx *ctx = rzalloc(NULL, struct ra_predicates_ctx);
   ctx->ir = v->ir;
   ctx->num_regs = v->compiler->num_predicates;
   ctx->liveness = ir3_calc_liveness_for(ctx, v->ir, ra_reg_is_predicate,
                                         ra_reg_is_predicate);
   ctx->blocks_liveness =
      rzalloc_array(ctx, struct block_liveness, ctx->liveness->block_count);
   /* Instruction ips are needed by the spill heuristic; ssa uses by
    * precoloring and first_non_allocated_use_after.
    */
   ir3_count_instructions_ra(ctx->ir);
   ir3_find_ssa_uses_for(ctx->ir, ctx, is_predicate_use);
   precolor_defs(ctx);

   foreach_block (block, &v->ir->block_list) {
      init_block_liveness(ctx, block);
      ra_block(ctx, block);
   }

   /* Remove instructions that became unused. This happens when a def was never
    * used directly but only through its reloaded clones.
    * Note that index 0 in the liveness definitions is always NULL.
    */
   for (unsigned i = 1; i < ctx->liveness->definitions_count; ++i) {
      struct ir3_register *def = ctx->liveness->definitions[i];

      if (def->flags & IR3_REG_UNUSED)
         list_delinit(&def->instr->node);
   }

   ralloc_free(ctx);
}
550