xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_lower_shared_phi.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2023 Valve Corporation.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "ir3.h"
7 #include "util/ralloc.h"
8 
9 /* RA cannot handle phis of shared registers where there are extra physical
10  * sources, or the sources have extra physical destinations, because these edges
11  * are critical edges that we cannot resolve copies along.  Here's a contrived
12  * example:
13  *
14  * loop {
15  *    if non-uniform {
16  *       if uniform {
17  *          x_1 = ...;
18  *          continue;
19  *       }
20  *       x_2 = ...;
21  *    } else {
22  *       break;
23  *    }
24  *    // continue block
25  *    x_3 = phi(x_1, x_2)
26  * }
27  *
28  * Assuming x_1 and x_2 are uniform, x_3 will also be uniform, because all
29  * threads that stay in the loop take the same branch to the continue block,
30  * however execution may fall through from the assignment to x_2 to the
31  * break statement because the outer if is non-uniform, and then it will fall
32  * through again to the continue block. In cases like this we have to demote the
33  * phi to normal registers and insert movs around it (which will probably be
34  * coalesced).
35  */
36 
37 static void
lower_phi(void * ctx,struct ir3_instruction * phi)38 lower_phi(void *ctx, struct ir3_instruction *phi)
39 {
40    struct ir3_block *block = phi->block;
41    for (unsigned i = 0; i < block->predecessors_count; i++) {
42       struct ir3_block *pred = block->predecessors[i];
43       if (phi->srcs[i]->def) {
44          struct ir3_instruction *pred_mov = ir3_instr_create(pred, OPC_MOV, 1, 1);
45          pred_mov->uses = _mesa_pointer_set_create(ctx);
46          __ssa_dst(pred_mov)->flags |= (phi->srcs[i]->flags & IR3_REG_HALF);
47          unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
48             (phi->srcs[i]->flags & IR3_REG_HALF);
49          ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
50             phi->srcs[i]->def;
51          pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
52             (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
53 
54          _mesa_set_remove_key(phi->srcs[i]->def->instr->uses, phi);
55          _mesa_set_add(phi->srcs[i]->def->instr->uses, pred_mov);
56          phi->srcs[i]->def = pred_mov->dsts[0];
57       }
58       phi->srcs[i]->flags &= ~IR3_REG_SHARED;
59    }
60 
61    phi->dsts[0]->flags &= ~IR3_REG_SHARED;
62 
63    struct ir3_instruction *shared_mov =
64       ir3_MOV(block, phi,
65               (phi->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32);
66    shared_mov->uses = _mesa_pointer_set_create(ctx);
67    shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
68    ir3_instr_move_after_phis(shared_mov, block);
69 
70    foreach_ssa_use (use, phi) {
71       for (unsigned i = 0; i < use->srcs_count; i++) {
72          if (use->srcs[i]->def == phi->dsts[0])
73             use->srcs[i]->def = shared_mov->dsts[0];
74       }
75    }
76 }
77 
78 bool
ir3_lower_shared_phis(struct ir3 * ir)79 ir3_lower_shared_phis(struct ir3 *ir)
80 {
81    void *mem_ctx = ralloc_context(NULL);
82    bool progress = false;
83 
84    ir3_find_ssa_uses(ir, mem_ctx, false);
85 
86    foreach_block (block, &ir->block_list) {
87       bool pred_physical_edge = false;
88       for (unsigned i = 0; i < block->predecessors_count; i++) {
89          unsigned successors_count =
90             block->predecessors[i]->successors[1] ? 2 : 1;
91          if (block->predecessors[i]->physical_successors_count > successors_count) {
92             pred_physical_edge = true;
93             break;
94          }
95       }
96 
97       if (!pred_physical_edge &&
98           block->physical_predecessors_count == block->predecessors_count)
99          continue;
100 
101       foreach_instr_safe (phi, &block->instr_list) {
102          if (phi->opc != OPC_META_PHI)
103             break;
104 
105          if (!(phi->dsts[0]->flags & IR3_REG_SHARED))
106             continue;
107 
108          lower_phi(mem_ctx, phi);
109          progress = true;
110       }
111    }
112 
113    ralloc_free(mem_ctx);
114    return progress;
115 }
116 
117