xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_array_to_ssa.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 /* This pass lowers array accesses to SSA.
7  *
8  * After this pass, instructions writing arrays implicitly read the contents of
9  * the array defined in instr->dsts[0]->def (possibly a phi node), perform the
10  * operation, and store to instr->dsts[0].
11  *
12  * This makes arrays appear like "normal" SSA values, even if the false
13  * dependencies mean that they always stay in CSSA form (i.e. able to removed
14  * out-of-SSA with no copies.) While hopefully they shouldn't induce copies in
15  * most cases, we can't make that guarantee while also splitting spilling from
16  * RA and guaranteeing a certain number of registers are used, so we have to
17  * insert the phi nodes to be able to know when copying should happen.
18  *
19  * The implementation is based on the idea in "Simple and Efficient Construction
20  * of Static Single Assignment Form" of scanning backwards to find the
21  * definition. However, since we're not doing this on-the-fly we can simplify
22  * things a little by doing a pre-pass to get the last definition of each array
23  * in each block. Then we optimize trivial phis in a separate pass, "on the fly"
24  * so that we don't have to rewrite (and keep track of) users.
25  */
26 
27 #include <stdlib.h>
28 #include "ir3.h"
29 
30 struct array_state {
31    struct ir3_register *live_in_definition;
32    struct ir3_register *live_out_definition;
33    bool constructed;
34    bool optimized;
35 };
36 
37 struct array_ctx {
38    struct array_state *states;
39    struct ir3 *ir;
40    unsigned array_count;
41 };
42 
43 static struct array_state *
get_state(struct array_ctx * ctx,struct ir3_block * block,unsigned id)44 get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
45 {
46    return &ctx->states[ctx->array_count * block->index + id];
47 }
48 
49 static struct ir3_register *read_value_beginning(struct array_ctx *ctx,
50                                                  struct ir3_block *block,
51                                                  struct ir3_array *arr);
52 
53 static struct ir3_register *
read_value_end(struct array_ctx * ctx,struct ir3_block * block,struct ir3_array * arr)54 read_value_end(struct array_ctx *ctx, struct ir3_block *block,
55                struct ir3_array *arr)
56 {
57    struct array_state *state = get_state(ctx, block, arr->id);
58    if (state->live_out_definition)
59       return state->live_out_definition;
60 
61    state->live_out_definition = read_value_beginning(ctx, block, arr);
62    return state->live_out_definition;
63 }
64 
65 /* Roughly equivalent to readValueRecursive from the paper: */
66 static struct ir3_register *
read_value_beginning(struct array_ctx * ctx,struct ir3_block * block,struct ir3_array * arr)67 read_value_beginning(struct array_ctx *ctx, struct ir3_block *block,
68                      struct ir3_array *arr)
69 {
70    struct array_state *state = get_state(ctx, block, arr->id);
71 
72    if (state->constructed)
73       return state->live_in_definition;
74 
75    if (block->predecessors_count == 0) {
76       state->constructed = true;
77       return NULL;
78    }
79 
80    if (block->predecessors_count == 1) {
81       state->live_in_definition =
82          read_value_end(ctx, block->predecessors[0], arr);
83       state->constructed = true;
84       return state->live_in_definition;
85    }
86 
87    unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
88    struct ir3_instruction *phi =
89       ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
90    list_del(&phi->node);
91    list_add(&phi->node, &block->instr_list);
92 
93    struct ir3_register *dst = __ssa_dst(phi);
94    dst->flags |= flags;
95    dst->array.id = arr->id;
96    dst->size = arr->length;
97 
98    state->live_in_definition = phi->dsts[0];
99    state->constructed = true;
100 
101    for (unsigned i = 0; i < block->predecessors_count; i++) {
102       struct ir3_register *src =
103          read_value_end(ctx, block->predecessors[i], arr);
104       struct ir3_register *src_reg;
105       if (src) {
106          src_reg = __ssa_src(phi, src->instr, flags);
107       } else {
108          src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
109       }
110       src_reg->array.id = arr->id;
111       src_reg->size = arr->length;
112    }
113    return phi->dsts[0];
114 }
115 
116 static struct ir3_register *
remove_trivial_phi(struct ir3_instruction * phi)117 remove_trivial_phi(struct ir3_instruction *phi)
118 {
119    /* Break cycles */
120    if (phi->data)
121       return phi->data;
122 
123    phi->data = phi->dsts[0];
124 
125    struct ir3_register *unique_def = NULL;
126    bool unique = true;
127    for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
128       struct ir3_register *src = phi->srcs[i];
129 
130       /* If there are any undef sources, then the remaining sources may not
131        * dominate the phi node, even if they are all equal. So we need to
132        * bail out in this case.
133        *
134        * This seems to be a bug in the original paper.
135        */
136       if (!src->def) {
137          unique = false;
138          break;
139       }
140 
141       struct ir3_instruction *src_instr = src->def->instr;
142 
143       /* phi sources which point to the phi itself don't count for
144        * figuring out if the phi is trivial
145        */
146       if (src_instr == phi)
147          continue;
148 
149       if (src_instr->opc == OPC_META_PHI) {
150          src->def = remove_trivial_phi(src->def->instr);
151       }
152 
153       if (unique_def) {
154          if (unique_def != src->def) {
155             unique = false;
156             break;
157          }
158       } else {
159          unique_def = src->def;
160       }
161    }
162 
163    if (unique) {
164       phi->data = unique_def;
165       return unique_def;
166    } else {
167       return phi->dsts[0];
168    }
169 }
170 
171 static struct ir3_register *
lookup_value(struct ir3_register * reg)172 lookup_value(struct ir3_register *reg)
173 {
174    if (reg->instr->opc == OPC_META_PHI)
175       return reg->instr->data;
176    return reg;
177 }
178 
179 static struct ir3_register *
lookup_live_in(struct array_ctx * ctx,struct ir3_block * block,unsigned id)180 lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
181 {
182    struct array_state *state = get_state(ctx, block, id);
183    if (state->live_in_definition)
184       return lookup_value(state->live_in_definition);
185 
186    return NULL;
187 }
188 
189 bool
ir3_array_to_ssa(struct ir3 * ir)190 ir3_array_to_ssa(struct ir3 *ir)
191 {
192    struct array_ctx ctx = {};
193 
194    foreach_array (array, &ir->array_list) {
195       ctx.array_count = MAX2(ctx.array_count, array->id + 1);
196    }
197 
198    if (ctx.array_count == 0)
199       return false;
200 
201    unsigned i = 0;
202    foreach_block (block, &ir->block_list) {
203       block->index = i++;
204    }
205 
206    ctx.ir = ir;
207    ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));
208 
209    foreach_block (block, &ir->block_list) {
210       foreach_instr (instr, &block->instr_list) {
211          foreach_dst (dst, instr) {
212             if (dst->flags & IR3_REG_ARRAY) {
213                struct array_state *state =
214                   get_state(&ctx, block, dst->array.id);
215                state->live_out_definition = dst;
216             }
217          }
218       }
219    }
220 
221    foreach_block (block, &ir->block_list) {
222       foreach_instr (instr, &block->instr_list) {
223          if (instr->opc == OPC_META_PHI)
224             continue;
225 
226          foreach_dst (reg, instr) {
227             if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
228                struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
229 
230                /* Construct any phi nodes necessary to read this value */
231                read_value_beginning(&ctx, block, arr);
232             }
233          }
234          foreach_src (reg, instr) {
235             if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
236                struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
237 
238                /* Construct any phi nodes necessary to read this value */
239                read_value_beginning(&ctx, block, arr);
240             }
241          }
242       }
243    }
244 
245    foreach_block (block, &ir->block_list) {
246       foreach_instr_safe (instr, &block->instr_list) {
247          if (instr->opc == OPC_META_PHI)
248             remove_trivial_phi(instr);
249          else
250             break;
251       }
252    }
253 
254    foreach_block (block, &ir->block_list) {
255       foreach_instr_safe (instr, &block->instr_list) {
256          if (instr->opc == OPC_META_PHI) {
257             if (!(instr->flags & IR3_REG_ARRAY))
258                continue;
259             if (instr->data != instr->dsts[0]) {
260                list_del(&instr->node);
261                continue;
262             }
263             for (unsigned i = 0; i < instr->srcs_count; i++) {
264                instr->srcs[i] = lookup_value(instr->srcs[i]);
265             }
266          } else {
267             foreach_dst (reg, instr) {
268                if ((reg->flags & IR3_REG_ARRAY)) {
269                   if (!reg->tied) {
270                      struct ir3_register *def =
271                         lookup_live_in(&ctx, block, reg->array.id);
272                      if (def)
273                         ir3_reg_set_last_array(instr, reg, def);
274                   }
275                   reg->flags |= IR3_REG_SSA;
276                }
277             }
278             foreach_src (reg, instr) {
279                if ((reg->flags & IR3_REG_ARRAY)) {
280                   /* It is assumed that before calling
281                    * ir3_array_to_ssa(), reg->def was set to the
282                    * previous writer of the array within the current
283                    * block or NULL if none.
284                    */
285                   if (!reg->def) {
286                      reg->def = lookup_live_in(&ctx, block, reg->array.id);
287                   }
288                   reg->flags |= IR3_REG_SSA;
289                }
290             }
291          }
292       }
293    }
294 
295    free(ctx.states);
296    return true;
297 }
298