xref: /aosp_15_r20/external/mesa3d/src/broadcom/compiler/vir_live_variables.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2012 Intel Corporation
3  * Copyright © 2016 Broadcom
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #define MAX_INSTRUCTION (1 << 30)
26 
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
30 
/* Per-temp bookkeeping for conditional / partial writes seen while walking a
 * single block.  One entry exists per temp; an entry whose inst is NULL means
 * no conditional write to that temp has been recorded.
 */
struct partial_update_state {
        /* The instruction that performed the conditional or partial write. */
        struct qinst *inst;
        /* The instruction that produced the flags the conditional write
         * depends on (NULL when no flag-setting instruction was recorded).
         */
        struct qinst *flags_inst;
};
38 
39 static int
vir_reg_to_var(struct qreg reg)40 vir_reg_to_var(struct qreg reg)
41 {
42         if (reg.file == QFILE_TEMP)
43                 return reg.index;
44 
45         return -1;
46 }
47 
48 static void
vir_setup_use(struct v3d_compile * c,struct qblock * block,int ip,struct partial_update_state * partial_update_ht,struct qinst * inst,struct qreg src,struct qinst * flags_inst)49 vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
50               struct partial_update_state *partial_update_ht, struct qinst *inst,
51               struct qreg src, struct qinst *flags_inst)
52 {
53         int var = vir_reg_to_var(src);
54         if (var == -1)
55                 return;
56 
57         c->temp_start[var] = MIN2(c->temp_start[var], ip);
58         c->temp_end[var] = MAX2(c->temp_end[var], ip);
59 
60         /* The use[] bitset marks when the block makes
61          * use of a variable without having completely
62          * defined that variable within the block.
63          */
64         if (!BITSET_TEST(block->def, var)) {
65                 /* If this use of var is conditional and the condition
66                  * and flags match those of a previous instruction
67                  * in the same block partially defining var then we
68                  * consider var completely defined within the block.
69                  */
70                 if (BITSET_TEST(block->defout, var)) {
71                         struct partial_update_state *state =
72                                 &partial_update_ht[var];
73                         if (state->inst) {
74                                 if (vir_get_cond(inst) == vir_get_cond(state->inst) &&
75                                     flags_inst == state->flags_inst) {
76                                         return;
77                                 }
78                         }
79                 }
80 
81                 BITSET_SET(block->use, var);
82         }
83 }
84 
85 /* The def[] bitset marks when an initialization in a
86  * block completely screens off previous updates of
87  * that variable.
88  */
89 static void
vir_setup_def(struct v3d_compile * c,struct qblock * block,int ip,struct partial_update_state * partial_update,struct qinst * inst,struct qinst * flags_inst)90 vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
91               struct partial_update_state *partial_update, struct qinst *inst,
92               struct qinst *flags_inst)
93 {
94         if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
95                 return;
96 
97         int var = vir_reg_to_var(inst->dst);
98         if (var == -1)
99                 return;
100 
101         c->temp_start[var] = MIN2(c->temp_start[var], ip);
102         c->temp_end[var] = MAX2(c->temp_end[var], ip);
103 
104         /* Mark the block as having a (partial) def of the var. */
105         BITSET_SET(block->defout, var);
106 
107         /* If we've already tracked this as a def that screens off previous
108          * uses, or already used it within the block, there's nothing to do.
109          */
110         if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
111                 return;
112 
113         /* Easy, common case: unconditional full register update.*/
114         if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
115              inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&
116             inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
117             inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
118                 BITSET_SET(block->def, var);
119                 return;
120         }
121 
122         /* Keep track of conditional writes.
123          *
124          * Notice that the dst's live range for a conditional or partial writes
125          * will get extended up the control flow to the top of the program until
126          * we find a full write, making register allocation more difficult, so
127          * we should try our best to keep track of these and figure out if a
128          * combination of them actually writes the entire register so we can
129          * stop that process early and reduce liveness.
130          *
131          * FIXME: Track partial updates via pack/unpack.
132          */
133         struct partial_update_state *state = &partial_update[var];
134         if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
135             inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
136                 state->inst = inst;
137                 state->flags_inst = flags_inst;
138         }
139 }
140 
141 /* Sets up the def/use arrays for when variables are used-before-defined or
142  * defined-before-used in the block.
143  *
144  * Also initializes the temp_start/temp_end to cover just the instruction IPs
145  * where the variable is used, which will be extended later in
146  * vir_compute_start_end().
147  */
148 static void
vir_setup_def_use(struct v3d_compile * c)149 vir_setup_def_use(struct v3d_compile *c)
150 {
151         struct partial_update_state *partial_update =
152                 rzalloc_array(c, struct partial_update_state, c->num_temps);
153         int ip = 0;
154 
155         vir_for_each_block(block, c) {
156                 block->start_ip = ip;
157 
158                 memset(partial_update, 0,
159                        sizeof(struct partial_update_state) * c->num_temps);
160 
161                 struct qinst *flags_inst = NULL;
162 
163                 vir_for_each_inst(inst, block) {
164                         for (int i = 0; i < vir_get_nsrc(inst); i++) {
165                                 vir_setup_use(c, block, ip, partial_update,
166                                               inst, inst->src[i], flags_inst);
167                         }
168 
169                         vir_setup_def(c, block, ip, partial_update,
170                                       inst, flags_inst);
171 
172                         if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
173                             inst->qpu.flags.mpf != V3D_QPU_PF_NONE) {
174                                flags_inst = inst;
175                         }
176 
177                         if (inst->qpu.flags.auf != V3D_QPU_UF_NONE ||
178                             inst->qpu.flags.muf != V3D_QPU_UF_NONE) {
179                                 flags_inst = NULL;
180                         }
181 
182                         /* Payload registers: for fragment shaders, W,
183                          * centroid W, and Z will be initialized in r0/1/2
184                          * until v42, or r1/r2/r3 since v71.
185                          *
186                          * For compute shaders, payload is in r0/r2 up to v42,
187                          * r2/r3 since v71.
188                          *
189                          * Register allocation will force their nodes to those
190                          * registers.
191                          */
192                         if (inst->src[0].file == QFILE_REG) {
193                                 uint32_t min_payload_r = c->devinfo->ver >= 71 ? 1 : 0;
194                                 uint32_t max_payload_r = c->devinfo->ver >= 71 ? 3 : 2;
195                                 if (inst->src[0].index >= min_payload_r ||
196                                     inst->src[0].index <= max_payload_r) {
197                                         c->temp_start[inst->dst.index] = 0;
198                                 }
199                         }
200 
201                         ip++;
202                 }
203                 block->end_ip = ip;
204         }
205 
206         ralloc_free(partial_update);
207 }
208 
209 static bool
vir_live_variables_dataflow(struct v3d_compile * c,int bitset_words)210 vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
211 {
212         bool cont = false;
213 
214         vir_for_each_block_rev(block, c) {
215                 /* Update live_out: Any successor using the variable
216                  * on entrance needs us to have the variable live on
217                  * exit.
218                  */
219                 vir_for_each_successor(succ, block) {
220                         for (int i = 0; i < bitset_words; i++) {
221                                 BITSET_WORD new_live_out = (succ->live_in[i] &
222                                                             ~block->live_out[i]);
223                                 if (new_live_out) {
224                                         block->live_out[i] |= new_live_out;
225                                         cont = true;
226                                 }
227                         }
228                 }
229 
230                 /* Update live_in */
231                 for (int i = 0; i < bitset_words; i++) {
232                         BITSET_WORD new_live_in = (block->use[i] |
233                                                    (block->live_out[i] &
234                                                     ~block->def[i]));
235                         if (new_live_in & ~block->live_in[i]) {
236                                 block->live_in[i] |= new_live_in;
237                                 cont = true;
238                         }
239                 }
240         }
241 
242         return cont;
243 }
244 
245 static bool
vir_live_variables_defin_defout_dataflow(struct v3d_compile * c,int bitset_words)246 vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
247 {
248         bool cont = false;
249 
250         vir_for_each_block_rev(block, c) {
251                 /* Propagate defin/defout down the successors to produce the
252                  * union of blocks with a reachable (partial) definition of
253                  * the var.
254                  *
255                  * This keeps a conditional first write to a reg from
256                  * extending its lifetime back to the start of the program.
257                  */
258                 vir_for_each_successor(succ, block) {
259                         for (int i = 0; i < bitset_words; i++) {
260                                 BITSET_WORD new_def = (block->defout[i] &
261                                                        ~succ->defin[i]);
262                                 succ->defin[i] |= new_def;
263                                 succ->defout[i] |= new_def;
264                                 cont |= new_def;
265                         }
266                 }
267         }
268 
269         return cont;
270 }
271 
272 /**
273  * Extend the start/end ranges for each variable to account for the
274  * new information calculated from control flow.
275  */
276 static void
vir_compute_start_end(struct v3d_compile * c,int num_vars)277 vir_compute_start_end(struct v3d_compile *c, int num_vars)
278 {
279         vir_for_each_block(block, c) {
280                 for (int i = 0; i < num_vars; i++) {
281                         if (BITSET_TEST(block->live_in, i) &&
282                             BITSET_TEST(block->defin, i)) {
283                                 c->temp_start[i] = MIN2(c->temp_start[i],
284                                                         block->start_ip);
285                                 c->temp_end[i] = MAX2(c->temp_end[i],
286                                                       block->start_ip);
287                         }
288 
289                         if (BITSET_TEST(block->live_out, i) &&
290                             BITSET_TEST(block->defout, i)) {
291                                 c->temp_start[i] = MIN2(c->temp_start[i],
292                                                         block->end_ip);
293                                 c->temp_end[i] = MAX2(c->temp_end[i],
294                                                       block->end_ip);
295                         }
296                 }
297         }
298 }
299 
300 void
vir_calculate_live_intervals(struct v3d_compile * c)301 vir_calculate_live_intervals(struct v3d_compile *c)
302 {
303         int bitset_words = BITSET_WORDS(c->num_temps);
304 
305         /* We may be called more than once if we've rearranged the program to
306          * try to get register allocation to succeed.
307          */
308         if (c->temp_start) {
309                 ralloc_free(c->temp_start);
310                 ralloc_free(c->temp_end);
311 
312                 vir_for_each_block(block, c) {
313                         ralloc_free(block->def);
314                         ralloc_free(block->defin);
315                         ralloc_free(block->defout);
316                         ralloc_free(block->use);
317                         ralloc_free(block->live_in);
318                         ralloc_free(block->live_out);
319                 }
320         }
321 
322         c->temp_start = rzalloc_array(c, int, c->num_temps);
323         c->temp_end = rzalloc_array(c, int, c->num_temps);
324 
325         for (int i = 0; i < c->num_temps; i++) {
326                 c->temp_start[i] = MAX_INSTRUCTION;
327                 c->temp_end[i] = -1;
328         }
329 
330         vir_for_each_block(block, c) {
331                 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
332                 block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
333                 block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
334                 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
335                 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
336                 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
337         }
338 
339         vir_setup_def_use(c);
340 
341         while (vir_live_variables_dataflow(c, bitset_words))
342                 ;
343 
344         while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
345                 ;
346 
347         vir_compute_start_end(c, c->num_temps);
348 
349         c->live_intervals_valid = true;
350 }
351