xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/bi_pressure_schedule.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2022 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <[email protected]>
25  */
26 
27 /* Bottom-up local scheduler to reduce register pressure */
28 
29 #include "util/dag.h"
30 #include "compiler.h"
31 
/* Per-block scheduling state threaded through the pass */
struct sched_ctx {
   /* Dependency graph for the block; the DAG heads are the instructions
    * that are ready to be picked next (scheduling proceeds bottom-up)
    */
   struct dag *dag;

   /* Scratch SSA live set, seeded from the block's live-out and updated
    * instruction by instruction as the walk moves upwards
    */
   BITSET_WORD *live;
};
39 
/* One node of the dependency graph, wrapping a single instruction */
struct sched_node {
   /* Embedded DAG bookkeeping (initialized with dag_init_node) */
   struct dag_node dag;

   /* Instruction this node represents */
   bi_instr *instr;
};
46 
47 static void
add_dep(struct sched_node * a,struct sched_node * b)48 add_dep(struct sched_node *a, struct sched_node *b)
49 {
50    if (a && b)
51       dag_add_edge(&a->dag, &b->dag, 0);
52 }
53 
54 static struct dag *
create_dag(bi_context * ctx,bi_block * block,void * memctx)55 create_dag(bi_context *ctx, bi_block *block, void *memctx)
56 {
57    struct dag *dag = dag_create(ctx);
58 
59    struct sched_node **last_write =
60       calloc(ctx->ssa_alloc, sizeof(struct sched_node *));
61    struct sched_node *coverage = NULL;
62    struct sched_node *preload = NULL;
63 
64    /* Last memory load, to serialize stores against */
65    struct sched_node *memory_load = NULL;
66 
67    /* Last memory store, to serialize loads and stores against */
68    struct sched_node *memory_store = NULL;
69 
70    bi_foreach_instr_in_block(block, I) {
71       /* Leave branches at the end */
72       if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
73          break;
74 
75       assert(I->branch_target == NULL);
76 
77       struct sched_node *node = rzalloc(memctx, struct sched_node);
78       node->instr = I;
79       dag_init_node(dag, &node->dag);
80 
81       /* Reads depend on writes, no other hazards in SSA */
82       bi_foreach_ssa_src(I, s)
83          add_dep(node, last_write[I->src[s].value]);
84 
85       bi_foreach_dest(I, d)
86          last_write[I->dest[d].value] = node;
87 
88       switch (bi_opcode_props[I->op].message) {
89       case BIFROST_MESSAGE_LOAD:
90          /* Regular memory loads needs to be serialized against
91           * other memory access. However, UBO memory is read-only
92           * so it can be moved around freely.
93           */
94          if (I->seg != BI_SEG_UBO) {
95             add_dep(node, memory_store);
96             memory_load = node;
97          }
98 
99          break;
100 
101       case BIFROST_MESSAGE_ATTRIBUTE:
102          /* Regular attribute loads can be reordered, but
103           * writeable attributes can't be. Our one use of
104           * writeable attributes are images.
105           */
106          if ((I->op == BI_OPCODE_LD_TEX) || (I->op == BI_OPCODE_LD_TEX_IMM) ||
107              (I->op == BI_OPCODE_LD_ATTR_TEX)) {
108             add_dep(node, memory_store);
109             memory_load = node;
110          }
111 
112          break;
113 
114       case BIFROST_MESSAGE_STORE:
115          assert(I->seg != BI_SEG_UBO);
116          add_dep(node, memory_load);
117          add_dep(node, memory_store);
118          memory_store = node;
119          break;
120 
121       case BIFROST_MESSAGE_ATOMIC:
122       case BIFROST_MESSAGE_BARRIER:
123          add_dep(node, memory_load);
124          add_dep(node, memory_store);
125          memory_load = node;
126          memory_store = node;
127          break;
128 
129       case BIFROST_MESSAGE_BLEND:
130       case BIFROST_MESSAGE_Z_STENCIL:
131       case BIFROST_MESSAGE_TILE:
132          add_dep(node, coverage);
133          coverage = node;
134          break;
135 
136       case BIFROST_MESSAGE_ATEST:
137          /* ATEST signals the end of shader side effects */
138          add_dep(node, memory_store);
139          memory_store = node;
140 
141          /* ATEST also updates coverage */
142          add_dep(node, coverage);
143          coverage = node;
144          break;
145       default:
146          break;
147       }
148 
149       add_dep(node, preload);
150 
151       if (I->op == BI_OPCODE_DISCARD_F32) {
152          /* Serialize against ATEST */
153          add_dep(node, coverage);
154          coverage = node;
155 
156          /* Also serialize against memory and barriers */
157          add_dep(node, memory_load);
158          add_dep(node, memory_store);
159          memory_load = node;
160          memory_store = node;
161       } else if ((I->op == BI_OPCODE_PHI) ||
162                  (I->op == BI_OPCODE_MOV_I32 &&
163                   I->src[0].type == BI_INDEX_REGISTER)) {
164          preload = node;
165       }
166    }
167 
168    free(last_write);
169 
170    return dag;
171 }
172 
173 /*
174  * Calculate the change in register pressure from scheduling a given
175  * instruction. Equivalently, calculate the difference in the number of live
176  * registers before and after the instruction, given the live set after the
177  * instruction. This calculation follows immediately from the dataflow
178  * definition of liveness:
179  *
180  *      live_in = (live_out - KILL) + GEN
181  */
182 static signed
calculate_pressure_delta(bi_instr * I,BITSET_WORD * live)183 calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
184 {
185    signed delta = 0;
186 
187    /* Destinations must be unique */
188    bi_foreach_dest(I, d) {
189       if (BITSET_TEST(live, I->dest[d].value))
190          delta -= bi_count_write_registers(I, d);
191    }
192 
193    bi_foreach_ssa_src(I, src) {
194       /* Filter duplicates */
195       bool dupe = false;
196 
197       for (unsigned i = 0; i < src; ++i) {
198          if (bi_is_equiv(I->src[i], I->src[src])) {
199             dupe = true;
200             break;
201          }
202       }
203 
204       if (!dupe && !BITSET_TEST(live, I->src[src].value))
205          delta += bi_count_read_registers(I, src);
206    }
207 
208    return delta;
209 }
210 
211 /*
212  * Choose the next instruction, bottom-up. For now we use a simple greedy
213  * heuristic: choose the instruction that has the best effect on liveness.
214  */
215 static struct sched_node *
choose_instr(struct sched_ctx * s)216 choose_instr(struct sched_ctx *s)
217 {
218    int32_t min_delta = INT32_MAX;
219    struct sched_node *best = NULL;
220 
221    list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
222       int32_t delta = calculate_pressure_delta(n->instr, s->live);
223 
224       if (delta < min_delta) {
225          best = n;
226          min_delta = delta;
227       }
228    }
229 
230    return best;
231 }
232 
/*
 * Schedule one block bottom-up, keeping the new order only if its maximum
 * register pressure is strictly lower than the original order's.
 */
static void
pressure_schedule_block(bi_context *ctx, bi_block *block, struct sched_ctx *s)
{
   /* off by a constant, that's ok */
   signed pressure = 0;
   signed orig_max_pressure = 0;
   unsigned nr_ins = 0;

   /* First pass: measure the peak pressure of the existing order, walking
    * backwards from the block's SSA live-out set.
    */
   memcpy(s->live, block->ssa_live_out,
          BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));

   bi_foreach_instr_in_block_rev(block, I) {
      pressure += calculate_pressure_delta(I, s->live);
      orig_max_pressure = MAX2(pressure, orig_max_pressure);
      bi_liveness_ins_update_ssa(s->live, I);
      nr_ins++;
   }

   /* Reset the live set before replaying liveness for the new order */
   memcpy(s->live, block->ssa_live_out,
          BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));

   /* off by a constant, that's ok */
   signed max_pressure = 0;
   pressure = 0;

   /* nr_ins counts every instruction, including any trailing branches that
    * never entered the DAG, so the array is at least large enough.
    */
   struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
   nr_ins = 0;

   /* Second pass: greedily pick ready instructions bottom-up, tracking the
    * peak pressure the new order would produce.
    */
   while (!list_is_empty(&s->dag->heads)) {
      struct sched_node *node = choose_instr(s);
      pressure += calculate_pressure_delta(node->instr, s->live);
      max_pressure = MAX2(pressure, max_pressure);
      dag_prune_head(s->dag, &node->dag);

      schedule[nr_ins++] = node;
      bi_liveness_ins_update_ssa(s->live, node->instr);
   }

   /* Bail if it looks like it's worse */
   if (max_pressure >= orig_max_pressure) {
      free(schedule);
      return;
   }

   /* Apply the schedule. Instructions were chosen bottom-up, so inserting
    * each at the head of the list reconstructs top-down program order,
    * with any branches (never removed) remaining at the end.
    */
   for (unsigned i = 0; i < nr_ins; ++i) {
      bi_remove_instruction(schedule[i]->instr);
      list_add(&schedule[i]->instr->link, &block->instructions);
   }

   free(schedule);
}
285 
286 void
bi_pressure_schedule(bi_context * ctx)287 bi_pressure_schedule(bi_context *ctx)
288 {
289    bi_compute_liveness_ssa(ctx);
290    void *memctx = ralloc_context(ctx);
291    BITSET_WORD *live =
292       ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc));
293 
294    bi_foreach_block(ctx, block) {
295       struct sched_ctx sctx = {.dag = create_dag(ctx, block, memctx),
296                                .live = live};
297 
298       pressure_schedule_block(ctx, block, &sctx);
299    }
300 
301    ralloc_free(memctx);
302 }
303