xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_postsched.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2019 Google, Inc.
3*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker  *
5*61046927SAndroid Build Coastguard Worker  * Authors:
6*61046927SAndroid Build Coastguard Worker  *    Rob Clark <[email protected]>
7*61046927SAndroid Build Coastguard Worker  */
8*61046927SAndroid Build Coastguard Worker 
9*61046927SAndroid Build Coastguard Worker #include "util/dag.h"
10*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
11*61046927SAndroid Build Coastguard Worker 
12*61046927SAndroid Build Coastguard Worker #include "ir3.h"
13*61046927SAndroid Build Coastguard Worker #include "ir3_compiler.h"
14*61046927SAndroid Build Coastguard Worker #include "ir3_context.h"
15*61046927SAndroid Build Coastguard Worker 
/* Scheduler debug output exists only in MESA_DEBUG builds, and even then is
 * gated at runtime on the IR3_DBG_SCHEDMSGS bit of ir3_shader_debug.
 */
#if MESA_DEBUG
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#else
#define SCHED_DEBUG 0
#endif

/* Log a printf-style message, prefixed with "PSCHED: ", when SCHED_DEBUG is
 * enabled.
 */
#define d(fmt, ...)                                                            \
   do {                                                                        \
      if (SCHED_DEBUG)                                                         \
         mesa_logi("PSCHED: " fmt, ##__VA_ARGS__);                             \
   } while (0)
27*61046927SAndroid Build Coastguard Worker 
/* Like d(), but the message is followed by ": " and a printout of `instr`,
 * all written through a single log stream.
 */
#define di(instr, fmt, ...)                                                    \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         struct log_stream *psched_log = mesa_log_streami();                   \
         mesa_log_stream_printf(psched_log, "PSCHED: " fmt ": ",               \
                                ##__VA_ARGS__);                                \
         ir3_print_instr_stream(psched_log, instr);                            \
         mesa_log_stream_destroy(psched_log);                                  \
      }                                                                        \
   } while (0)
37*61046927SAndroid Build Coastguard Worker 
/* Deepest level that dump_node() prints; DAG heads are level 0. */
#define SCHED_DEBUG_DUMP_DEPTH 1
39*61046927SAndroid Build Coastguard Worker 
40*61046927SAndroid Build Coastguard Worker /*
41*61046927SAndroid Build Coastguard Worker  * Post RA Instruction Scheduling
42*61046927SAndroid Build Coastguard Worker  */
43*61046927SAndroid Build Coastguard Worker 
44*61046927SAndroid Build Coastguard Worker struct ir3_postsched_ctx {
45*61046927SAndroid Build Coastguard Worker    struct ir3 *ir;
46*61046927SAndroid Build Coastguard Worker 
47*61046927SAndroid Build Coastguard Worker    struct ir3_shader_variant *v;
48*61046927SAndroid Build Coastguard Worker 
49*61046927SAndroid Build Coastguard Worker    void *mem_ctx;
50*61046927SAndroid Build Coastguard Worker    struct ir3_block *block; /* the current block */
51*61046927SAndroid Build Coastguard Worker    struct dag *dag;
52*61046927SAndroid Build Coastguard Worker 
53*61046927SAndroid Build Coastguard Worker    struct list_head unscheduled_list; /* unscheduled instructions */
54*61046927SAndroid Build Coastguard Worker 
55*61046927SAndroid Build Coastguard Worker    unsigned ip;
56*61046927SAndroid Build Coastguard Worker 
57*61046927SAndroid Build Coastguard Worker    int ss_delay;
58*61046927SAndroid Build Coastguard Worker    int sy_delay;
59*61046927SAndroid Build Coastguard Worker };
60*61046927SAndroid Build Coastguard Worker 
61*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node {
62*61046927SAndroid Build Coastguard Worker    struct dag_node dag; /* must be first for util_dynarray_foreach */
63*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *instr;
64*61046927SAndroid Build Coastguard Worker    bool partially_evaluated_path;
65*61046927SAndroid Build Coastguard Worker 
66*61046927SAndroid Build Coastguard Worker    unsigned earliest_ip;
67*61046927SAndroid Build Coastguard Worker 
68*61046927SAndroid Build Coastguard Worker    bool has_sy_src, has_ss_src;
69*61046927SAndroid Build Coastguard Worker 
70*61046927SAndroid Build Coastguard Worker    unsigned max_delay;
71*61046927SAndroid Build Coastguard Worker };
72*61046927SAndroid Build Coastguard Worker 
/* Iterate __n (declared as struct ir3_postsched_node *) over every node
 * linked into __list through its dag.link member.
 */
#define foreach_sched_node(__n, __list)                                        \
   list_for_each_entry (struct ir3_postsched_node, __n, __list, dag.link)
75*61046927SAndroid Build Coastguard Worker 
76*61046927SAndroid Build Coastguard Worker static bool
has_sy_src(struct ir3_instruction * instr)77*61046927SAndroid Build Coastguard Worker has_sy_src(struct ir3_instruction *instr)
78*61046927SAndroid Build Coastguard Worker {
79*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *node = instr->data;
80*61046927SAndroid Build Coastguard Worker    return node->has_sy_src;
81*61046927SAndroid Build Coastguard Worker }
82*61046927SAndroid Build Coastguard Worker 
83*61046927SAndroid Build Coastguard Worker static bool
has_ss_src(struct ir3_instruction * instr)84*61046927SAndroid Build Coastguard Worker has_ss_src(struct ir3_instruction *instr)
85*61046927SAndroid Build Coastguard Worker {
86*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *node = instr->data;
87*61046927SAndroid Build Coastguard Worker    return node->has_ss_src;
88*61046927SAndroid Build Coastguard Worker }
89*61046927SAndroid Build Coastguard Worker 
90*61046927SAndroid Build Coastguard Worker #ifndef NDEBUG
91*61046927SAndroid Build Coastguard Worker static void
sched_dag_validate_cb(const struct dag_node * node,void * data)92*61046927SAndroid Build Coastguard Worker sched_dag_validate_cb(const struct dag_node *node, void *data)
93*61046927SAndroid Build Coastguard Worker {
94*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
95*61046927SAndroid Build Coastguard Worker 
96*61046927SAndroid Build Coastguard Worker    ir3_print_instr(n->instr);
97*61046927SAndroid Build Coastguard Worker }
98*61046927SAndroid Build Coastguard Worker #endif
99*61046927SAndroid Build Coastguard Worker 
100*61046927SAndroid Build Coastguard Worker static void
schedule(struct ir3_postsched_ctx * ctx,struct ir3_instruction * instr)101*61046927SAndroid Build Coastguard Worker schedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
102*61046927SAndroid Build Coastguard Worker {
103*61046927SAndroid Build Coastguard Worker    assert(ctx->block == instr->block);
104*61046927SAndroid Build Coastguard Worker 
105*61046927SAndroid Build Coastguard Worker    /* remove from unscheduled_list:
106*61046927SAndroid Build Coastguard Worker     */
107*61046927SAndroid Build Coastguard Worker    list_delinit(&instr->node);
108*61046927SAndroid Build Coastguard Worker 
109*61046927SAndroid Build Coastguard Worker    di(instr, "schedule");
110*61046927SAndroid Build Coastguard Worker 
111*61046927SAndroid Build Coastguard Worker    bool counts_for_delay = is_alu(instr) || is_flow(instr);
112*61046927SAndroid Build Coastguard Worker 
113*61046927SAndroid Build Coastguard Worker    unsigned delay_cycles = counts_for_delay ? 1 + instr->repeat : 0;
114*61046927SAndroid Build Coastguard Worker 
115*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *n = instr->data;
116*61046927SAndroid Build Coastguard Worker 
117*61046927SAndroid Build Coastguard Worker    /* We insert any nop's needed to get to earliest_ip, then advance
118*61046927SAndroid Build Coastguard Worker     * delay_cycles by scheduling the instruction.
119*61046927SAndroid Build Coastguard Worker     */
120*61046927SAndroid Build Coastguard Worker    ctx->ip = MAX2(ctx->ip, n->earliest_ip) + delay_cycles;
121*61046927SAndroid Build Coastguard Worker 
122*61046927SAndroid Build Coastguard Worker    util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
123*61046927SAndroid Build Coastguard Worker       unsigned delay = (unsigned)(uintptr_t)edge->data;
124*61046927SAndroid Build Coastguard Worker       struct ir3_postsched_node *child =
125*61046927SAndroid Build Coastguard Worker          container_of(edge->child, struct ir3_postsched_node, dag);
126*61046927SAndroid Build Coastguard Worker       child->earliest_ip = MAX2(child->earliest_ip, ctx->ip + delay);
127*61046927SAndroid Build Coastguard Worker    }
128*61046927SAndroid Build Coastguard Worker 
129*61046927SAndroid Build Coastguard Worker    list_addtail(&instr->node, &instr->block->instr_list);
130*61046927SAndroid Build Coastguard Worker 
131*61046927SAndroid Build Coastguard Worker    dag_prune_head(ctx->dag, &n->dag);
132*61046927SAndroid Build Coastguard Worker 
133*61046927SAndroid Build Coastguard Worker    if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH))
134*61046927SAndroid Build Coastguard Worker       return;
135*61046927SAndroid Build Coastguard Worker 
136*61046927SAndroid Build Coastguard Worker    if (is_ss_producer(instr)) {
137*61046927SAndroid Build Coastguard Worker       ctx->ss_delay = soft_ss_delay(instr);
138*61046927SAndroid Build Coastguard Worker    } else if (has_ss_src(instr)) {
139*61046927SAndroid Build Coastguard Worker       ctx->ss_delay = 0;
140*61046927SAndroid Build Coastguard Worker    } else if (ctx->ss_delay > 0) {
141*61046927SAndroid Build Coastguard Worker       ctx->ss_delay--;
142*61046927SAndroid Build Coastguard Worker    }
143*61046927SAndroid Build Coastguard Worker 
144*61046927SAndroid Build Coastguard Worker    if (is_sy_producer(instr)) {
145*61046927SAndroid Build Coastguard Worker       ctx->sy_delay = soft_sy_delay(instr, ctx->block->shader);
146*61046927SAndroid Build Coastguard Worker    } else if (has_sy_src(instr)) {
147*61046927SAndroid Build Coastguard Worker       ctx->sy_delay = 0;
148*61046927SAndroid Build Coastguard Worker    } else if (ctx->sy_delay > 0) {
149*61046927SAndroid Build Coastguard Worker       ctx->sy_delay--;
150*61046927SAndroid Build Coastguard Worker    }
151*61046927SAndroid Build Coastguard Worker }
152*61046927SAndroid Build Coastguard Worker 
153*61046927SAndroid Build Coastguard Worker static unsigned
node_delay(struct ir3_postsched_ctx * ctx,struct ir3_postsched_node * n)154*61046927SAndroid Build Coastguard Worker node_delay(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n)
155*61046927SAndroid Build Coastguard Worker {
156*61046927SAndroid Build Coastguard Worker    return MAX2(n->earliest_ip, ctx->ip) - ctx->ip;
157*61046927SAndroid Build Coastguard Worker }
158*61046927SAndroid Build Coastguard Worker 
159*61046927SAndroid Build Coastguard Worker static unsigned
node_delay_soft(struct ir3_postsched_ctx * ctx,struct ir3_postsched_node * n)160*61046927SAndroid Build Coastguard Worker node_delay_soft(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n)
161*61046927SAndroid Build Coastguard Worker {
162*61046927SAndroid Build Coastguard Worker    unsigned delay = node_delay(ctx, n);
163*61046927SAndroid Build Coastguard Worker 
164*61046927SAndroid Build Coastguard Worker    /* This takes into account that as when we schedule multiple tex or sfu, the
165*61046927SAndroid Build Coastguard Worker     * first user has to wait for all of them to complete.
166*61046927SAndroid Build Coastguard Worker     */
167*61046927SAndroid Build Coastguard Worker    if (n->has_ss_src)
168*61046927SAndroid Build Coastguard Worker       delay = MAX2(delay, ctx->ss_delay);
169*61046927SAndroid Build Coastguard Worker    if (n->has_sy_src)
170*61046927SAndroid Build Coastguard Worker       delay = MAX2(delay, ctx->sy_delay);
171*61046927SAndroid Build Coastguard Worker 
172*61046927SAndroid Build Coastguard Worker    return delay;
173*61046927SAndroid Build Coastguard Worker }
174*61046927SAndroid Build Coastguard Worker 
175*61046927SAndroid Build Coastguard Worker static void
dump_node(struct ir3_postsched_ctx * ctx,struct ir3_postsched_node * n,int level)176*61046927SAndroid Build Coastguard Worker dump_node(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n,
177*61046927SAndroid Build Coastguard Worker           int level)
178*61046927SAndroid Build Coastguard Worker {
179*61046927SAndroid Build Coastguard Worker    if (level > SCHED_DEBUG_DUMP_DEPTH)
180*61046927SAndroid Build Coastguard Worker       return;
181*61046927SAndroid Build Coastguard Worker 
182*61046927SAndroid Build Coastguard Worker    di(n->instr, "%*s%smaxdel=%d, node_delay=%d,node_delay_soft=%d, %d parents ",
183*61046927SAndroid Build Coastguard Worker       level * 2, "", (level > 0 ? "-> " : ""), n->max_delay, node_delay(ctx, n),
184*61046927SAndroid Build Coastguard Worker       node_delay_soft(ctx, n), n->dag.parent_count);
185*61046927SAndroid Build Coastguard Worker 
186*61046927SAndroid Build Coastguard Worker    util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
187*61046927SAndroid Build Coastguard Worker       struct ir3_postsched_node *child =
188*61046927SAndroid Build Coastguard Worker          (struct ir3_postsched_node *)edge->child;
189*61046927SAndroid Build Coastguard Worker 
190*61046927SAndroid Build Coastguard Worker       dump_node(ctx, child, level + 1);
191*61046927SAndroid Build Coastguard Worker    }
192*61046927SAndroid Build Coastguard Worker }
193*61046927SAndroid Build Coastguard Worker 
194*61046927SAndroid Build Coastguard Worker static void
dump_state(struct ir3_postsched_ctx * ctx)195*61046927SAndroid Build Coastguard Worker dump_state(struct ir3_postsched_ctx *ctx)
196*61046927SAndroid Build Coastguard Worker {
197*61046927SAndroid Build Coastguard Worker    if (!SCHED_DEBUG)
198*61046927SAndroid Build Coastguard Worker       return;
199*61046927SAndroid Build Coastguard Worker 
200*61046927SAndroid Build Coastguard Worker    foreach_sched_node (n, &ctx->dag->heads) {
201*61046927SAndroid Build Coastguard Worker       dump_node(ctx, n, 0);
202*61046927SAndroid Build Coastguard Worker    }
203*61046927SAndroid Build Coastguard Worker }
204*61046927SAndroid Build Coastguard Worker 
205*61046927SAndroid Build Coastguard Worker /* find instruction to schedule: */
206*61046927SAndroid Build Coastguard Worker static struct ir3_instruction *
choose_instr(struct ir3_postsched_ctx * ctx)207*61046927SAndroid Build Coastguard Worker choose_instr(struct ir3_postsched_ctx *ctx)
208*61046927SAndroid Build Coastguard Worker {
209*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *chosen = NULL;
210*61046927SAndroid Build Coastguard Worker 
211*61046927SAndroid Build Coastguard Worker    dump_state(ctx);
212*61046927SAndroid Build Coastguard Worker 
213*61046927SAndroid Build Coastguard Worker    foreach_sched_node (n, &ctx->dag->heads) {
214*61046927SAndroid Build Coastguard Worker       if (!is_meta(n->instr))
215*61046927SAndroid Build Coastguard Worker          continue;
216*61046927SAndroid Build Coastguard Worker 
217*61046927SAndroid Build Coastguard Worker       if (!chosen || (chosen->max_delay < n->max_delay))
218*61046927SAndroid Build Coastguard Worker          chosen = n;
219*61046927SAndroid Build Coastguard Worker    }
220*61046927SAndroid Build Coastguard Worker 
221*61046927SAndroid Build Coastguard Worker    if (chosen) {
222*61046927SAndroid Build Coastguard Worker       di(chosen->instr, "prio: chose (meta)");
223*61046927SAndroid Build Coastguard Worker       return chosen->instr;
224*61046927SAndroid Build Coastguard Worker    }
225*61046927SAndroid Build Coastguard Worker 
226*61046927SAndroid Build Coastguard Worker    /* Try to schedule inputs with a higher priority, if possible, as
227*61046927SAndroid Build Coastguard Worker     * the last bary.f unlocks varying storage to unblock more VS
228*61046927SAndroid Build Coastguard Worker     * warps.
229*61046927SAndroid Build Coastguard Worker     */
230*61046927SAndroid Build Coastguard Worker    foreach_sched_node (n, &ctx->dag->heads) {
231*61046927SAndroid Build Coastguard Worker       if (!is_input(n->instr))
232*61046927SAndroid Build Coastguard Worker          continue;
233*61046927SAndroid Build Coastguard Worker 
234*61046927SAndroid Build Coastguard Worker       if (!chosen || (chosen->max_delay < n->max_delay))
235*61046927SAndroid Build Coastguard Worker          chosen = n;
236*61046927SAndroid Build Coastguard Worker    }
237*61046927SAndroid Build Coastguard Worker 
238*61046927SAndroid Build Coastguard Worker    if (chosen) {
239*61046927SAndroid Build Coastguard Worker       di(chosen->instr, "prio: chose (input)");
240*61046927SAndroid Build Coastguard Worker       return chosen->instr;
241*61046927SAndroid Build Coastguard Worker    }
242*61046927SAndroid Build Coastguard Worker 
243*61046927SAndroid Build Coastguard Worker    /* Next prioritize discards: */
244*61046927SAndroid Build Coastguard Worker    foreach_sched_node (n, &ctx->dag->heads) {
245*61046927SAndroid Build Coastguard Worker       unsigned d = node_delay(ctx, n);
246*61046927SAndroid Build Coastguard Worker 
247*61046927SAndroid Build Coastguard Worker       if (d > 0)
248*61046927SAndroid Build Coastguard Worker          continue;
249*61046927SAndroid Build Coastguard Worker 
250*61046927SAndroid Build Coastguard Worker       if (!is_kill_or_demote(n->instr))
251*61046927SAndroid Build Coastguard Worker          continue;
252*61046927SAndroid Build Coastguard Worker 
253*61046927SAndroid Build Coastguard Worker       if (!chosen || (chosen->max_delay < n->max_delay))
254*61046927SAndroid Build Coastguard Worker          chosen = n;
255*61046927SAndroid Build Coastguard Worker    }
256*61046927SAndroid Build Coastguard Worker 
257*61046927SAndroid Build Coastguard Worker    if (chosen) {
258*61046927SAndroid Build Coastguard Worker       di(chosen->instr, "csp: chose (kill, hard ready)");
259*61046927SAndroid Build Coastguard Worker       return chosen->instr;
260*61046927SAndroid Build Coastguard Worker    }
261*61046927SAndroid Build Coastguard Worker 
262*61046927SAndroid Build Coastguard Worker    /* Next prioritize expensive instructions: */
263*61046927SAndroid Build Coastguard Worker    foreach_sched_node (n, &ctx->dag->heads) {
264*61046927SAndroid Build Coastguard Worker       unsigned d = node_delay_soft(ctx, n);
265*61046927SAndroid Build Coastguard Worker 
266*61046927SAndroid Build Coastguard Worker       if (d > 0)
267*61046927SAndroid Build Coastguard Worker          continue;
268*61046927SAndroid Build Coastguard Worker 
269*61046927SAndroid Build Coastguard Worker       if (!(is_ss_producer(n->instr) || is_sy_producer(n->instr)))
270*61046927SAndroid Build Coastguard Worker          continue;
271*61046927SAndroid Build Coastguard Worker 
272*61046927SAndroid Build Coastguard Worker       if (!chosen || (chosen->max_delay < n->max_delay))
273*61046927SAndroid Build Coastguard Worker          chosen = n;
274*61046927SAndroid Build Coastguard Worker    }
275*61046927SAndroid Build Coastguard Worker 
276*61046927SAndroid Build Coastguard Worker    if (chosen) {
277*61046927SAndroid Build Coastguard Worker       di(chosen->instr, "csp: chose (sfu/tex, soft ready)");
278*61046927SAndroid Build Coastguard Worker       return chosen->instr;
279*61046927SAndroid Build Coastguard Worker    }
280*61046927SAndroid Build Coastguard Worker 
281*61046927SAndroid Build Coastguard Worker    /* Next try to find a ready leader w/ soft delay (ie. including extra
282*61046927SAndroid Build Coastguard Worker     * delay for things like tex fetch which can be synchronized w/ sync
283*61046927SAndroid Build Coastguard Worker     * bit (but we probably do want to schedule some other instructions
284*61046927SAndroid Build Coastguard Worker     * while we wait). We also allow a small amount of nops, to prefer now-nops
285*61046927SAndroid Build Coastguard Worker     * over future-nops up to a point, as that gives better results.
286*61046927SAndroid Build Coastguard Worker     */
287*61046927SAndroid Build Coastguard Worker    unsigned chosen_delay = 0;
288*61046927SAndroid Build Coastguard Worker    foreach_sched_node (n, &ctx->dag->heads) {
289*61046927SAndroid Build Coastguard Worker       unsigned d = node_delay_soft(ctx, n);
290*61046927SAndroid Build Coastguard Worker 
291*61046927SAndroid Build Coastguard Worker       if (d > 3)
292*61046927SAndroid Build Coastguard Worker          continue;
293*61046927SAndroid Build Coastguard Worker 
294*61046927SAndroid Build Coastguard Worker       if (!chosen || d < chosen_delay) {
295*61046927SAndroid Build Coastguard Worker          chosen = n;
296*61046927SAndroid Build Coastguard Worker          chosen_delay = d;
297*61046927SAndroid Build Coastguard Worker          continue;
298*61046927SAndroid Build Coastguard Worker       }
299*61046927SAndroid Build Coastguard Worker 
300*61046927SAndroid Build Coastguard Worker       if (d > chosen_delay)
301*61046927SAndroid Build Coastguard Worker          continue;
302*61046927SAndroid Build Coastguard Worker 
303*61046927SAndroid Build Coastguard Worker       if (chosen->max_delay < n->max_delay) {
304*61046927SAndroid Build Coastguard Worker          chosen = n;
305*61046927SAndroid Build Coastguard Worker          chosen_delay = d;
306*61046927SAndroid Build Coastguard Worker       }
307*61046927SAndroid Build Coastguard Worker    }
308*61046927SAndroid Build Coastguard Worker 
309*61046927SAndroid Build Coastguard Worker    if (chosen) {
310*61046927SAndroid Build Coastguard Worker       di(chosen->instr, "csp: chose (soft ready)");
311*61046927SAndroid Build Coastguard Worker       return chosen->instr;
312*61046927SAndroid Build Coastguard Worker    }
313*61046927SAndroid Build Coastguard Worker 
314*61046927SAndroid Build Coastguard Worker    /* Otherwise choose leader with maximum cost:
315*61046927SAndroid Build Coastguard Worker     */
316*61046927SAndroid Build Coastguard Worker    foreach_sched_node (n, &ctx->dag->heads) {
317*61046927SAndroid Build Coastguard Worker       if (!chosen || chosen->max_delay < n->max_delay)
318*61046927SAndroid Build Coastguard Worker          chosen = n;
319*61046927SAndroid Build Coastguard Worker    }
320*61046927SAndroid Build Coastguard Worker 
321*61046927SAndroid Build Coastguard Worker    if (chosen) {
322*61046927SAndroid Build Coastguard Worker       di(chosen->instr, "csp: chose (leader)");
323*61046927SAndroid Build Coastguard Worker       return chosen->instr;
324*61046927SAndroid Build Coastguard Worker    }
325*61046927SAndroid Build Coastguard Worker 
326*61046927SAndroid Build Coastguard Worker    return NULL;
327*61046927SAndroid Build Coastguard Worker }
328*61046927SAndroid Build Coastguard Worker 
329*61046927SAndroid Build Coastguard Worker struct ir3_postsched_deps_state {
330*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_ctx *ctx;
331*61046927SAndroid Build Coastguard Worker 
332*61046927SAndroid Build Coastguard Worker    enum { F, R } direction;
333*61046927SAndroid Build Coastguard Worker 
334*61046927SAndroid Build Coastguard Worker    bool merged;
335*61046927SAndroid Build Coastguard Worker 
336*61046927SAndroid Build Coastguard Worker    /* Track the mapping between sched node (instruction) that last
337*61046927SAndroid Build Coastguard Worker     * wrote a given register (in whichever direction we are iterating
338*61046927SAndroid Build Coastguard Worker     * the block)
339*61046927SAndroid Build Coastguard Worker     *
340*61046927SAndroid Build Coastguard Worker     * Note, this table is twice as big as the # of regs, to deal with
341*61046927SAndroid Build Coastguard Worker     * half-precision regs.  The approach differs depending on whether
342*61046927SAndroid Build Coastguard Worker     * the half and full precision register files are "merged" (conflict,
343*61046927SAndroid Build Coastguard Worker     * ie. a6xx+) in which case we use "regs" for both full precision and half
344*61046927SAndroid Build Coastguard Worker     * precision dependencies and consider each full precision dep
345*61046927SAndroid Build Coastguard Worker     * as two half-precision dependencies, vs older separate (non-
346*61046927SAndroid Build Coastguard Worker     * conflicting) in which case the separate "half_regs" table is used for
347*61046927SAndroid Build Coastguard Worker     * half-precision deps. See ir3_reg_file_offset().
348*61046927SAndroid Build Coastguard Worker     */
349*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *regs[2 * GPR_REG_SIZE];
350*61046927SAndroid Build Coastguard Worker    unsigned dst_n[2 * GPR_REG_SIZE];
351*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *half_regs[GPR_REG_SIZE];
352*61046927SAndroid Build Coastguard Worker    unsigned half_dst_n[GPR_REG_SIZE];
353*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *shared_regs[2 * SHARED_REG_SIZE];
354*61046927SAndroid Build Coastguard Worker    unsigned shared_dst_n[2 * SHARED_REG_SIZE];
355*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *nongpr_regs[2 * NONGPR_REG_SIZE];
356*61046927SAndroid Build Coastguard Worker    unsigned nongpr_dst_n[2 * NONGPR_REG_SIZE];
357*61046927SAndroid Build Coastguard Worker };
358*61046927SAndroid Build Coastguard Worker 
359*61046927SAndroid Build Coastguard Worker static void
add_dep(struct ir3_postsched_deps_state * state,struct ir3_postsched_node * before,struct ir3_postsched_node * after,unsigned d)360*61046927SAndroid Build Coastguard Worker add_dep(struct ir3_postsched_deps_state *state,
361*61046927SAndroid Build Coastguard Worker         struct ir3_postsched_node *before, struct ir3_postsched_node *after,
362*61046927SAndroid Build Coastguard Worker         unsigned d)
363*61046927SAndroid Build Coastguard Worker {
364*61046927SAndroid Build Coastguard Worker    if (!before || !after)
365*61046927SAndroid Build Coastguard Worker       return;
366*61046927SAndroid Build Coastguard Worker 
367*61046927SAndroid Build Coastguard Worker    assert(before != after);
368*61046927SAndroid Build Coastguard Worker 
369*61046927SAndroid Build Coastguard Worker    if (state->direction == F) {
370*61046927SAndroid Build Coastguard Worker       dag_add_edge_max_data(&before->dag, &after->dag, (uintptr_t)d);
371*61046927SAndroid Build Coastguard Worker    } else {
372*61046927SAndroid Build Coastguard Worker       dag_add_edge_max_data(&after->dag, &before->dag, 0);
373*61046927SAndroid Build Coastguard Worker    }
374*61046927SAndroid Build Coastguard Worker }
375*61046927SAndroid Build Coastguard Worker 
376*61046927SAndroid Build Coastguard Worker static void
add_single_reg_dep(struct ir3_postsched_deps_state * state,struct ir3_postsched_node * node,struct ir3_postsched_node ** dep_ptr,unsigned * dst_n_ptr,unsigned num,int src_n,int dst_n)377*61046927SAndroid Build Coastguard Worker add_single_reg_dep(struct ir3_postsched_deps_state *state,
378*61046927SAndroid Build Coastguard Worker                    struct ir3_postsched_node *node,
379*61046927SAndroid Build Coastguard Worker                    struct ir3_postsched_node **dep_ptr,
380*61046927SAndroid Build Coastguard Worker                    unsigned *dst_n_ptr, unsigned num, int src_n,
381*61046927SAndroid Build Coastguard Worker                    int dst_n)
382*61046927SAndroid Build Coastguard Worker {
383*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *dep = *dep_ptr;
384*61046927SAndroid Build Coastguard Worker 
385*61046927SAndroid Build Coastguard Worker    unsigned d = 0;
386*61046927SAndroid Build Coastguard Worker    if (src_n >= 0 && dep && state->direction == F) {
387*61046927SAndroid Build Coastguard Worker       struct ir3_compiler *compiler = state->ctx->ir->compiler;
388*61046927SAndroid Build Coastguard Worker       /* get the dst_n this corresponds to */
389*61046927SAndroid Build Coastguard Worker       unsigned dst_n = *dst_n_ptr;
390*61046927SAndroid Build Coastguard Worker       d = ir3_delayslots_with_repeat(compiler, dep->instr, node->instr, dst_n, src_n);
391*61046927SAndroid Build Coastguard Worker       if (is_sy_producer(dep->instr))
392*61046927SAndroid Build Coastguard Worker          node->has_sy_src = true;
393*61046927SAndroid Build Coastguard Worker       if (needs_ss(compiler, dep->instr, node->instr))
394*61046927SAndroid Build Coastguard Worker          node->has_ss_src = true;
395*61046927SAndroid Build Coastguard Worker    }
396*61046927SAndroid Build Coastguard Worker 
397*61046927SAndroid Build Coastguard Worker    if (src_n >= 0 && dep && state->direction == R) {
398*61046927SAndroid Build Coastguard Worker       /* If node generates a WAR hazard (because it doesn't consume its sources
399*61046927SAndroid Build Coastguard Worker        * immediately, dep needs (ss) to sync its dest. Even though this isn't a
400*61046927SAndroid Build Coastguard Worker        * (ss) source (but rather a dest), the effect is exactly the same so we
401*61046927SAndroid Build Coastguard Worker        * model it as such.
402*61046927SAndroid Build Coastguard Worker        */
403*61046927SAndroid Build Coastguard Worker       if (is_war_hazard_producer(node->instr)) {
404*61046927SAndroid Build Coastguard Worker          dep->has_ss_src = true;
405*61046927SAndroid Build Coastguard Worker       }
406*61046927SAndroid Build Coastguard Worker    }
407*61046927SAndroid Build Coastguard Worker 
408*61046927SAndroid Build Coastguard Worker    add_dep(state, dep, node, d);
409*61046927SAndroid Build Coastguard Worker    if (src_n < 0) {
410*61046927SAndroid Build Coastguard Worker       *dep_ptr = node;
411*61046927SAndroid Build Coastguard Worker       *dst_n_ptr = dst_n;
412*61046927SAndroid Build Coastguard Worker    }
413*61046927SAndroid Build Coastguard Worker }
414*61046927SAndroid Build Coastguard Worker 
/* This is where we handle full vs half-precision, and potential conflicts
 * between half and full precision that result in additional dependencies.
 * The 'reg' arg is really just to know half vs full precision.
 *
 * If src_n is non-negative, then this adds a dependency on a source register,
 * and src_n is the index passed into ir3_delayslots_with_repeat() for
 * calculating the delay: it corresponds to node->instr->srcs[src_n]. If src_n
 * is negative, then this is for the destination register corresponding to
 * dst_n.
 */
424*61046927SAndroid Build Coastguard Worker static void
add_reg_dep(struct ir3_postsched_deps_state * state,struct ir3_postsched_node * node,const struct ir3_register * reg,unsigned num,int src_n,int dst_n)425*61046927SAndroid Build Coastguard Worker add_reg_dep(struct ir3_postsched_deps_state *state,
426*61046927SAndroid Build Coastguard Worker             struct ir3_postsched_node *node, const struct ir3_register *reg,
427*61046927SAndroid Build Coastguard Worker             unsigned num, int src_n, int dst_n)
428*61046927SAndroid Build Coastguard Worker {
429*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node **regs;
430*61046927SAndroid Build Coastguard Worker    unsigned *dst_n_ptr;
431*61046927SAndroid Build Coastguard Worker    enum ir3_reg_file file;
432*61046927SAndroid Build Coastguard Worker    unsigned size = reg_elem_size(reg);
433*61046927SAndroid Build Coastguard Worker    unsigned offset = ir3_reg_file_offset(reg, num, state->merged, &file);
434*61046927SAndroid Build Coastguard Worker    switch (file) {
435*61046927SAndroid Build Coastguard Worker    case IR3_FILE_FULL:
436*61046927SAndroid Build Coastguard Worker       assert(offset + size <= ARRAY_SIZE(state->regs));
437*61046927SAndroid Build Coastguard Worker       regs = state->regs;
438*61046927SAndroid Build Coastguard Worker       dst_n_ptr = state->dst_n;
439*61046927SAndroid Build Coastguard Worker       break;
440*61046927SAndroid Build Coastguard Worker    case IR3_FILE_HALF:
441*61046927SAndroid Build Coastguard Worker       assert(offset + 1 <= ARRAY_SIZE(state->half_regs));
442*61046927SAndroid Build Coastguard Worker       regs = state->half_regs;
443*61046927SAndroid Build Coastguard Worker       dst_n_ptr = state->half_dst_n;
444*61046927SAndroid Build Coastguard Worker       break;
445*61046927SAndroid Build Coastguard Worker    case IR3_FILE_SHARED:
446*61046927SAndroid Build Coastguard Worker       assert(offset + size <= ARRAY_SIZE(state->shared_regs));
447*61046927SAndroid Build Coastguard Worker       regs = state->shared_regs;
448*61046927SAndroid Build Coastguard Worker       dst_n_ptr = state->shared_dst_n;
449*61046927SAndroid Build Coastguard Worker       break;
450*61046927SAndroid Build Coastguard Worker    case IR3_FILE_NONGPR:
451*61046927SAndroid Build Coastguard Worker       assert(offset + size <= ARRAY_SIZE(state->nongpr_regs));
452*61046927SAndroid Build Coastguard Worker       regs = state->nongpr_regs;
453*61046927SAndroid Build Coastguard Worker       dst_n_ptr = state->nongpr_dst_n;
454*61046927SAndroid Build Coastguard Worker       break;
455*61046927SAndroid Build Coastguard Worker    }
456*61046927SAndroid Build Coastguard Worker 
457*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < size; i++)
458*61046927SAndroid Build Coastguard Worker       add_single_reg_dep(state, node, &regs[offset + i], &dst_n_ptr[offset + i], num, src_n, dst_n);
459*61046927SAndroid Build Coastguard Worker }
460*61046927SAndroid Build Coastguard Worker 
461*61046927SAndroid Build Coastguard Worker static void
calculate_deps(struct ir3_postsched_deps_state * state,struct ir3_postsched_node * node)462*61046927SAndroid Build Coastguard Worker calculate_deps(struct ir3_postsched_deps_state *state,
463*61046927SAndroid Build Coastguard Worker                struct ir3_postsched_node *node)
464*61046927SAndroid Build Coastguard Worker {
465*61046927SAndroid Build Coastguard Worker    /* Add dependencies on instructions that previously (or next,
466*61046927SAndroid Build Coastguard Worker     * in the reverse direction) wrote any of our src registers:
467*61046927SAndroid Build Coastguard Worker     */
468*61046927SAndroid Build Coastguard Worker    foreach_src_n (reg, i, node->instr) {
469*61046927SAndroid Build Coastguard Worker       if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
470*61046927SAndroid Build Coastguard Worker          continue;
471*61046927SAndroid Build Coastguard Worker 
472*61046927SAndroid Build Coastguard Worker       if (reg->flags & IR3_REG_RELATIV) {
473*61046927SAndroid Build Coastguard Worker          /* mark entire array as read: */
474*61046927SAndroid Build Coastguard Worker          for (unsigned j = 0; j < reg->size; j++) {
475*61046927SAndroid Build Coastguard Worker             add_reg_dep(state, node, reg, reg->array.base + j, i, -1);
476*61046927SAndroid Build Coastguard Worker          }
477*61046927SAndroid Build Coastguard Worker       } else {
478*61046927SAndroid Build Coastguard Worker          assert(reg->wrmask >= 1);
479*61046927SAndroid Build Coastguard Worker          u_foreach_bit (b, reg->wrmask) {
480*61046927SAndroid Build Coastguard Worker             add_reg_dep(state, node, reg, reg->num + b, i, -1);
481*61046927SAndroid Build Coastguard Worker          }
482*61046927SAndroid Build Coastguard Worker       }
483*61046927SAndroid Build Coastguard Worker    }
484*61046927SAndroid Build Coastguard Worker 
485*61046927SAndroid Build Coastguard Worker    /* And then after we update the state for what this instruction
486*61046927SAndroid Build Coastguard Worker     * wrote:
487*61046927SAndroid Build Coastguard Worker     */
488*61046927SAndroid Build Coastguard Worker    foreach_dst_n (reg, i, node->instr) {
489*61046927SAndroid Build Coastguard Worker       if (reg->wrmask == 0)
490*61046927SAndroid Build Coastguard Worker          continue;
491*61046927SAndroid Build Coastguard Worker       if (reg->flags & IR3_REG_RELATIV) {
492*61046927SAndroid Build Coastguard Worker          /* mark the entire array as written: */
493*61046927SAndroid Build Coastguard Worker          for (unsigned j = 0; j < reg->size; j++) {
494*61046927SAndroid Build Coastguard Worker             add_reg_dep(state, node, reg, reg->array.base + j, -1, i);
495*61046927SAndroid Build Coastguard Worker          }
496*61046927SAndroid Build Coastguard Worker       } else {
497*61046927SAndroid Build Coastguard Worker          assert(reg->wrmask >= 1);
498*61046927SAndroid Build Coastguard Worker          u_foreach_bit (b, reg->wrmask) {
499*61046927SAndroid Build Coastguard Worker             add_reg_dep(state, node, reg, reg->num + b, -1, i);
500*61046927SAndroid Build Coastguard Worker          }
501*61046927SAndroid Build Coastguard Worker       }
502*61046927SAndroid Build Coastguard Worker    }
503*61046927SAndroid Build Coastguard Worker }
504*61046927SAndroid Build Coastguard Worker 
505*61046927SAndroid Build Coastguard Worker static void
calculate_forward_deps(struct ir3_postsched_ctx * ctx)506*61046927SAndroid Build Coastguard Worker calculate_forward_deps(struct ir3_postsched_ctx *ctx)
507*61046927SAndroid Build Coastguard Worker {
508*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_deps_state state = {
509*61046927SAndroid Build Coastguard Worker       .ctx = ctx,
510*61046927SAndroid Build Coastguard Worker       .direction = F,
511*61046927SAndroid Build Coastguard Worker       .merged = ctx->v->mergedregs,
512*61046927SAndroid Build Coastguard Worker    };
513*61046927SAndroid Build Coastguard Worker 
514*61046927SAndroid Build Coastguard Worker    foreach_instr (instr, &ctx->unscheduled_list) {
515*61046927SAndroid Build Coastguard Worker       calculate_deps(&state, instr->data);
516*61046927SAndroid Build Coastguard Worker    }
517*61046927SAndroid Build Coastguard Worker }
518*61046927SAndroid Build Coastguard Worker 
519*61046927SAndroid Build Coastguard Worker static void
calculate_reverse_deps(struct ir3_postsched_ctx * ctx)520*61046927SAndroid Build Coastguard Worker calculate_reverse_deps(struct ir3_postsched_ctx *ctx)
521*61046927SAndroid Build Coastguard Worker {
522*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_deps_state state = {
523*61046927SAndroid Build Coastguard Worker       .ctx = ctx,
524*61046927SAndroid Build Coastguard Worker       .direction = R,
525*61046927SAndroid Build Coastguard Worker       .merged = ctx->v->mergedregs,
526*61046927SAndroid Build Coastguard Worker    };
527*61046927SAndroid Build Coastguard Worker 
528*61046927SAndroid Build Coastguard Worker    foreach_instr_rev (instr, &ctx->unscheduled_list) {
529*61046927SAndroid Build Coastguard Worker       calculate_deps(&state, instr->data);
530*61046927SAndroid Build Coastguard Worker    }
531*61046927SAndroid Build Coastguard Worker }
532*61046927SAndroid Build Coastguard Worker 
533*61046927SAndroid Build Coastguard Worker static void
sched_node_init(struct ir3_postsched_ctx * ctx,struct ir3_instruction * instr)534*61046927SAndroid Build Coastguard Worker sched_node_init(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
535*61046927SAndroid Build Coastguard Worker {
536*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *n =
537*61046927SAndroid Build Coastguard Worker       rzalloc(ctx->mem_ctx, struct ir3_postsched_node);
538*61046927SAndroid Build Coastguard Worker 
539*61046927SAndroid Build Coastguard Worker    dag_init_node(ctx->dag, &n->dag);
540*61046927SAndroid Build Coastguard Worker 
541*61046927SAndroid Build Coastguard Worker    n->instr = instr;
542*61046927SAndroid Build Coastguard Worker    instr->data = n;
543*61046927SAndroid Build Coastguard Worker }
544*61046927SAndroid Build Coastguard Worker 
545*61046927SAndroid Build Coastguard Worker static void
sched_dag_max_delay_cb(struct dag_node * node,void * state)546*61046927SAndroid Build Coastguard Worker sched_dag_max_delay_cb(struct dag_node *node, void *state)
547*61046927SAndroid Build Coastguard Worker {
548*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
549*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_ctx *ctx = state;
550*61046927SAndroid Build Coastguard Worker    uint32_t max_delay = 0;
551*61046927SAndroid Build Coastguard Worker 
552*61046927SAndroid Build Coastguard Worker    util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
553*61046927SAndroid Build Coastguard Worker       struct ir3_postsched_node *child =
554*61046927SAndroid Build Coastguard Worker          (struct ir3_postsched_node *)edge->child;
555*61046927SAndroid Build Coastguard Worker       unsigned delay = edge->data;
556*61046927SAndroid Build Coastguard Worker       unsigned sy_delay = 0;
557*61046927SAndroid Build Coastguard Worker       unsigned ss_delay = 0;
558*61046927SAndroid Build Coastguard Worker 
559*61046927SAndroid Build Coastguard Worker       if (child->has_sy_src && is_sy_producer(n->instr)) {
560*61046927SAndroid Build Coastguard Worker          sy_delay = soft_sy_delay(n->instr, ctx->block->shader);
561*61046927SAndroid Build Coastguard Worker       }
562*61046927SAndroid Build Coastguard Worker 
563*61046927SAndroid Build Coastguard Worker       if (child->has_ss_src &&
564*61046927SAndroid Build Coastguard Worker           needs_ss(ctx->v->compiler, n->instr, child->instr)) {
565*61046927SAndroid Build Coastguard Worker          ss_delay = soft_ss_delay(n->instr);
566*61046927SAndroid Build Coastguard Worker       }
567*61046927SAndroid Build Coastguard Worker 
568*61046927SAndroid Build Coastguard Worker       delay = MAX3(delay, sy_delay, ss_delay);
569*61046927SAndroid Build Coastguard Worker       max_delay = MAX2(child->max_delay + delay, max_delay);
570*61046927SAndroid Build Coastguard Worker    }
571*61046927SAndroid Build Coastguard Worker 
572*61046927SAndroid Build Coastguard Worker    n->max_delay = MAX2(n->max_delay, max_delay);
573*61046927SAndroid Build Coastguard Worker }
574*61046927SAndroid Build Coastguard Worker 
575*61046927SAndroid Build Coastguard Worker static void
sched_dag_init(struct ir3_postsched_ctx * ctx)576*61046927SAndroid Build Coastguard Worker sched_dag_init(struct ir3_postsched_ctx *ctx)
577*61046927SAndroid Build Coastguard Worker {
578*61046927SAndroid Build Coastguard Worker    ctx->mem_ctx = ralloc_context(NULL);
579*61046927SAndroid Build Coastguard Worker 
580*61046927SAndroid Build Coastguard Worker    ctx->dag = dag_create(ctx->mem_ctx);
581*61046927SAndroid Build Coastguard Worker 
582*61046927SAndroid Build Coastguard Worker    foreach_instr (instr, &ctx->unscheduled_list)
583*61046927SAndroid Build Coastguard Worker       sched_node_init(ctx, instr);
584*61046927SAndroid Build Coastguard Worker 
585*61046927SAndroid Build Coastguard Worker    calculate_forward_deps(ctx);
586*61046927SAndroid Build Coastguard Worker    calculate_reverse_deps(ctx);
587*61046927SAndroid Build Coastguard Worker 
588*61046927SAndroid Build Coastguard Worker    /*
589*61046927SAndroid Build Coastguard Worker     * To avoid expensive texture fetches, etc, from being moved ahead
590*61046927SAndroid Build Coastguard Worker     * of kills, track the kills we've seen so far, so we can add an
591*61046927SAndroid Build Coastguard Worker     * extra dependency on them for tex/mem instructions
592*61046927SAndroid Build Coastguard Worker     */
593*61046927SAndroid Build Coastguard Worker    struct util_dynarray kills;
594*61046927SAndroid Build Coastguard Worker    util_dynarray_init(&kills, ctx->mem_ctx);
595*61046927SAndroid Build Coastguard Worker 
596*61046927SAndroid Build Coastguard Worker    /* The last bary.f with the (ei) flag must be scheduled before any kills,
597*61046927SAndroid Build Coastguard Worker     * or the hw gets angry. Keep track of inputs here so we can add the
598*61046927SAndroid Build Coastguard Worker     * false dep on the kill instruction.
599*61046927SAndroid Build Coastguard Worker     */
600*61046927SAndroid Build Coastguard Worker    struct util_dynarray inputs;
601*61046927SAndroid Build Coastguard Worker    util_dynarray_init(&inputs, ctx->mem_ctx);
602*61046927SAndroid Build Coastguard Worker 
603*61046927SAndroid Build Coastguard Worker    /*
604*61046927SAndroid Build Coastguard Worker     * Normal srcs won't be in SSA at this point, those are dealt with in
605*61046927SAndroid Build Coastguard Worker     * calculate_forward_deps() and calculate_reverse_deps().  But we still
606*61046927SAndroid Build Coastguard Worker     * have the false-dep information in SSA form, so go ahead and add
607*61046927SAndroid Build Coastguard Worker     * dependencies for that here:
608*61046927SAndroid Build Coastguard Worker     */
609*61046927SAndroid Build Coastguard Worker    foreach_instr (instr, &ctx->unscheduled_list) {
610*61046927SAndroid Build Coastguard Worker       struct ir3_postsched_node *n = instr->data;
611*61046927SAndroid Build Coastguard Worker 
612*61046927SAndroid Build Coastguard Worker       foreach_ssa_src_n (src, i, instr) {
613*61046927SAndroid Build Coastguard Worker          if (src->block != instr->block)
614*61046927SAndroid Build Coastguard Worker             continue;
615*61046927SAndroid Build Coastguard Worker 
616*61046927SAndroid Build Coastguard Worker          /* we can end up with unused false-deps.. just skip them: */
617*61046927SAndroid Build Coastguard Worker          if (src->flags & IR3_INSTR_UNUSED)
618*61046927SAndroid Build Coastguard Worker             continue;
619*61046927SAndroid Build Coastguard Worker 
620*61046927SAndroid Build Coastguard Worker          struct ir3_postsched_node *sn = src->data;
621*61046927SAndroid Build Coastguard Worker 
622*61046927SAndroid Build Coastguard Worker          /* don't consider dependencies in other blocks: */
623*61046927SAndroid Build Coastguard Worker          if (src->block != instr->block)
624*61046927SAndroid Build Coastguard Worker             continue;
625*61046927SAndroid Build Coastguard Worker 
626*61046927SAndroid Build Coastguard Worker          dag_add_edge_max_data(&sn->dag, &n->dag, 0);
627*61046927SAndroid Build Coastguard Worker       }
628*61046927SAndroid Build Coastguard Worker 
629*61046927SAndroid Build Coastguard Worker       if (is_input(instr)) {
630*61046927SAndroid Build Coastguard Worker          util_dynarray_append(&inputs, struct ir3_instruction *, instr);
631*61046927SAndroid Build Coastguard Worker       } else if (is_kill_or_demote(instr)) {
632*61046927SAndroid Build Coastguard Worker          util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) {
633*61046927SAndroid Build Coastguard Worker             struct ir3_instruction *input = *instrp;
634*61046927SAndroid Build Coastguard Worker             struct ir3_postsched_node *in = input->data;
635*61046927SAndroid Build Coastguard Worker             dag_add_edge_max_data(&in->dag, &n->dag, 0);
636*61046927SAndroid Build Coastguard Worker          }
637*61046927SAndroid Build Coastguard Worker          util_dynarray_append(&kills, struct ir3_instruction *, instr);
638*61046927SAndroid Build Coastguard Worker       } else if (is_tex(instr) || is_mem(instr)) {
639*61046927SAndroid Build Coastguard Worker          util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) {
640*61046927SAndroid Build Coastguard Worker             struct ir3_instruction *kill = *instrp;
641*61046927SAndroid Build Coastguard Worker             struct ir3_postsched_node *kn = kill->data;
642*61046927SAndroid Build Coastguard Worker             dag_add_edge_max_data(&kn->dag, &n->dag, 0);
643*61046927SAndroid Build Coastguard Worker          }
644*61046927SAndroid Build Coastguard Worker       }
645*61046927SAndroid Build Coastguard Worker    }
646*61046927SAndroid Build Coastguard Worker 
647*61046927SAndroid Build Coastguard Worker #ifndef NDEBUG
648*61046927SAndroid Build Coastguard Worker    dag_validate(ctx->dag, sched_dag_validate_cb, NULL);
649*61046927SAndroid Build Coastguard Worker #endif
650*61046927SAndroid Build Coastguard Worker 
651*61046927SAndroid Build Coastguard Worker    // TODO do we want to do this after reverse-dependencies?
652*61046927SAndroid Build Coastguard Worker    dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, ctx);
653*61046927SAndroid Build Coastguard Worker }
654*61046927SAndroid Build Coastguard Worker 
655*61046927SAndroid Build Coastguard Worker static void
sched_dag_destroy(struct ir3_postsched_ctx * ctx)656*61046927SAndroid Build Coastguard Worker sched_dag_destroy(struct ir3_postsched_ctx *ctx)
657*61046927SAndroid Build Coastguard Worker {
658*61046927SAndroid Build Coastguard Worker    ralloc_free(ctx->mem_ctx);
659*61046927SAndroid Build Coastguard Worker    ctx->mem_ctx = NULL;
660*61046927SAndroid Build Coastguard Worker    ctx->dag = NULL;
661*61046927SAndroid Build Coastguard Worker }
662*61046927SAndroid Build Coastguard Worker 
663*61046927SAndroid Build Coastguard Worker static void
sched_block(struct ir3_postsched_ctx * ctx,struct ir3_block * block)664*61046927SAndroid Build Coastguard Worker sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
665*61046927SAndroid Build Coastguard Worker {
666*61046927SAndroid Build Coastguard Worker    ctx->block = block;
667*61046927SAndroid Build Coastguard Worker    ctx->sy_delay = 0;
668*61046927SAndroid Build Coastguard Worker    ctx->ss_delay = 0;
669*61046927SAndroid Build Coastguard Worker 
670*61046927SAndroid Build Coastguard Worker    /* The terminator has to stay at the end. Instead of trying to set up
671*61046927SAndroid Build Coastguard Worker     * dependencies to achieve this, it's easier to just remove it now and add it
672*61046927SAndroid Build Coastguard Worker     * back after scheduling.
673*61046927SAndroid Build Coastguard Worker     */
674*61046927SAndroid Build Coastguard Worker    struct ir3_instruction *terminator = ir3_block_take_terminator(block);
675*61046927SAndroid Build Coastguard Worker 
676*61046927SAndroid Build Coastguard Worker    /* move all instructions to the unscheduled list, and
677*61046927SAndroid Build Coastguard Worker     * empty the block's instruction list (to which we will
678*61046927SAndroid Build Coastguard Worker     * be inserting).
679*61046927SAndroid Build Coastguard Worker     */
680*61046927SAndroid Build Coastguard Worker    list_replace(&block->instr_list, &ctx->unscheduled_list);
681*61046927SAndroid Build Coastguard Worker    list_inithead(&block->instr_list);
682*61046927SAndroid Build Coastguard Worker 
683*61046927SAndroid Build Coastguard Worker    // TODO once we are using post-sched for everything we can
684*61046927SAndroid Build Coastguard Worker    // just not stick in NOP's prior to post-sched, and drop this.
685*61046927SAndroid Build Coastguard Worker    // for now keep this, since it makes post-sched optional:
686*61046927SAndroid Build Coastguard Worker    foreach_instr_safe (instr, &ctx->unscheduled_list) {
687*61046927SAndroid Build Coastguard Worker       switch (instr->opc) {
688*61046927SAndroid Build Coastguard Worker       case OPC_NOP:
689*61046927SAndroid Build Coastguard Worker          list_delinit(&instr->node);
690*61046927SAndroid Build Coastguard Worker          break;
691*61046927SAndroid Build Coastguard Worker       default:
692*61046927SAndroid Build Coastguard Worker          break;
693*61046927SAndroid Build Coastguard Worker       }
694*61046927SAndroid Build Coastguard Worker    }
695*61046927SAndroid Build Coastguard Worker 
696*61046927SAndroid Build Coastguard Worker    sched_dag_init(ctx);
697*61046927SAndroid Build Coastguard Worker 
698*61046927SAndroid Build Coastguard Worker    /* First schedule all meta:input instructions, followed by
699*61046927SAndroid Build Coastguard Worker     * tex-prefetch.  We want all of the instructions that load
700*61046927SAndroid Build Coastguard Worker     * values into registers before the shader starts to go
701*61046927SAndroid Build Coastguard Worker     * before any other instructions.  But in particular we
702*61046927SAndroid Build Coastguard Worker     * want inputs to come before prefetches.  This is because
703*61046927SAndroid Build Coastguard Worker     * a FS's bary_ij input may not actually be live in the
704*61046927SAndroid Build Coastguard Worker     * shader, but it should not be scheduled on top of any
705*61046927SAndroid Build Coastguard Worker     * other input (but can be overwritten by a tex prefetch)
706*61046927SAndroid Build Coastguard Worker     */
707*61046927SAndroid Build Coastguard Worker    foreach_instr_safe (instr, &ctx->unscheduled_list)
708*61046927SAndroid Build Coastguard Worker       if (instr->opc == OPC_META_INPUT)
709*61046927SAndroid Build Coastguard Worker          schedule(ctx, instr);
710*61046927SAndroid Build Coastguard Worker 
711*61046927SAndroid Build Coastguard Worker    foreach_instr_safe (instr, &ctx->unscheduled_list)
712*61046927SAndroid Build Coastguard Worker       if (instr->opc == OPC_META_TEX_PREFETCH)
713*61046927SAndroid Build Coastguard Worker          schedule(ctx, instr);
714*61046927SAndroid Build Coastguard Worker 
715*61046927SAndroid Build Coastguard Worker    foreach_instr_safe (instr, &ctx->unscheduled_list)
716*61046927SAndroid Build Coastguard Worker       if (instr->opc == OPC_PUSH_CONSTS_LOAD_MACRO)
717*61046927SAndroid Build Coastguard Worker          schedule(ctx, instr);
718*61046927SAndroid Build Coastguard Worker 
719*61046927SAndroid Build Coastguard Worker    while (!list_is_empty(&ctx->unscheduled_list)) {
720*61046927SAndroid Build Coastguard Worker       struct ir3_instruction *instr = choose_instr(ctx);
721*61046927SAndroid Build Coastguard Worker 
722*61046927SAndroid Build Coastguard Worker       unsigned delay = node_delay(ctx, instr->data);
723*61046927SAndroid Build Coastguard Worker       d("delay=%u", delay);
724*61046927SAndroid Build Coastguard Worker 
725*61046927SAndroid Build Coastguard Worker       assert(delay <= 6);
726*61046927SAndroid Build Coastguard Worker 
727*61046927SAndroid Build Coastguard Worker       schedule(ctx, instr);
728*61046927SAndroid Build Coastguard Worker    }
729*61046927SAndroid Build Coastguard Worker 
730*61046927SAndroid Build Coastguard Worker    sched_dag_destroy(ctx);
731*61046927SAndroid Build Coastguard Worker 
732*61046927SAndroid Build Coastguard Worker    if (terminator)
733*61046927SAndroid Build Coastguard Worker       list_addtail(&terminator->node, &block->instr_list);
734*61046927SAndroid Build Coastguard Worker }
735*61046927SAndroid Build Coastguard Worker 
736*61046927SAndroid Build Coastguard Worker static bool
is_self_mov(struct ir3_instruction * instr)737*61046927SAndroid Build Coastguard Worker is_self_mov(struct ir3_instruction *instr)
738*61046927SAndroid Build Coastguard Worker {
739*61046927SAndroid Build Coastguard Worker    if (!is_same_type_mov(instr))
740*61046927SAndroid Build Coastguard Worker       return false;
741*61046927SAndroid Build Coastguard Worker 
742*61046927SAndroid Build Coastguard Worker    if (instr->dsts[0]->num != instr->srcs[0]->num)
743*61046927SAndroid Build Coastguard Worker       return false;
744*61046927SAndroid Build Coastguard Worker 
745*61046927SAndroid Build Coastguard Worker    if (instr->dsts[0]->flags & IR3_REG_RELATIV)
746*61046927SAndroid Build Coastguard Worker       return false;
747*61046927SAndroid Build Coastguard Worker 
748*61046927SAndroid Build Coastguard Worker    if (instr->cat1.round != ROUND_ZERO)
749*61046927SAndroid Build Coastguard Worker       return false;
750*61046927SAndroid Build Coastguard Worker 
751*61046927SAndroid Build Coastguard Worker    if (instr->srcs[0]->flags &
752*61046927SAndroid Build Coastguard Worker        (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_FNEG |
753*61046927SAndroid Build Coastguard Worker         IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
754*61046927SAndroid Build Coastguard Worker       return false;
755*61046927SAndroid Build Coastguard Worker 
756*61046927SAndroid Build Coastguard Worker    return true;
757*61046927SAndroid Build Coastguard Worker }
758*61046927SAndroid Build Coastguard Worker 
759*61046927SAndroid Build Coastguard Worker /* sometimes we end up w/ in-place mov's, ie. mov.u32u32 r1.y, r1.y
760*61046927SAndroid Build Coastguard Worker  * as a result of places were before RA we are not sure that it is
761*61046927SAndroid Build Coastguard Worker  * safe to eliminate.  We could eliminate these earlier, but sometimes
762*61046927SAndroid Build Coastguard Worker  * they are tangled up in false-dep's, etc, so it is easier just to
763*61046927SAndroid Build Coastguard Worker  * let them exist until after RA
764*61046927SAndroid Build Coastguard Worker  */
765*61046927SAndroid Build Coastguard Worker static void
cleanup_self_movs(struct ir3 * ir)766*61046927SAndroid Build Coastguard Worker cleanup_self_movs(struct ir3 *ir)
767*61046927SAndroid Build Coastguard Worker {
768*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
769*61046927SAndroid Build Coastguard Worker       foreach_instr_safe (instr, &block->instr_list) {
770*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < instr->deps_count; i++) {
771*61046927SAndroid Build Coastguard Worker             if (instr->deps[i] && is_self_mov(instr->deps[i])) {
772*61046927SAndroid Build Coastguard Worker                instr->deps[i] = NULL;
773*61046927SAndroid Build Coastguard Worker             }
774*61046927SAndroid Build Coastguard Worker          }
775*61046927SAndroid Build Coastguard Worker 
776*61046927SAndroid Build Coastguard Worker          if (is_self_mov(instr))
777*61046927SAndroid Build Coastguard Worker             list_delinit(&instr->node);
778*61046927SAndroid Build Coastguard Worker       }
779*61046927SAndroid Build Coastguard Worker    }
780*61046927SAndroid Build Coastguard Worker }
781*61046927SAndroid Build Coastguard Worker 
782*61046927SAndroid Build Coastguard Worker bool
ir3_postsched(struct ir3 * ir,struct ir3_shader_variant * v)783*61046927SAndroid Build Coastguard Worker ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v)
784*61046927SAndroid Build Coastguard Worker {
785*61046927SAndroid Build Coastguard Worker    struct ir3_postsched_ctx ctx = {
786*61046927SAndroid Build Coastguard Worker       .ir = ir,
787*61046927SAndroid Build Coastguard Worker       .v = v,
788*61046927SAndroid Build Coastguard Worker    };
789*61046927SAndroid Build Coastguard Worker 
790*61046927SAndroid Build Coastguard Worker    cleanup_self_movs(ir);
791*61046927SAndroid Build Coastguard Worker 
792*61046927SAndroid Build Coastguard Worker    foreach_block (block, &ir->block_list) {
793*61046927SAndroid Build Coastguard Worker       sched_block(&ctx, block);
794*61046927SAndroid Build Coastguard Worker    }
795*61046927SAndroid Build Coastguard Worker 
796*61046927SAndroid Build Coastguard Worker    return true;
797*61046927SAndroid Build Coastguard Worker }
798