/*
 * Copyright © 2019 Google, Inc.
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
8*61046927SAndroid Build Coastguard Worker
9*61046927SAndroid Build Coastguard Worker #include "util/dag.h"
10*61046927SAndroid Build Coastguard Worker #include "util/u_math.h"
11*61046927SAndroid Build Coastguard Worker
12*61046927SAndroid Build Coastguard Worker #include "ir3.h"
13*61046927SAndroid Build Coastguard Worker #include "ir3_compiler.h"
14*61046927SAndroid Build Coastguard Worker #include "ir3_context.h"
15*61046927SAndroid Build Coastguard Worker
/* SCHED_DEBUG is the IR3_DBG_SCHEDMSGS bit of ir3_shader_debug when
 * MESA_DEBUG is non-zero, and the constant 0 otherwise.
 */
#if MESA_DEBUG
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#else
#define SCHED_DEBUG 0
#endif

/* Log a "PSCHED: "-prefixed message, but only when SCHED_DEBUG is set. */
#define d(fmt, ...)                                                            \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         mesa_logi("PSCHED: " fmt, ##__VA_ARGS__);                             \
      }                                                                        \
   } while (0)

/* Same as d(), but the message is followed by ": " and the printed form of
 * instr, all written to a single log stream.
 */
#define di(instr, fmt, ...)                                                    \
   do {                                                                        \
      if (SCHED_DEBUG) {                                                       \
         struct log_stream *stream = mesa_log_streami();                       \
         mesa_log_stream_printf(stream, "PSCHED: " fmt ": ", ##__VA_ARGS__);   \
         ir3_print_instr_stream(stream, instr);                                \
         mesa_log_stream_destroy(stream);                                      \
      }                                                                        \
   } while (0)

/* Deepest nesting level that dump_node() still prints. */
#define SCHED_DEBUG_DUMP_DEPTH 1
39*61046927SAndroid Build Coastguard Worker
/*
 * Post RA Instruction Scheduling
 */
43*61046927SAndroid Build Coastguard Worker
44*61046927SAndroid Build Coastguard Worker struct ir3_postsched_ctx {
45*61046927SAndroid Build Coastguard Worker struct ir3 *ir;
46*61046927SAndroid Build Coastguard Worker
47*61046927SAndroid Build Coastguard Worker struct ir3_shader_variant *v;
48*61046927SAndroid Build Coastguard Worker
49*61046927SAndroid Build Coastguard Worker void *mem_ctx;
50*61046927SAndroid Build Coastguard Worker struct ir3_block *block; /* the current block */
51*61046927SAndroid Build Coastguard Worker struct dag *dag;
52*61046927SAndroid Build Coastguard Worker
53*61046927SAndroid Build Coastguard Worker struct list_head unscheduled_list; /* unscheduled instructions */
54*61046927SAndroid Build Coastguard Worker
55*61046927SAndroid Build Coastguard Worker unsigned ip;
56*61046927SAndroid Build Coastguard Worker
57*61046927SAndroid Build Coastguard Worker int ss_delay;
58*61046927SAndroid Build Coastguard Worker int sy_delay;
59*61046927SAndroid Build Coastguard Worker };
60*61046927SAndroid Build Coastguard Worker
61*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node {
62*61046927SAndroid Build Coastguard Worker struct dag_node dag; /* must be first for util_dynarray_foreach */
63*61046927SAndroid Build Coastguard Worker struct ir3_instruction *instr;
64*61046927SAndroid Build Coastguard Worker bool partially_evaluated_path;
65*61046927SAndroid Build Coastguard Worker
66*61046927SAndroid Build Coastguard Worker unsigned earliest_ip;
67*61046927SAndroid Build Coastguard Worker
68*61046927SAndroid Build Coastguard Worker bool has_sy_src, has_ss_src;
69*61046927SAndroid Build Coastguard Worker
70*61046927SAndroid Build Coastguard Worker unsigned max_delay;
71*61046927SAndroid Build Coastguard Worker };
72*61046927SAndroid Build Coastguard Worker
/* Iterate the ir3_postsched_node entries linked into __list through their
 * embedded dag.link member.
 */
#define foreach_sched_node(__n, __list)                                        \
   list_for_each_entry (struct ir3_postsched_node, __n, __list, dag.link)
75*61046927SAndroid Build Coastguard Worker
76*61046927SAndroid Build Coastguard Worker static bool
has_sy_src(struct ir3_instruction * instr)77*61046927SAndroid Build Coastguard Worker has_sy_src(struct ir3_instruction *instr)
78*61046927SAndroid Build Coastguard Worker {
79*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *node = instr->data;
80*61046927SAndroid Build Coastguard Worker return node->has_sy_src;
81*61046927SAndroid Build Coastguard Worker }
82*61046927SAndroid Build Coastguard Worker
83*61046927SAndroid Build Coastguard Worker static bool
has_ss_src(struct ir3_instruction * instr)84*61046927SAndroid Build Coastguard Worker has_ss_src(struct ir3_instruction *instr)
85*61046927SAndroid Build Coastguard Worker {
86*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *node = instr->data;
87*61046927SAndroid Build Coastguard Worker return node->has_ss_src;
88*61046927SAndroid Build Coastguard Worker }
89*61046927SAndroid Build Coastguard Worker
90*61046927SAndroid Build Coastguard Worker #ifndef NDEBUG
91*61046927SAndroid Build Coastguard Worker static void
sched_dag_validate_cb(const struct dag_node * node,void * data)92*61046927SAndroid Build Coastguard Worker sched_dag_validate_cb(const struct dag_node *node, void *data)
93*61046927SAndroid Build Coastguard Worker {
94*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
95*61046927SAndroid Build Coastguard Worker
96*61046927SAndroid Build Coastguard Worker ir3_print_instr(n->instr);
97*61046927SAndroid Build Coastguard Worker }
98*61046927SAndroid Build Coastguard Worker #endif
99*61046927SAndroid Build Coastguard Worker
100*61046927SAndroid Build Coastguard Worker static void
schedule(struct ir3_postsched_ctx * ctx,struct ir3_instruction * instr)101*61046927SAndroid Build Coastguard Worker schedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
102*61046927SAndroid Build Coastguard Worker {
103*61046927SAndroid Build Coastguard Worker assert(ctx->block == instr->block);
104*61046927SAndroid Build Coastguard Worker
105*61046927SAndroid Build Coastguard Worker /* remove from unscheduled_list:
106*61046927SAndroid Build Coastguard Worker */
107*61046927SAndroid Build Coastguard Worker list_delinit(&instr->node);
108*61046927SAndroid Build Coastguard Worker
109*61046927SAndroid Build Coastguard Worker di(instr, "schedule");
110*61046927SAndroid Build Coastguard Worker
111*61046927SAndroid Build Coastguard Worker bool counts_for_delay = is_alu(instr) || is_flow(instr);
112*61046927SAndroid Build Coastguard Worker
113*61046927SAndroid Build Coastguard Worker unsigned delay_cycles = counts_for_delay ? 1 + instr->repeat : 0;
114*61046927SAndroid Build Coastguard Worker
115*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *n = instr->data;
116*61046927SAndroid Build Coastguard Worker
117*61046927SAndroid Build Coastguard Worker /* We insert any nop's needed to get to earliest_ip, then advance
118*61046927SAndroid Build Coastguard Worker * delay_cycles by scheduling the instruction.
119*61046927SAndroid Build Coastguard Worker */
120*61046927SAndroid Build Coastguard Worker ctx->ip = MAX2(ctx->ip, n->earliest_ip) + delay_cycles;
121*61046927SAndroid Build Coastguard Worker
122*61046927SAndroid Build Coastguard Worker util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
123*61046927SAndroid Build Coastguard Worker unsigned delay = (unsigned)(uintptr_t)edge->data;
124*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *child =
125*61046927SAndroid Build Coastguard Worker container_of(edge->child, struct ir3_postsched_node, dag);
126*61046927SAndroid Build Coastguard Worker child->earliest_ip = MAX2(child->earliest_ip, ctx->ip + delay);
127*61046927SAndroid Build Coastguard Worker }
128*61046927SAndroid Build Coastguard Worker
129*61046927SAndroid Build Coastguard Worker list_addtail(&instr->node, &instr->block->instr_list);
130*61046927SAndroid Build Coastguard Worker
131*61046927SAndroid Build Coastguard Worker dag_prune_head(ctx->dag, &n->dag);
132*61046927SAndroid Build Coastguard Worker
133*61046927SAndroid Build Coastguard Worker if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH))
134*61046927SAndroid Build Coastguard Worker return;
135*61046927SAndroid Build Coastguard Worker
136*61046927SAndroid Build Coastguard Worker if (is_ss_producer(instr)) {
137*61046927SAndroid Build Coastguard Worker ctx->ss_delay = soft_ss_delay(instr);
138*61046927SAndroid Build Coastguard Worker } else if (has_ss_src(instr)) {
139*61046927SAndroid Build Coastguard Worker ctx->ss_delay = 0;
140*61046927SAndroid Build Coastguard Worker } else if (ctx->ss_delay > 0) {
141*61046927SAndroid Build Coastguard Worker ctx->ss_delay--;
142*61046927SAndroid Build Coastguard Worker }
143*61046927SAndroid Build Coastguard Worker
144*61046927SAndroid Build Coastguard Worker if (is_sy_producer(instr)) {
145*61046927SAndroid Build Coastguard Worker ctx->sy_delay = soft_sy_delay(instr, ctx->block->shader);
146*61046927SAndroid Build Coastguard Worker } else if (has_sy_src(instr)) {
147*61046927SAndroid Build Coastguard Worker ctx->sy_delay = 0;
148*61046927SAndroid Build Coastguard Worker } else if (ctx->sy_delay > 0) {
149*61046927SAndroid Build Coastguard Worker ctx->sy_delay--;
150*61046927SAndroid Build Coastguard Worker }
151*61046927SAndroid Build Coastguard Worker }
152*61046927SAndroid Build Coastguard Worker
153*61046927SAndroid Build Coastguard Worker static unsigned
node_delay(struct ir3_postsched_ctx * ctx,struct ir3_postsched_node * n)154*61046927SAndroid Build Coastguard Worker node_delay(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n)
155*61046927SAndroid Build Coastguard Worker {
156*61046927SAndroid Build Coastguard Worker return MAX2(n->earliest_ip, ctx->ip) - ctx->ip;
157*61046927SAndroid Build Coastguard Worker }
158*61046927SAndroid Build Coastguard Worker
159*61046927SAndroid Build Coastguard Worker static unsigned
node_delay_soft(struct ir3_postsched_ctx * ctx,struct ir3_postsched_node * n)160*61046927SAndroid Build Coastguard Worker node_delay_soft(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n)
161*61046927SAndroid Build Coastguard Worker {
162*61046927SAndroid Build Coastguard Worker unsigned delay = node_delay(ctx, n);
163*61046927SAndroid Build Coastguard Worker
164*61046927SAndroid Build Coastguard Worker /* This takes into account that as when we schedule multiple tex or sfu, the
165*61046927SAndroid Build Coastguard Worker * first user has to wait for all of them to complete.
166*61046927SAndroid Build Coastguard Worker */
167*61046927SAndroid Build Coastguard Worker if (n->has_ss_src)
168*61046927SAndroid Build Coastguard Worker delay = MAX2(delay, ctx->ss_delay);
169*61046927SAndroid Build Coastguard Worker if (n->has_sy_src)
170*61046927SAndroid Build Coastguard Worker delay = MAX2(delay, ctx->sy_delay);
171*61046927SAndroid Build Coastguard Worker
172*61046927SAndroid Build Coastguard Worker return delay;
173*61046927SAndroid Build Coastguard Worker }
174*61046927SAndroid Build Coastguard Worker
175*61046927SAndroid Build Coastguard Worker static void
dump_node(struct ir3_postsched_ctx * ctx,struct ir3_postsched_node * n,int level)176*61046927SAndroid Build Coastguard Worker dump_node(struct ir3_postsched_ctx *ctx, struct ir3_postsched_node *n,
177*61046927SAndroid Build Coastguard Worker int level)
178*61046927SAndroid Build Coastguard Worker {
179*61046927SAndroid Build Coastguard Worker if (level > SCHED_DEBUG_DUMP_DEPTH)
180*61046927SAndroid Build Coastguard Worker return;
181*61046927SAndroid Build Coastguard Worker
182*61046927SAndroid Build Coastguard Worker di(n->instr, "%*s%smaxdel=%d, node_delay=%d,node_delay_soft=%d, %d parents ",
183*61046927SAndroid Build Coastguard Worker level * 2, "", (level > 0 ? "-> " : ""), n->max_delay, node_delay(ctx, n),
184*61046927SAndroid Build Coastguard Worker node_delay_soft(ctx, n), n->dag.parent_count);
185*61046927SAndroid Build Coastguard Worker
186*61046927SAndroid Build Coastguard Worker util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
187*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *child =
188*61046927SAndroid Build Coastguard Worker (struct ir3_postsched_node *)edge->child;
189*61046927SAndroid Build Coastguard Worker
190*61046927SAndroid Build Coastguard Worker dump_node(ctx, child, level + 1);
191*61046927SAndroid Build Coastguard Worker }
192*61046927SAndroid Build Coastguard Worker }
193*61046927SAndroid Build Coastguard Worker
194*61046927SAndroid Build Coastguard Worker static void
dump_state(struct ir3_postsched_ctx * ctx)195*61046927SAndroid Build Coastguard Worker dump_state(struct ir3_postsched_ctx *ctx)
196*61046927SAndroid Build Coastguard Worker {
197*61046927SAndroid Build Coastguard Worker if (!SCHED_DEBUG)
198*61046927SAndroid Build Coastguard Worker return;
199*61046927SAndroid Build Coastguard Worker
200*61046927SAndroid Build Coastguard Worker foreach_sched_node (n, &ctx->dag->heads) {
201*61046927SAndroid Build Coastguard Worker dump_node(ctx, n, 0);
202*61046927SAndroid Build Coastguard Worker }
203*61046927SAndroid Build Coastguard Worker }
204*61046927SAndroid Build Coastguard Worker
205*61046927SAndroid Build Coastguard Worker /* find instruction to schedule: */
206*61046927SAndroid Build Coastguard Worker static struct ir3_instruction *
choose_instr(struct ir3_postsched_ctx * ctx)207*61046927SAndroid Build Coastguard Worker choose_instr(struct ir3_postsched_ctx *ctx)
208*61046927SAndroid Build Coastguard Worker {
209*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *chosen = NULL;
210*61046927SAndroid Build Coastguard Worker
211*61046927SAndroid Build Coastguard Worker dump_state(ctx);
212*61046927SAndroid Build Coastguard Worker
213*61046927SAndroid Build Coastguard Worker foreach_sched_node (n, &ctx->dag->heads) {
214*61046927SAndroid Build Coastguard Worker if (!is_meta(n->instr))
215*61046927SAndroid Build Coastguard Worker continue;
216*61046927SAndroid Build Coastguard Worker
217*61046927SAndroid Build Coastguard Worker if (!chosen || (chosen->max_delay < n->max_delay))
218*61046927SAndroid Build Coastguard Worker chosen = n;
219*61046927SAndroid Build Coastguard Worker }
220*61046927SAndroid Build Coastguard Worker
221*61046927SAndroid Build Coastguard Worker if (chosen) {
222*61046927SAndroid Build Coastguard Worker di(chosen->instr, "prio: chose (meta)");
223*61046927SAndroid Build Coastguard Worker return chosen->instr;
224*61046927SAndroid Build Coastguard Worker }
225*61046927SAndroid Build Coastguard Worker
226*61046927SAndroid Build Coastguard Worker /* Try to schedule inputs with a higher priority, if possible, as
227*61046927SAndroid Build Coastguard Worker * the last bary.f unlocks varying storage to unblock more VS
228*61046927SAndroid Build Coastguard Worker * warps.
229*61046927SAndroid Build Coastguard Worker */
230*61046927SAndroid Build Coastguard Worker foreach_sched_node (n, &ctx->dag->heads) {
231*61046927SAndroid Build Coastguard Worker if (!is_input(n->instr))
232*61046927SAndroid Build Coastguard Worker continue;
233*61046927SAndroid Build Coastguard Worker
234*61046927SAndroid Build Coastguard Worker if (!chosen || (chosen->max_delay < n->max_delay))
235*61046927SAndroid Build Coastguard Worker chosen = n;
236*61046927SAndroid Build Coastguard Worker }
237*61046927SAndroid Build Coastguard Worker
238*61046927SAndroid Build Coastguard Worker if (chosen) {
239*61046927SAndroid Build Coastguard Worker di(chosen->instr, "prio: chose (input)");
240*61046927SAndroid Build Coastguard Worker return chosen->instr;
241*61046927SAndroid Build Coastguard Worker }
242*61046927SAndroid Build Coastguard Worker
243*61046927SAndroid Build Coastguard Worker /* Next prioritize discards: */
244*61046927SAndroid Build Coastguard Worker foreach_sched_node (n, &ctx->dag->heads) {
245*61046927SAndroid Build Coastguard Worker unsigned d = node_delay(ctx, n);
246*61046927SAndroid Build Coastguard Worker
247*61046927SAndroid Build Coastguard Worker if (d > 0)
248*61046927SAndroid Build Coastguard Worker continue;
249*61046927SAndroid Build Coastguard Worker
250*61046927SAndroid Build Coastguard Worker if (!is_kill_or_demote(n->instr))
251*61046927SAndroid Build Coastguard Worker continue;
252*61046927SAndroid Build Coastguard Worker
253*61046927SAndroid Build Coastguard Worker if (!chosen || (chosen->max_delay < n->max_delay))
254*61046927SAndroid Build Coastguard Worker chosen = n;
255*61046927SAndroid Build Coastguard Worker }
256*61046927SAndroid Build Coastguard Worker
257*61046927SAndroid Build Coastguard Worker if (chosen) {
258*61046927SAndroid Build Coastguard Worker di(chosen->instr, "csp: chose (kill, hard ready)");
259*61046927SAndroid Build Coastguard Worker return chosen->instr;
260*61046927SAndroid Build Coastguard Worker }
261*61046927SAndroid Build Coastguard Worker
262*61046927SAndroid Build Coastguard Worker /* Next prioritize expensive instructions: */
263*61046927SAndroid Build Coastguard Worker foreach_sched_node (n, &ctx->dag->heads) {
264*61046927SAndroid Build Coastguard Worker unsigned d = node_delay_soft(ctx, n);
265*61046927SAndroid Build Coastguard Worker
266*61046927SAndroid Build Coastguard Worker if (d > 0)
267*61046927SAndroid Build Coastguard Worker continue;
268*61046927SAndroid Build Coastguard Worker
269*61046927SAndroid Build Coastguard Worker if (!(is_ss_producer(n->instr) || is_sy_producer(n->instr)))
270*61046927SAndroid Build Coastguard Worker continue;
271*61046927SAndroid Build Coastguard Worker
272*61046927SAndroid Build Coastguard Worker if (!chosen || (chosen->max_delay < n->max_delay))
273*61046927SAndroid Build Coastguard Worker chosen = n;
274*61046927SAndroid Build Coastguard Worker }
275*61046927SAndroid Build Coastguard Worker
276*61046927SAndroid Build Coastguard Worker if (chosen) {
277*61046927SAndroid Build Coastguard Worker di(chosen->instr, "csp: chose (sfu/tex, soft ready)");
278*61046927SAndroid Build Coastguard Worker return chosen->instr;
279*61046927SAndroid Build Coastguard Worker }
280*61046927SAndroid Build Coastguard Worker
281*61046927SAndroid Build Coastguard Worker /* Next try to find a ready leader w/ soft delay (ie. including extra
282*61046927SAndroid Build Coastguard Worker * delay for things like tex fetch which can be synchronized w/ sync
283*61046927SAndroid Build Coastguard Worker * bit (but we probably do want to schedule some other instructions
284*61046927SAndroid Build Coastguard Worker * while we wait). We also allow a small amount of nops, to prefer now-nops
285*61046927SAndroid Build Coastguard Worker * over future-nops up to a point, as that gives better results.
286*61046927SAndroid Build Coastguard Worker */
287*61046927SAndroid Build Coastguard Worker unsigned chosen_delay = 0;
288*61046927SAndroid Build Coastguard Worker foreach_sched_node (n, &ctx->dag->heads) {
289*61046927SAndroid Build Coastguard Worker unsigned d = node_delay_soft(ctx, n);
290*61046927SAndroid Build Coastguard Worker
291*61046927SAndroid Build Coastguard Worker if (d > 3)
292*61046927SAndroid Build Coastguard Worker continue;
293*61046927SAndroid Build Coastguard Worker
294*61046927SAndroid Build Coastguard Worker if (!chosen || d < chosen_delay) {
295*61046927SAndroid Build Coastguard Worker chosen = n;
296*61046927SAndroid Build Coastguard Worker chosen_delay = d;
297*61046927SAndroid Build Coastguard Worker continue;
298*61046927SAndroid Build Coastguard Worker }
299*61046927SAndroid Build Coastguard Worker
300*61046927SAndroid Build Coastguard Worker if (d > chosen_delay)
301*61046927SAndroid Build Coastguard Worker continue;
302*61046927SAndroid Build Coastguard Worker
303*61046927SAndroid Build Coastguard Worker if (chosen->max_delay < n->max_delay) {
304*61046927SAndroid Build Coastguard Worker chosen = n;
305*61046927SAndroid Build Coastguard Worker chosen_delay = d;
306*61046927SAndroid Build Coastguard Worker }
307*61046927SAndroid Build Coastguard Worker }
308*61046927SAndroid Build Coastguard Worker
309*61046927SAndroid Build Coastguard Worker if (chosen) {
310*61046927SAndroid Build Coastguard Worker di(chosen->instr, "csp: chose (soft ready)");
311*61046927SAndroid Build Coastguard Worker return chosen->instr;
312*61046927SAndroid Build Coastguard Worker }
313*61046927SAndroid Build Coastguard Worker
314*61046927SAndroid Build Coastguard Worker /* Otherwise choose leader with maximum cost:
315*61046927SAndroid Build Coastguard Worker */
316*61046927SAndroid Build Coastguard Worker foreach_sched_node (n, &ctx->dag->heads) {
317*61046927SAndroid Build Coastguard Worker if (!chosen || chosen->max_delay < n->max_delay)
318*61046927SAndroid Build Coastguard Worker chosen = n;
319*61046927SAndroid Build Coastguard Worker }
320*61046927SAndroid Build Coastguard Worker
321*61046927SAndroid Build Coastguard Worker if (chosen) {
322*61046927SAndroid Build Coastguard Worker di(chosen->instr, "csp: chose (leader)");
323*61046927SAndroid Build Coastguard Worker return chosen->instr;
324*61046927SAndroid Build Coastguard Worker }
325*61046927SAndroid Build Coastguard Worker
326*61046927SAndroid Build Coastguard Worker return NULL;
327*61046927SAndroid Build Coastguard Worker }
328*61046927SAndroid Build Coastguard Worker
329*61046927SAndroid Build Coastguard Worker struct ir3_postsched_deps_state {
330*61046927SAndroid Build Coastguard Worker struct ir3_postsched_ctx *ctx;
331*61046927SAndroid Build Coastguard Worker
332*61046927SAndroid Build Coastguard Worker enum { F, R } direction;
333*61046927SAndroid Build Coastguard Worker
334*61046927SAndroid Build Coastguard Worker bool merged;
335*61046927SAndroid Build Coastguard Worker
336*61046927SAndroid Build Coastguard Worker /* Track the mapping between sched node (instruction) that last
337*61046927SAndroid Build Coastguard Worker * wrote a given register (in whichever direction we are iterating
338*61046927SAndroid Build Coastguard Worker * the block)
339*61046927SAndroid Build Coastguard Worker *
340*61046927SAndroid Build Coastguard Worker * Note, this table is twice as big as the # of regs, to deal with
341*61046927SAndroid Build Coastguard Worker * half-precision regs. The approach differs depending on whether
342*61046927SAndroid Build Coastguard Worker * the half and full precision register files are "merged" (conflict,
343*61046927SAndroid Build Coastguard Worker * ie. a6xx+) in which case we use "regs" for both full precision and half
344*61046927SAndroid Build Coastguard Worker * precision dependencies and consider each full precision dep
345*61046927SAndroid Build Coastguard Worker * as two half-precision dependencies, vs older separate (non-
346*61046927SAndroid Build Coastguard Worker * conflicting) in which case the separate "half_regs" table is used for
347*61046927SAndroid Build Coastguard Worker * half-precision deps. See ir3_reg_file_offset().
348*61046927SAndroid Build Coastguard Worker */
349*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *regs[2 * GPR_REG_SIZE];
350*61046927SAndroid Build Coastguard Worker unsigned dst_n[2 * GPR_REG_SIZE];
351*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *half_regs[GPR_REG_SIZE];
352*61046927SAndroid Build Coastguard Worker unsigned half_dst_n[GPR_REG_SIZE];
353*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *shared_regs[2 * SHARED_REG_SIZE];
354*61046927SAndroid Build Coastguard Worker unsigned shared_dst_n[2 * SHARED_REG_SIZE];
355*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *nongpr_regs[2 * NONGPR_REG_SIZE];
356*61046927SAndroid Build Coastguard Worker unsigned nongpr_dst_n[2 * NONGPR_REG_SIZE];
357*61046927SAndroid Build Coastguard Worker };
358*61046927SAndroid Build Coastguard Worker
359*61046927SAndroid Build Coastguard Worker static void
add_dep(struct ir3_postsched_deps_state * state,struct ir3_postsched_node * before,struct ir3_postsched_node * after,unsigned d)360*61046927SAndroid Build Coastguard Worker add_dep(struct ir3_postsched_deps_state *state,
361*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *before, struct ir3_postsched_node *after,
362*61046927SAndroid Build Coastguard Worker unsigned d)
363*61046927SAndroid Build Coastguard Worker {
364*61046927SAndroid Build Coastguard Worker if (!before || !after)
365*61046927SAndroid Build Coastguard Worker return;
366*61046927SAndroid Build Coastguard Worker
367*61046927SAndroid Build Coastguard Worker assert(before != after);
368*61046927SAndroid Build Coastguard Worker
369*61046927SAndroid Build Coastguard Worker if (state->direction == F) {
370*61046927SAndroid Build Coastguard Worker dag_add_edge_max_data(&before->dag, &after->dag, (uintptr_t)d);
371*61046927SAndroid Build Coastguard Worker } else {
372*61046927SAndroid Build Coastguard Worker dag_add_edge_max_data(&after->dag, &before->dag, 0);
373*61046927SAndroid Build Coastguard Worker }
374*61046927SAndroid Build Coastguard Worker }
375*61046927SAndroid Build Coastguard Worker
376*61046927SAndroid Build Coastguard Worker static void
add_single_reg_dep(struct ir3_postsched_deps_state * state,struct ir3_postsched_node * node,struct ir3_postsched_node ** dep_ptr,unsigned * dst_n_ptr,unsigned num,int src_n,int dst_n)377*61046927SAndroid Build Coastguard Worker add_single_reg_dep(struct ir3_postsched_deps_state *state,
378*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *node,
379*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node **dep_ptr,
380*61046927SAndroid Build Coastguard Worker unsigned *dst_n_ptr, unsigned num, int src_n,
381*61046927SAndroid Build Coastguard Worker int dst_n)
382*61046927SAndroid Build Coastguard Worker {
383*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *dep = *dep_ptr;
384*61046927SAndroid Build Coastguard Worker
385*61046927SAndroid Build Coastguard Worker unsigned d = 0;
386*61046927SAndroid Build Coastguard Worker if (src_n >= 0 && dep && state->direction == F) {
387*61046927SAndroid Build Coastguard Worker struct ir3_compiler *compiler = state->ctx->ir->compiler;
388*61046927SAndroid Build Coastguard Worker /* get the dst_n this corresponds to */
389*61046927SAndroid Build Coastguard Worker unsigned dst_n = *dst_n_ptr;
390*61046927SAndroid Build Coastguard Worker d = ir3_delayslots_with_repeat(compiler, dep->instr, node->instr, dst_n, src_n);
391*61046927SAndroid Build Coastguard Worker if (is_sy_producer(dep->instr))
392*61046927SAndroid Build Coastguard Worker node->has_sy_src = true;
393*61046927SAndroid Build Coastguard Worker if (needs_ss(compiler, dep->instr, node->instr))
394*61046927SAndroid Build Coastguard Worker node->has_ss_src = true;
395*61046927SAndroid Build Coastguard Worker }
396*61046927SAndroid Build Coastguard Worker
397*61046927SAndroid Build Coastguard Worker if (src_n >= 0 && dep && state->direction == R) {
398*61046927SAndroid Build Coastguard Worker /* If node generates a WAR hazard (because it doesn't consume its sources
399*61046927SAndroid Build Coastguard Worker * immediately, dep needs (ss) to sync its dest. Even though this isn't a
400*61046927SAndroid Build Coastguard Worker * (ss) source (but rather a dest), the effect is exactly the same so we
401*61046927SAndroid Build Coastguard Worker * model it as such.
402*61046927SAndroid Build Coastguard Worker */
403*61046927SAndroid Build Coastguard Worker if (is_war_hazard_producer(node->instr)) {
404*61046927SAndroid Build Coastguard Worker dep->has_ss_src = true;
405*61046927SAndroid Build Coastguard Worker }
406*61046927SAndroid Build Coastguard Worker }
407*61046927SAndroid Build Coastguard Worker
408*61046927SAndroid Build Coastguard Worker add_dep(state, dep, node, d);
409*61046927SAndroid Build Coastguard Worker if (src_n < 0) {
410*61046927SAndroid Build Coastguard Worker *dep_ptr = node;
411*61046927SAndroid Build Coastguard Worker *dst_n_ptr = dst_n;
412*61046927SAndroid Build Coastguard Worker }
413*61046927SAndroid Build Coastguard Worker }
414*61046927SAndroid Build Coastguard Worker
415*61046927SAndroid Build Coastguard Worker /* This is where we handled full vs half-precision, and potential conflicts
416*61046927SAndroid Build Coastguard Worker * between half and full precision that result in additional dependencies.
417*61046927SAndroid Build Coastguard Worker * The 'reg' arg is really just to know half vs full precision.
418*61046927SAndroid Build Coastguard Worker *
419*61046927SAndroid Build Coastguard Worker * If src_n is positive, then this adds a dependency on a source register, and
420*61046927SAndroid Build Coastguard Worker * src_n is the index passed into ir3_delayslots() for calculating the delay:
421*61046927SAndroid Build Coastguard Worker * it corresponds to node->instr->srcs[src_n]. If src_n is negative, then
422*61046927SAndroid Build Coastguard Worker * this is for the destination register corresponding to dst_n.
423*61046927SAndroid Build Coastguard Worker */
424*61046927SAndroid Build Coastguard Worker static void
add_reg_dep(struct ir3_postsched_deps_state * state,struct ir3_postsched_node * node,const struct ir3_register * reg,unsigned num,int src_n,int dst_n)425*61046927SAndroid Build Coastguard Worker add_reg_dep(struct ir3_postsched_deps_state *state,
426*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *node, const struct ir3_register *reg,
427*61046927SAndroid Build Coastguard Worker unsigned num, int src_n, int dst_n)
428*61046927SAndroid Build Coastguard Worker {
429*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node **regs;
430*61046927SAndroid Build Coastguard Worker unsigned *dst_n_ptr;
431*61046927SAndroid Build Coastguard Worker enum ir3_reg_file file;
432*61046927SAndroid Build Coastguard Worker unsigned size = reg_elem_size(reg);
433*61046927SAndroid Build Coastguard Worker unsigned offset = ir3_reg_file_offset(reg, num, state->merged, &file);
434*61046927SAndroid Build Coastguard Worker switch (file) {
435*61046927SAndroid Build Coastguard Worker case IR3_FILE_FULL:
436*61046927SAndroid Build Coastguard Worker assert(offset + size <= ARRAY_SIZE(state->regs));
437*61046927SAndroid Build Coastguard Worker regs = state->regs;
438*61046927SAndroid Build Coastguard Worker dst_n_ptr = state->dst_n;
439*61046927SAndroid Build Coastguard Worker break;
440*61046927SAndroid Build Coastguard Worker case IR3_FILE_HALF:
441*61046927SAndroid Build Coastguard Worker assert(offset + 1 <= ARRAY_SIZE(state->half_regs));
442*61046927SAndroid Build Coastguard Worker regs = state->half_regs;
443*61046927SAndroid Build Coastguard Worker dst_n_ptr = state->half_dst_n;
444*61046927SAndroid Build Coastguard Worker break;
445*61046927SAndroid Build Coastguard Worker case IR3_FILE_SHARED:
446*61046927SAndroid Build Coastguard Worker assert(offset + size <= ARRAY_SIZE(state->shared_regs));
447*61046927SAndroid Build Coastguard Worker regs = state->shared_regs;
448*61046927SAndroid Build Coastguard Worker dst_n_ptr = state->shared_dst_n;
449*61046927SAndroid Build Coastguard Worker break;
450*61046927SAndroid Build Coastguard Worker case IR3_FILE_NONGPR:
451*61046927SAndroid Build Coastguard Worker assert(offset + size <= ARRAY_SIZE(state->nongpr_regs));
452*61046927SAndroid Build Coastguard Worker regs = state->nongpr_regs;
453*61046927SAndroid Build Coastguard Worker dst_n_ptr = state->nongpr_dst_n;
454*61046927SAndroid Build Coastguard Worker break;
455*61046927SAndroid Build Coastguard Worker }
456*61046927SAndroid Build Coastguard Worker
457*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < size; i++)
458*61046927SAndroid Build Coastguard Worker add_single_reg_dep(state, node, ®s[offset + i], &dst_n_ptr[offset + i], num, src_n, dst_n);
459*61046927SAndroid Build Coastguard Worker }
460*61046927SAndroid Build Coastguard Worker
461*61046927SAndroid Build Coastguard Worker static void
calculate_deps(struct ir3_postsched_deps_state * state,struct ir3_postsched_node * node)462*61046927SAndroid Build Coastguard Worker calculate_deps(struct ir3_postsched_deps_state *state,
463*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *node)
464*61046927SAndroid Build Coastguard Worker {
465*61046927SAndroid Build Coastguard Worker /* Add dependencies on instructions that previously (or next,
466*61046927SAndroid Build Coastguard Worker * in the reverse direction) wrote any of our src registers:
467*61046927SAndroid Build Coastguard Worker */
468*61046927SAndroid Build Coastguard Worker foreach_src_n (reg, i, node->instr) {
469*61046927SAndroid Build Coastguard Worker if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
470*61046927SAndroid Build Coastguard Worker continue;
471*61046927SAndroid Build Coastguard Worker
472*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_RELATIV) {
473*61046927SAndroid Build Coastguard Worker /* mark entire array as read: */
474*61046927SAndroid Build Coastguard Worker for (unsigned j = 0; j < reg->size; j++) {
475*61046927SAndroid Build Coastguard Worker add_reg_dep(state, node, reg, reg->array.base + j, i, -1);
476*61046927SAndroid Build Coastguard Worker }
477*61046927SAndroid Build Coastguard Worker } else {
478*61046927SAndroid Build Coastguard Worker assert(reg->wrmask >= 1);
479*61046927SAndroid Build Coastguard Worker u_foreach_bit (b, reg->wrmask) {
480*61046927SAndroid Build Coastguard Worker add_reg_dep(state, node, reg, reg->num + b, i, -1);
481*61046927SAndroid Build Coastguard Worker }
482*61046927SAndroid Build Coastguard Worker }
483*61046927SAndroid Build Coastguard Worker }
484*61046927SAndroid Build Coastguard Worker
485*61046927SAndroid Build Coastguard Worker /* And then after we update the state for what this instruction
486*61046927SAndroid Build Coastguard Worker * wrote:
487*61046927SAndroid Build Coastguard Worker */
488*61046927SAndroid Build Coastguard Worker foreach_dst_n (reg, i, node->instr) {
489*61046927SAndroid Build Coastguard Worker if (reg->wrmask == 0)
490*61046927SAndroid Build Coastguard Worker continue;
491*61046927SAndroid Build Coastguard Worker if (reg->flags & IR3_REG_RELATIV) {
492*61046927SAndroid Build Coastguard Worker /* mark the entire array as written: */
493*61046927SAndroid Build Coastguard Worker for (unsigned j = 0; j < reg->size; j++) {
494*61046927SAndroid Build Coastguard Worker add_reg_dep(state, node, reg, reg->array.base + j, -1, i);
495*61046927SAndroid Build Coastguard Worker }
496*61046927SAndroid Build Coastguard Worker } else {
497*61046927SAndroid Build Coastguard Worker assert(reg->wrmask >= 1);
498*61046927SAndroid Build Coastguard Worker u_foreach_bit (b, reg->wrmask) {
499*61046927SAndroid Build Coastguard Worker add_reg_dep(state, node, reg, reg->num + b, -1, i);
500*61046927SAndroid Build Coastguard Worker }
501*61046927SAndroid Build Coastguard Worker }
502*61046927SAndroid Build Coastguard Worker }
503*61046927SAndroid Build Coastguard Worker }
504*61046927SAndroid Build Coastguard Worker
505*61046927SAndroid Build Coastguard Worker static void
calculate_forward_deps(struct ir3_postsched_ctx * ctx)506*61046927SAndroid Build Coastguard Worker calculate_forward_deps(struct ir3_postsched_ctx *ctx)
507*61046927SAndroid Build Coastguard Worker {
508*61046927SAndroid Build Coastguard Worker struct ir3_postsched_deps_state state = {
509*61046927SAndroid Build Coastguard Worker .ctx = ctx,
510*61046927SAndroid Build Coastguard Worker .direction = F,
511*61046927SAndroid Build Coastguard Worker .merged = ctx->v->mergedregs,
512*61046927SAndroid Build Coastguard Worker };
513*61046927SAndroid Build Coastguard Worker
514*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &ctx->unscheduled_list) {
515*61046927SAndroid Build Coastguard Worker calculate_deps(&state, instr->data);
516*61046927SAndroid Build Coastguard Worker }
517*61046927SAndroid Build Coastguard Worker }
518*61046927SAndroid Build Coastguard Worker
519*61046927SAndroid Build Coastguard Worker static void
calculate_reverse_deps(struct ir3_postsched_ctx * ctx)520*61046927SAndroid Build Coastguard Worker calculate_reverse_deps(struct ir3_postsched_ctx *ctx)
521*61046927SAndroid Build Coastguard Worker {
522*61046927SAndroid Build Coastguard Worker struct ir3_postsched_deps_state state = {
523*61046927SAndroid Build Coastguard Worker .ctx = ctx,
524*61046927SAndroid Build Coastguard Worker .direction = R,
525*61046927SAndroid Build Coastguard Worker .merged = ctx->v->mergedregs,
526*61046927SAndroid Build Coastguard Worker };
527*61046927SAndroid Build Coastguard Worker
528*61046927SAndroid Build Coastguard Worker foreach_instr_rev (instr, &ctx->unscheduled_list) {
529*61046927SAndroid Build Coastguard Worker calculate_deps(&state, instr->data);
530*61046927SAndroid Build Coastguard Worker }
531*61046927SAndroid Build Coastguard Worker }
532*61046927SAndroid Build Coastguard Worker
533*61046927SAndroid Build Coastguard Worker static void
sched_node_init(struct ir3_postsched_ctx * ctx,struct ir3_instruction * instr)534*61046927SAndroid Build Coastguard Worker sched_node_init(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
535*61046927SAndroid Build Coastguard Worker {
536*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *n =
537*61046927SAndroid Build Coastguard Worker rzalloc(ctx->mem_ctx, struct ir3_postsched_node);
538*61046927SAndroid Build Coastguard Worker
539*61046927SAndroid Build Coastguard Worker dag_init_node(ctx->dag, &n->dag);
540*61046927SAndroid Build Coastguard Worker
541*61046927SAndroid Build Coastguard Worker n->instr = instr;
542*61046927SAndroid Build Coastguard Worker instr->data = n;
543*61046927SAndroid Build Coastguard Worker }
544*61046927SAndroid Build Coastguard Worker
545*61046927SAndroid Build Coastguard Worker static void
sched_dag_max_delay_cb(struct dag_node * node,void * state)546*61046927SAndroid Build Coastguard Worker sched_dag_max_delay_cb(struct dag_node *node, void *state)
547*61046927SAndroid Build Coastguard Worker {
548*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
549*61046927SAndroid Build Coastguard Worker struct ir3_postsched_ctx *ctx = state;
550*61046927SAndroid Build Coastguard Worker uint32_t max_delay = 0;
551*61046927SAndroid Build Coastguard Worker
552*61046927SAndroid Build Coastguard Worker util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
553*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *child =
554*61046927SAndroid Build Coastguard Worker (struct ir3_postsched_node *)edge->child;
555*61046927SAndroid Build Coastguard Worker unsigned delay = edge->data;
556*61046927SAndroid Build Coastguard Worker unsigned sy_delay = 0;
557*61046927SAndroid Build Coastguard Worker unsigned ss_delay = 0;
558*61046927SAndroid Build Coastguard Worker
559*61046927SAndroid Build Coastguard Worker if (child->has_sy_src && is_sy_producer(n->instr)) {
560*61046927SAndroid Build Coastguard Worker sy_delay = soft_sy_delay(n->instr, ctx->block->shader);
561*61046927SAndroid Build Coastguard Worker }
562*61046927SAndroid Build Coastguard Worker
563*61046927SAndroid Build Coastguard Worker if (child->has_ss_src &&
564*61046927SAndroid Build Coastguard Worker needs_ss(ctx->v->compiler, n->instr, child->instr)) {
565*61046927SAndroid Build Coastguard Worker ss_delay = soft_ss_delay(n->instr);
566*61046927SAndroid Build Coastguard Worker }
567*61046927SAndroid Build Coastguard Worker
568*61046927SAndroid Build Coastguard Worker delay = MAX3(delay, sy_delay, ss_delay);
569*61046927SAndroid Build Coastguard Worker max_delay = MAX2(child->max_delay + delay, max_delay);
570*61046927SAndroid Build Coastguard Worker }
571*61046927SAndroid Build Coastguard Worker
572*61046927SAndroid Build Coastguard Worker n->max_delay = MAX2(n->max_delay, max_delay);
573*61046927SAndroid Build Coastguard Worker }
574*61046927SAndroid Build Coastguard Worker
575*61046927SAndroid Build Coastguard Worker static void
sched_dag_init(struct ir3_postsched_ctx * ctx)576*61046927SAndroid Build Coastguard Worker sched_dag_init(struct ir3_postsched_ctx *ctx)
577*61046927SAndroid Build Coastguard Worker {
578*61046927SAndroid Build Coastguard Worker ctx->mem_ctx = ralloc_context(NULL);
579*61046927SAndroid Build Coastguard Worker
580*61046927SAndroid Build Coastguard Worker ctx->dag = dag_create(ctx->mem_ctx);
581*61046927SAndroid Build Coastguard Worker
582*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &ctx->unscheduled_list)
583*61046927SAndroid Build Coastguard Worker sched_node_init(ctx, instr);
584*61046927SAndroid Build Coastguard Worker
585*61046927SAndroid Build Coastguard Worker calculate_forward_deps(ctx);
586*61046927SAndroid Build Coastguard Worker calculate_reverse_deps(ctx);
587*61046927SAndroid Build Coastguard Worker
588*61046927SAndroid Build Coastguard Worker /*
589*61046927SAndroid Build Coastguard Worker * To avoid expensive texture fetches, etc, from being moved ahead
590*61046927SAndroid Build Coastguard Worker * of kills, track the kills we've seen so far, so we can add an
591*61046927SAndroid Build Coastguard Worker * extra dependency on them for tex/mem instructions
592*61046927SAndroid Build Coastguard Worker */
593*61046927SAndroid Build Coastguard Worker struct util_dynarray kills;
594*61046927SAndroid Build Coastguard Worker util_dynarray_init(&kills, ctx->mem_ctx);
595*61046927SAndroid Build Coastguard Worker
596*61046927SAndroid Build Coastguard Worker /* The last bary.f with the (ei) flag must be scheduled before any kills,
597*61046927SAndroid Build Coastguard Worker * or the hw gets angry. Keep track of inputs here so we can add the
598*61046927SAndroid Build Coastguard Worker * false dep on the kill instruction.
599*61046927SAndroid Build Coastguard Worker */
600*61046927SAndroid Build Coastguard Worker struct util_dynarray inputs;
601*61046927SAndroid Build Coastguard Worker util_dynarray_init(&inputs, ctx->mem_ctx);
602*61046927SAndroid Build Coastguard Worker
603*61046927SAndroid Build Coastguard Worker /*
604*61046927SAndroid Build Coastguard Worker * Normal srcs won't be in SSA at this point, those are dealt with in
605*61046927SAndroid Build Coastguard Worker * calculate_forward_deps() and calculate_reverse_deps(). But we still
606*61046927SAndroid Build Coastguard Worker * have the false-dep information in SSA form, so go ahead and add
607*61046927SAndroid Build Coastguard Worker * dependencies for that here:
608*61046927SAndroid Build Coastguard Worker */
609*61046927SAndroid Build Coastguard Worker foreach_instr (instr, &ctx->unscheduled_list) {
610*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *n = instr->data;
611*61046927SAndroid Build Coastguard Worker
612*61046927SAndroid Build Coastguard Worker foreach_ssa_src_n (src, i, instr) {
613*61046927SAndroid Build Coastguard Worker if (src->block != instr->block)
614*61046927SAndroid Build Coastguard Worker continue;
615*61046927SAndroid Build Coastguard Worker
616*61046927SAndroid Build Coastguard Worker /* we can end up with unused false-deps.. just skip them: */
617*61046927SAndroid Build Coastguard Worker if (src->flags & IR3_INSTR_UNUSED)
618*61046927SAndroid Build Coastguard Worker continue;
619*61046927SAndroid Build Coastguard Worker
620*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *sn = src->data;
621*61046927SAndroid Build Coastguard Worker
622*61046927SAndroid Build Coastguard Worker /* don't consider dependencies in other blocks: */
623*61046927SAndroid Build Coastguard Worker if (src->block != instr->block)
624*61046927SAndroid Build Coastguard Worker continue;
625*61046927SAndroid Build Coastguard Worker
626*61046927SAndroid Build Coastguard Worker dag_add_edge_max_data(&sn->dag, &n->dag, 0);
627*61046927SAndroid Build Coastguard Worker }
628*61046927SAndroid Build Coastguard Worker
629*61046927SAndroid Build Coastguard Worker if (is_input(instr)) {
630*61046927SAndroid Build Coastguard Worker util_dynarray_append(&inputs, struct ir3_instruction *, instr);
631*61046927SAndroid Build Coastguard Worker } else if (is_kill_or_demote(instr)) {
632*61046927SAndroid Build Coastguard Worker util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) {
633*61046927SAndroid Build Coastguard Worker struct ir3_instruction *input = *instrp;
634*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *in = input->data;
635*61046927SAndroid Build Coastguard Worker dag_add_edge_max_data(&in->dag, &n->dag, 0);
636*61046927SAndroid Build Coastguard Worker }
637*61046927SAndroid Build Coastguard Worker util_dynarray_append(&kills, struct ir3_instruction *, instr);
638*61046927SAndroid Build Coastguard Worker } else if (is_tex(instr) || is_mem(instr)) {
639*61046927SAndroid Build Coastguard Worker util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) {
640*61046927SAndroid Build Coastguard Worker struct ir3_instruction *kill = *instrp;
641*61046927SAndroid Build Coastguard Worker struct ir3_postsched_node *kn = kill->data;
642*61046927SAndroid Build Coastguard Worker dag_add_edge_max_data(&kn->dag, &n->dag, 0);
643*61046927SAndroid Build Coastguard Worker }
644*61046927SAndroid Build Coastguard Worker }
645*61046927SAndroid Build Coastguard Worker }
646*61046927SAndroid Build Coastguard Worker
647*61046927SAndroid Build Coastguard Worker #ifndef NDEBUG
648*61046927SAndroid Build Coastguard Worker dag_validate(ctx->dag, sched_dag_validate_cb, NULL);
649*61046927SAndroid Build Coastguard Worker #endif
650*61046927SAndroid Build Coastguard Worker
651*61046927SAndroid Build Coastguard Worker // TODO do we want to do this after reverse-dependencies?
652*61046927SAndroid Build Coastguard Worker dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, ctx);
653*61046927SAndroid Build Coastguard Worker }
654*61046927SAndroid Build Coastguard Worker
655*61046927SAndroid Build Coastguard Worker static void
sched_dag_destroy(struct ir3_postsched_ctx * ctx)656*61046927SAndroid Build Coastguard Worker sched_dag_destroy(struct ir3_postsched_ctx *ctx)
657*61046927SAndroid Build Coastguard Worker {
658*61046927SAndroid Build Coastguard Worker ralloc_free(ctx->mem_ctx);
659*61046927SAndroid Build Coastguard Worker ctx->mem_ctx = NULL;
660*61046927SAndroid Build Coastguard Worker ctx->dag = NULL;
661*61046927SAndroid Build Coastguard Worker }
662*61046927SAndroid Build Coastguard Worker
663*61046927SAndroid Build Coastguard Worker static void
sched_block(struct ir3_postsched_ctx * ctx,struct ir3_block * block)664*61046927SAndroid Build Coastguard Worker sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
665*61046927SAndroid Build Coastguard Worker {
666*61046927SAndroid Build Coastguard Worker ctx->block = block;
667*61046927SAndroid Build Coastguard Worker ctx->sy_delay = 0;
668*61046927SAndroid Build Coastguard Worker ctx->ss_delay = 0;
669*61046927SAndroid Build Coastguard Worker
670*61046927SAndroid Build Coastguard Worker /* The terminator has to stay at the end. Instead of trying to set up
671*61046927SAndroid Build Coastguard Worker * dependencies to achieve this, it's easier to just remove it now and add it
672*61046927SAndroid Build Coastguard Worker * back after scheduling.
673*61046927SAndroid Build Coastguard Worker */
674*61046927SAndroid Build Coastguard Worker struct ir3_instruction *terminator = ir3_block_take_terminator(block);
675*61046927SAndroid Build Coastguard Worker
676*61046927SAndroid Build Coastguard Worker /* move all instructions to the unscheduled list, and
677*61046927SAndroid Build Coastguard Worker * empty the block's instruction list (to which we will
678*61046927SAndroid Build Coastguard Worker * be inserting).
679*61046927SAndroid Build Coastguard Worker */
680*61046927SAndroid Build Coastguard Worker list_replace(&block->instr_list, &ctx->unscheduled_list);
681*61046927SAndroid Build Coastguard Worker list_inithead(&block->instr_list);
682*61046927SAndroid Build Coastguard Worker
683*61046927SAndroid Build Coastguard Worker // TODO once we are using post-sched for everything we can
684*61046927SAndroid Build Coastguard Worker // just not stick in NOP's prior to post-sched, and drop this.
685*61046927SAndroid Build Coastguard Worker // for now keep this, since it makes post-sched optional:
686*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &ctx->unscheduled_list) {
687*61046927SAndroid Build Coastguard Worker switch (instr->opc) {
688*61046927SAndroid Build Coastguard Worker case OPC_NOP:
689*61046927SAndroid Build Coastguard Worker list_delinit(&instr->node);
690*61046927SAndroid Build Coastguard Worker break;
691*61046927SAndroid Build Coastguard Worker default:
692*61046927SAndroid Build Coastguard Worker break;
693*61046927SAndroid Build Coastguard Worker }
694*61046927SAndroid Build Coastguard Worker }
695*61046927SAndroid Build Coastguard Worker
696*61046927SAndroid Build Coastguard Worker sched_dag_init(ctx);
697*61046927SAndroid Build Coastguard Worker
698*61046927SAndroid Build Coastguard Worker /* First schedule all meta:input instructions, followed by
699*61046927SAndroid Build Coastguard Worker * tex-prefetch. We want all of the instructions that load
700*61046927SAndroid Build Coastguard Worker * values into registers before the shader starts to go
701*61046927SAndroid Build Coastguard Worker * before any other instructions. But in particular we
702*61046927SAndroid Build Coastguard Worker * want inputs to come before prefetches. This is because
703*61046927SAndroid Build Coastguard Worker * a FS's bary_ij input may not actually be live in the
704*61046927SAndroid Build Coastguard Worker * shader, but it should not be scheduled on top of any
705*61046927SAndroid Build Coastguard Worker * other input (but can be overwritten by a tex prefetch)
706*61046927SAndroid Build Coastguard Worker */
707*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &ctx->unscheduled_list)
708*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_META_INPUT)
709*61046927SAndroid Build Coastguard Worker schedule(ctx, instr);
710*61046927SAndroid Build Coastguard Worker
711*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &ctx->unscheduled_list)
712*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_META_TEX_PREFETCH)
713*61046927SAndroid Build Coastguard Worker schedule(ctx, instr);
714*61046927SAndroid Build Coastguard Worker
715*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &ctx->unscheduled_list)
716*61046927SAndroid Build Coastguard Worker if (instr->opc == OPC_PUSH_CONSTS_LOAD_MACRO)
717*61046927SAndroid Build Coastguard Worker schedule(ctx, instr);
718*61046927SAndroid Build Coastguard Worker
719*61046927SAndroid Build Coastguard Worker while (!list_is_empty(&ctx->unscheduled_list)) {
720*61046927SAndroid Build Coastguard Worker struct ir3_instruction *instr = choose_instr(ctx);
721*61046927SAndroid Build Coastguard Worker
722*61046927SAndroid Build Coastguard Worker unsigned delay = node_delay(ctx, instr->data);
723*61046927SAndroid Build Coastguard Worker d("delay=%u", delay);
724*61046927SAndroid Build Coastguard Worker
725*61046927SAndroid Build Coastguard Worker assert(delay <= 6);
726*61046927SAndroid Build Coastguard Worker
727*61046927SAndroid Build Coastguard Worker schedule(ctx, instr);
728*61046927SAndroid Build Coastguard Worker }
729*61046927SAndroid Build Coastguard Worker
730*61046927SAndroid Build Coastguard Worker sched_dag_destroy(ctx);
731*61046927SAndroid Build Coastguard Worker
732*61046927SAndroid Build Coastguard Worker if (terminator)
733*61046927SAndroid Build Coastguard Worker list_addtail(&terminator->node, &block->instr_list);
734*61046927SAndroid Build Coastguard Worker }
735*61046927SAndroid Build Coastguard Worker
736*61046927SAndroid Build Coastguard Worker static bool
is_self_mov(struct ir3_instruction * instr)737*61046927SAndroid Build Coastguard Worker is_self_mov(struct ir3_instruction *instr)
738*61046927SAndroid Build Coastguard Worker {
739*61046927SAndroid Build Coastguard Worker if (!is_same_type_mov(instr))
740*61046927SAndroid Build Coastguard Worker return false;
741*61046927SAndroid Build Coastguard Worker
742*61046927SAndroid Build Coastguard Worker if (instr->dsts[0]->num != instr->srcs[0]->num)
743*61046927SAndroid Build Coastguard Worker return false;
744*61046927SAndroid Build Coastguard Worker
745*61046927SAndroid Build Coastguard Worker if (instr->dsts[0]->flags & IR3_REG_RELATIV)
746*61046927SAndroid Build Coastguard Worker return false;
747*61046927SAndroid Build Coastguard Worker
748*61046927SAndroid Build Coastguard Worker if (instr->cat1.round != ROUND_ZERO)
749*61046927SAndroid Build Coastguard Worker return false;
750*61046927SAndroid Build Coastguard Worker
751*61046927SAndroid Build Coastguard Worker if (instr->srcs[0]->flags &
752*61046927SAndroid Build Coastguard Worker (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_FNEG |
753*61046927SAndroid Build Coastguard Worker IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
754*61046927SAndroid Build Coastguard Worker return false;
755*61046927SAndroid Build Coastguard Worker
756*61046927SAndroid Build Coastguard Worker return true;
757*61046927SAndroid Build Coastguard Worker }
758*61046927SAndroid Build Coastguard Worker
759*61046927SAndroid Build Coastguard Worker /* sometimes we end up w/ in-place mov's, ie. mov.u32u32 r1.y, r1.y
760*61046927SAndroid Build Coastguard Worker * as a result of places were before RA we are not sure that it is
761*61046927SAndroid Build Coastguard Worker * safe to eliminate. We could eliminate these earlier, but sometimes
762*61046927SAndroid Build Coastguard Worker * they are tangled up in false-dep's, etc, so it is easier just to
763*61046927SAndroid Build Coastguard Worker * let them exist until after RA
764*61046927SAndroid Build Coastguard Worker */
765*61046927SAndroid Build Coastguard Worker static void
cleanup_self_movs(struct ir3 * ir)766*61046927SAndroid Build Coastguard Worker cleanup_self_movs(struct ir3 *ir)
767*61046927SAndroid Build Coastguard Worker {
768*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
769*61046927SAndroid Build Coastguard Worker foreach_instr_safe (instr, &block->instr_list) {
770*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < instr->deps_count; i++) {
771*61046927SAndroid Build Coastguard Worker if (instr->deps[i] && is_self_mov(instr->deps[i])) {
772*61046927SAndroid Build Coastguard Worker instr->deps[i] = NULL;
773*61046927SAndroid Build Coastguard Worker }
774*61046927SAndroid Build Coastguard Worker }
775*61046927SAndroid Build Coastguard Worker
776*61046927SAndroid Build Coastguard Worker if (is_self_mov(instr))
777*61046927SAndroid Build Coastguard Worker list_delinit(&instr->node);
778*61046927SAndroid Build Coastguard Worker }
779*61046927SAndroid Build Coastguard Worker }
780*61046927SAndroid Build Coastguard Worker }
781*61046927SAndroid Build Coastguard Worker
782*61046927SAndroid Build Coastguard Worker bool
ir3_postsched(struct ir3 * ir,struct ir3_shader_variant * v)783*61046927SAndroid Build Coastguard Worker ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v)
784*61046927SAndroid Build Coastguard Worker {
785*61046927SAndroid Build Coastguard Worker struct ir3_postsched_ctx ctx = {
786*61046927SAndroid Build Coastguard Worker .ir = ir,
787*61046927SAndroid Build Coastguard Worker .v = v,
788*61046927SAndroid Build Coastguard Worker };
789*61046927SAndroid Build Coastguard Worker
790*61046927SAndroid Build Coastguard Worker cleanup_self_movs(ir);
791*61046927SAndroid Build Coastguard Worker
792*61046927SAndroid Build Coastguard Worker foreach_block (block, &ir->block_list) {
793*61046927SAndroid Build Coastguard Worker sched_block(&ctx, block);
794*61046927SAndroid Build Coastguard Worker }
795*61046927SAndroid Build Coastguard Worker
796*61046927SAndroid Build Coastguard Worker return true;
797*61046927SAndroid Build Coastguard Worker }
798