1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2010 Intel Corporation
3*61046927SAndroid Build Coastguard Worker * Copyright © 2014-2017 Broadcom
4*61046927SAndroid Build Coastguard Worker *
5*61046927SAndroid Build Coastguard Worker * Permission is hereby granted, free of charge, to any person obtaining a
6*61046927SAndroid Build Coastguard Worker * copy of this software and associated documentation files (the "Software"),
7*61046927SAndroid Build Coastguard Worker * to deal in the Software without restriction, including without limitation
8*61046927SAndroid Build Coastguard Worker * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9*61046927SAndroid Build Coastguard Worker * and/or sell copies of the Software, and to permit persons to whom the
10*61046927SAndroid Build Coastguard Worker * Software is furnished to do so, subject to the following conditions:
11*61046927SAndroid Build Coastguard Worker *
12*61046927SAndroid Build Coastguard Worker * The above copyright notice and this permission notice (including the next
13*61046927SAndroid Build Coastguard Worker * paragraph) shall be included in all copies or substantial portions of the
14*61046927SAndroid Build Coastguard Worker * Software.
15*61046927SAndroid Build Coastguard Worker *
16*61046927SAndroid Build Coastguard Worker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*61046927SAndroid Build Coastguard Worker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*61046927SAndroid Build Coastguard Worker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19*61046927SAndroid Build Coastguard Worker * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*61046927SAndroid Build Coastguard Worker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21*61046927SAndroid Build Coastguard Worker * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22*61046927SAndroid Build Coastguard Worker * IN THE SOFTWARE.
23*61046927SAndroid Build Coastguard Worker */
24*61046927SAndroid Build Coastguard Worker
/**
 * @file
 *
 * The basic model of the list scheduler is to take a basic block, compute a
 * DAG of the dependencies, and make a list of the DAG heads.  Heuristically
 * pick a DAG head, then put all the children that are now DAG heads into the
 * list of things to schedule.
 *
 * The goal of scheduling here is to pack pairs of operations together in a
 * single QPU instruction.
 */
36*61046927SAndroid Build Coastguard Worker
37*61046927SAndroid Build Coastguard Worker #include "qpu/qpu_disasm.h"
38*61046927SAndroid Build Coastguard Worker #include "v3d_compiler.h"
39*61046927SAndroid Build Coastguard Worker #include "util/ralloc.h"
40*61046927SAndroid Build Coastguard Worker #include "util/dag.h"
41*61046927SAndroid Build Coastguard Worker
/* File-local debug flag.  Static storage, so it starts out false. */
static bool debug;

/* Forward declaration only; no definition appears in this part of the file. */
struct schedule_node_child;
45*61046927SAndroid Build Coastguard Worker
46*61046927SAndroid Build Coastguard Worker struct schedule_node {
47*61046927SAndroid Build Coastguard Worker struct dag_node dag;
48*61046927SAndroid Build Coastguard Worker struct list_head link;
49*61046927SAndroid Build Coastguard Worker struct qinst *inst;
50*61046927SAndroid Build Coastguard Worker
51*61046927SAndroid Build Coastguard Worker /* Longest cycles + instruction_latency() of any parent of this node. */
52*61046927SAndroid Build Coastguard Worker uint32_t unblocked_time;
53*61046927SAndroid Build Coastguard Worker
54*61046927SAndroid Build Coastguard Worker /**
55*61046927SAndroid Build Coastguard Worker * Minimum number of cycles from scheduling this instruction until the
56*61046927SAndroid Build Coastguard Worker * end of the program, based on the slowest dependency chain through
57*61046927SAndroid Build Coastguard Worker * the children.
58*61046927SAndroid Build Coastguard Worker */
59*61046927SAndroid Build Coastguard Worker uint32_t delay;
60*61046927SAndroid Build Coastguard Worker
61*61046927SAndroid Build Coastguard Worker /**
62*61046927SAndroid Build Coastguard Worker * cycles between this instruction being scheduled and when its result
63*61046927SAndroid Build Coastguard Worker * can be consumed.
64*61046927SAndroid Build Coastguard Worker */
65*61046927SAndroid Build Coastguard Worker uint32_t latency;
66*61046927SAndroid Build Coastguard Worker };
67*61046927SAndroid Build Coastguard Worker
/* Direction of the dependency walk.  When the instructions are walked in
 * reverse, add_dep() has to swap its before/after arguments.
 */
enum direction {
        F, /* forward walk: edges go before -> after */
        R, /* reverse walk: edges go after -> before */
};
72*61046927SAndroid Build Coastguard Worker
73*61046927SAndroid Build Coastguard Worker struct schedule_state {
74*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo;
75*61046927SAndroid Build Coastguard Worker struct dag *dag;
76*61046927SAndroid Build Coastguard Worker struct schedule_node *last_r[6];
77*61046927SAndroid Build Coastguard Worker struct schedule_node *last_rf[64];
78*61046927SAndroid Build Coastguard Worker struct schedule_node *last_sf;
79*61046927SAndroid Build Coastguard Worker struct schedule_node *last_vpm_read;
80*61046927SAndroid Build Coastguard Worker struct schedule_node *last_tmu_write;
81*61046927SAndroid Build Coastguard Worker struct schedule_node *last_tmu_config;
82*61046927SAndroid Build Coastguard Worker struct schedule_node *last_tmu_read;
83*61046927SAndroid Build Coastguard Worker struct schedule_node *last_tlb;
84*61046927SAndroid Build Coastguard Worker struct schedule_node *last_vpm;
85*61046927SAndroid Build Coastguard Worker struct schedule_node *last_unif;
86*61046927SAndroid Build Coastguard Worker struct schedule_node *last_rtop;
87*61046927SAndroid Build Coastguard Worker struct schedule_node *last_unifa;
88*61046927SAndroid Build Coastguard Worker struct schedule_node *last_setmsf;
89*61046927SAndroid Build Coastguard Worker enum direction dir;
90*61046927SAndroid Build Coastguard Worker /* Estimated cycle when the current instruction would start. */
91*61046927SAndroid Build Coastguard Worker uint32_t time;
92*61046927SAndroid Build Coastguard Worker };
93*61046927SAndroid Build Coastguard Worker
94*61046927SAndroid Build Coastguard Worker static void
add_dep(struct schedule_state * state,struct schedule_node * before,struct schedule_node * after,bool write)95*61046927SAndroid Build Coastguard Worker add_dep(struct schedule_state *state,
96*61046927SAndroid Build Coastguard Worker struct schedule_node *before,
97*61046927SAndroid Build Coastguard Worker struct schedule_node *after,
98*61046927SAndroid Build Coastguard Worker bool write)
99*61046927SAndroid Build Coastguard Worker {
100*61046927SAndroid Build Coastguard Worker bool write_after_read = !write && state->dir == R;
101*61046927SAndroid Build Coastguard Worker uintptr_t edge_data = write_after_read;
102*61046927SAndroid Build Coastguard Worker
103*61046927SAndroid Build Coastguard Worker if (!before || !after)
104*61046927SAndroid Build Coastguard Worker return;
105*61046927SAndroid Build Coastguard Worker
106*61046927SAndroid Build Coastguard Worker assert(before != after);
107*61046927SAndroid Build Coastguard Worker
108*61046927SAndroid Build Coastguard Worker if (state->dir == F)
109*61046927SAndroid Build Coastguard Worker dag_add_edge(&before->dag, &after->dag, edge_data);
110*61046927SAndroid Build Coastguard Worker else
111*61046927SAndroid Build Coastguard Worker dag_add_edge(&after->dag, &before->dag, edge_data);
112*61046927SAndroid Build Coastguard Worker }
113*61046927SAndroid Build Coastguard Worker
/**
 * Records that \p after reads something \p before is tracked for.
 *
 * Thin wrapper over add_dep() with write == false; the tracked node is left
 * untouched.
 */
static void
add_read_dep(struct schedule_state *state,
             struct schedule_node *before,
             struct schedule_node *after)
{
        const bool is_write = false;

        add_dep(state, before, after, is_write);
}
121*61046927SAndroid Build Coastguard Worker
/**
 * Records that \p after writes the resource tracked by the slot \p before.
 *
 * A write dependency is added against the node currently stored in the slot
 * (if any), and then the slot is updated to point at \p after.
 */
static void
add_write_dep(struct schedule_state *state,
              struct schedule_node **before,
              struct schedule_node *after)
{
        struct schedule_node *previous = *before;

        add_dep(state, previous, after, true);
        *before = after;
}
130*61046927SAndroid Build Coastguard Worker
131*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_is_tlb(const struct v3d_qpu_instr * inst)132*61046927SAndroid Build Coastguard Worker qpu_inst_is_tlb(const struct v3d_qpu_instr *inst)
133*61046927SAndroid Build Coastguard Worker {
134*61046927SAndroid Build Coastguard Worker if (inst->sig.ldtlb || inst->sig.ldtlbu)
135*61046927SAndroid Build Coastguard Worker return true;
136*61046927SAndroid Build Coastguard Worker
137*61046927SAndroid Build Coastguard Worker if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
138*61046927SAndroid Build Coastguard Worker return false;
139*61046927SAndroid Build Coastguard Worker
140*61046927SAndroid Build Coastguard Worker if (inst->alu.add.op != V3D_QPU_A_NOP &&
141*61046927SAndroid Build Coastguard Worker inst->alu.add.magic_write &&
142*61046927SAndroid Build Coastguard Worker (inst->alu.add.waddr == V3D_QPU_WADDR_TLB ||
143*61046927SAndroid Build Coastguard Worker inst->alu.add.waddr == V3D_QPU_WADDR_TLBU))
144*61046927SAndroid Build Coastguard Worker return true;
145*61046927SAndroid Build Coastguard Worker
146*61046927SAndroid Build Coastguard Worker if (inst->alu.mul.op != V3D_QPU_M_NOP &&
147*61046927SAndroid Build Coastguard Worker inst->alu.mul.magic_write &&
148*61046927SAndroid Build Coastguard Worker (inst->alu.mul.waddr == V3D_QPU_WADDR_TLB ||
149*61046927SAndroid Build Coastguard Worker inst->alu.mul.waddr == V3D_QPU_WADDR_TLBU))
150*61046927SAndroid Build Coastguard Worker return true;
151*61046927SAndroid Build Coastguard Worker
152*61046927SAndroid Build Coastguard Worker return false;
153*61046927SAndroid Build Coastguard Worker }
154*61046927SAndroid Build Coastguard Worker
155*61046927SAndroid Build Coastguard Worker static void
process_mux_deps(struct schedule_state * state,struct schedule_node * n,enum v3d_qpu_mux mux)156*61046927SAndroid Build Coastguard Worker process_mux_deps(struct schedule_state *state, struct schedule_node *n,
157*61046927SAndroid Build Coastguard Worker enum v3d_qpu_mux mux)
158*61046927SAndroid Build Coastguard Worker {
159*61046927SAndroid Build Coastguard Worker assert(state->devinfo->ver < 71);
160*61046927SAndroid Build Coastguard Worker switch (mux) {
161*61046927SAndroid Build Coastguard Worker case V3D_QPU_MUX_A:
162*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
163*61046927SAndroid Build Coastguard Worker break;
164*61046927SAndroid Build Coastguard Worker case V3D_QPU_MUX_B:
165*61046927SAndroid Build Coastguard Worker if (!n->inst->qpu.sig.small_imm_b) {
166*61046927SAndroid Build Coastguard Worker add_read_dep(state,
167*61046927SAndroid Build Coastguard Worker state->last_rf[n->inst->qpu.raddr_b], n);
168*61046927SAndroid Build Coastguard Worker }
169*61046927SAndroid Build Coastguard Worker break;
170*61046927SAndroid Build Coastguard Worker default:
171*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_r[mux - V3D_QPU_MUX_R0], n);
172*61046927SAndroid Build Coastguard Worker break;
173*61046927SAndroid Build Coastguard Worker }
174*61046927SAndroid Build Coastguard Worker }
175*61046927SAndroid Build Coastguard Worker
176*61046927SAndroid Build Coastguard Worker
177*61046927SAndroid Build Coastguard Worker static void
process_raddr_deps(struct schedule_state * state,struct schedule_node * n,uint8_t raddr,bool is_small_imm)178*61046927SAndroid Build Coastguard Worker process_raddr_deps(struct schedule_state *state, struct schedule_node *n,
179*61046927SAndroid Build Coastguard Worker uint8_t raddr, bool is_small_imm)
180*61046927SAndroid Build Coastguard Worker {
181*61046927SAndroid Build Coastguard Worker assert(state->devinfo->ver >= 71);
182*61046927SAndroid Build Coastguard Worker
183*61046927SAndroid Build Coastguard Worker if (!is_small_imm)
184*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_rf[raddr], n);
185*61046927SAndroid Build Coastguard Worker }
186*61046927SAndroid Build Coastguard Worker
187*61046927SAndroid Build Coastguard Worker static bool
tmu_write_is_sequence_terminator(uint32_t waddr)188*61046927SAndroid Build Coastguard Worker tmu_write_is_sequence_terminator(uint32_t waddr)
189*61046927SAndroid Build Coastguard Worker {
190*61046927SAndroid Build Coastguard Worker switch (waddr) {
191*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_TMUS:
192*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_TMUSCM:
193*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_TMUSF:
194*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_TMUSLOD:
195*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_TMUA:
196*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_TMUAU:
197*61046927SAndroid Build Coastguard Worker return true;
198*61046927SAndroid Build Coastguard Worker default:
199*61046927SAndroid Build Coastguard Worker return false;
200*61046927SAndroid Build Coastguard Worker }
201*61046927SAndroid Build Coastguard Worker }
202*61046927SAndroid Build Coastguard Worker
203*61046927SAndroid Build Coastguard Worker static bool
can_reorder_tmu_write(const struct v3d_device_info * devinfo,uint32_t waddr)204*61046927SAndroid Build Coastguard Worker can_reorder_tmu_write(const struct v3d_device_info *devinfo, uint32_t waddr)
205*61046927SAndroid Build Coastguard Worker {
206*61046927SAndroid Build Coastguard Worker if (tmu_write_is_sequence_terminator(waddr))
207*61046927SAndroid Build Coastguard Worker return false;
208*61046927SAndroid Build Coastguard Worker
209*61046927SAndroid Build Coastguard Worker if (waddr == V3D_QPU_WADDR_TMUD)
210*61046927SAndroid Build Coastguard Worker return false;
211*61046927SAndroid Build Coastguard Worker
212*61046927SAndroid Build Coastguard Worker return true;
213*61046927SAndroid Build Coastguard Worker }
214*61046927SAndroid Build Coastguard Worker
215*61046927SAndroid Build Coastguard Worker static void
process_waddr_deps(struct schedule_state * state,struct schedule_node * n,uint32_t waddr,bool magic)216*61046927SAndroid Build Coastguard Worker process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
217*61046927SAndroid Build Coastguard Worker uint32_t waddr, bool magic)
218*61046927SAndroid Build Coastguard Worker {
219*61046927SAndroid Build Coastguard Worker if (!magic) {
220*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_rf[waddr], n);
221*61046927SAndroid Build Coastguard Worker } else if (v3d_qpu_magic_waddr_is_tmu(state->devinfo, waddr)) {
222*61046927SAndroid Build Coastguard Worker if (can_reorder_tmu_write(state->devinfo, waddr))
223*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_tmu_write, n);
224*61046927SAndroid Build Coastguard Worker else
225*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tmu_write, n);
226*61046927SAndroid Build Coastguard Worker
227*61046927SAndroid Build Coastguard Worker if (tmu_write_is_sequence_terminator(waddr))
228*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tmu_config, n);
229*61046927SAndroid Build Coastguard Worker } else if (v3d_qpu_magic_waddr_is_sfu(waddr)) {
230*61046927SAndroid Build Coastguard Worker /* Handled by v3d_qpu_writes_r4() check. */
231*61046927SAndroid Build Coastguard Worker } else {
232*61046927SAndroid Build Coastguard Worker switch (waddr) {
233*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_R0:
234*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_R1:
235*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_R2:
236*61046927SAndroid Build Coastguard Worker add_write_dep(state,
237*61046927SAndroid Build Coastguard Worker &state->last_r[waddr - V3D_QPU_WADDR_R0],
238*61046927SAndroid Build Coastguard Worker n);
239*61046927SAndroid Build Coastguard Worker break;
240*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_R3:
241*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_R4:
242*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_R5:
243*61046927SAndroid Build Coastguard Worker /* Handled by v3d_qpu_writes_r*() checks below. */
244*61046927SAndroid Build Coastguard Worker break;
245*61046927SAndroid Build Coastguard Worker
246*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_VPM:
247*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_VPMU:
248*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_vpm, n);
249*61046927SAndroid Build Coastguard Worker break;
250*61046927SAndroid Build Coastguard Worker
251*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_TLB:
252*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_TLBU:
253*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tlb, n);
254*61046927SAndroid Build Coastguard Worker break;
255*61046927SAndroid Build Coastguard Worker
256*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_SYNC:
257*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_SYNCB:
258*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_SYNCU:
259*61046927SAndroid Build Coastguard Worker /* For CS barrier(): Sync against any other memory
260*61046927SAndroid Build Coastguard Worker * accesses. There doesn't appear to be any need for
261*61046927SAndroid Build Coastguard Worker * barriers to affect ALU operations.
262*61046927SAndroid Build Coastguard Worker */
263*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tmu_write, n);
264*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tmu_read, n);
265*61046927SAndroid Build Coastguard Worker break;
266*61046927SAndroid Build Coastguard Worker
267*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_UNIFA:
268*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_unifa, n);
269*61046927SAndroid Build Coastguard Worker break;
270*61046927SAndroid Build Coastguard Worker
271*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_NOP:
272*61046927SAndroid Build Coastguard Worker break;
273*61046927SAndroid Build Coastguard Worker
274*61046927SAndroid Build Coastguard Worker default:
275*61046927SAndroid Build Coastguard Worker fprintf(stderr, "Unknown waddr %d\n", waddr);
276*61046927SAndroid Build Coastguard Worker abort();
277*61046927SAndroid Build Coastguard Worker }
278*61046927SAndroid Build Coastguard Worker }
279*61046927SAndroid Build Coastguard Worker }
280*61046927SAndroid Build Coastguard Worker
281*61046927SAndroid Build Coastguard Worker /**
282*61046927SAndroid Build Coastguard Worker * Common code for dependencies that need to be tracked both forward and
283*61046927SAndroid Build Coastguard Worker * backward.
284*61046927SAndroid Build Coastguard Worker *
285*61046927SAndroid Build Coastguard Worker * This is for things like "all reads of r4 have to happen between the r4
286*61046927SAndroid Build Coastguard Worker * writes that surround them".
287*61046927SAndroid Build Coastguard Worker */
288*61046927SAndroid Build Coastguard Worker static void
calculate_deps(struct schedule_state * state,struct schedule_node * n)289*61046927SAndroid Build Coastguard Worker calculate_deps(struct schedule_state *state, struct schedule_node *n)
290*61046927SAndroid Build Coastguard Worker {
291*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo = state->devinfo;
292*61046927SAndroid Build Coastguard Worker struct qinst *qinst = n->inst;
293*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr *inst = &qinst->qpu;
294*61046927SAndroid Build Coastguard Worker /* If the input and output segments are shared, then all VPM reads to
295*61046927SAndroid Build Coastguard Worker * a location need to happen before all writes. We handle this by
296*61046927SAndroid Build Coastguard Worker * serializing all VPM operations for now.
297*61046927SAndroid Build Coastguard Worker *
298*61046927SAndroid Build Coastguard Worker * FIXME: we are assuming that the segments are shared. That is
299*61046927SAndroid Build Coastguard Worker * correct right now as we are only using shared, but technically you
300*61046927SAndroid Build Coastguard Worker * can choose.
301*61046927SAndroid Build Coastguard Worker */
302*61046927SAndroid Build Coastguard Worker bool separate_vpm_segment = false;
303*61046927SAndroid Build Coastguard Worker
304*61046927SAndroid Build Coastguard Worker if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
305*61046927SAndroid Build Coastguard Worker if (inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS)
306*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_sf, n);
307*61046927SAndroid Build Coastguard Worker
308*61046927SAndroid Build Coastguard Worker /* XXX: BDI */
309*61046927SAndroid Build Coastguard Worker /* XXX: BDU */
310*61046927SAndroid Build Coastguard Worker /* XXX: ub */
311*61046927SAndroid Build Coastguard Worker /* XXX: raddr_a */
312*61046927SAndroid Build Coastguard Worker
313*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_unif, n);
314*61046927SAndroid Build Coastguard Worker return;
315*61046927SAndroid Build Coastguard Worker }
316*61046927SAndroid Build Coastguard Worker
317*61046927SAndroid Build Coastguard Worker assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
318*61046927SAndroid Build Coastguard Worker
319*61046927SAndroid Build Coastguard Worker /* XXX: LOAD_IMM */
320*61046927SAndroid Build Coastguard Worker
321*61046927SAndroid Build Coastguard Worker if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) {
322*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
323*61046927SAndroid Build Coastguard Worker process_mux_deps(state, n, inst->alu.add.a.mux);
324*61046927SAndroid Build Coastguard Worker } else {
325*61046927SAndroid Build Coastguard Worker process_raddr_deps(state, n, inst->alu.add.a.raddr,
326*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_a);
327*61046927SAndroid Build Coastguard Worker }
328*61046927SAndroid Build Coastguard Worker }
329*61046927SAndroid Build Coastguard Worker if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) {
330*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
331*61046927SAndroid Build Coastguard Worker process_mux_deps(state, n, inst->alu.add.b.mux);
332*61046927SAndroid Build Coastguard Worker } else {
333*61046927SAndroid Build Coastguard Worker process_raddr_deps(state, n, inst->alu.add.b.raddr,
334*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_b);
335*61046927SAndroid Build Coastguard Worker }
336*61046927SAndroid Build Coastguard Worker }
337*61046927SAndroid Build Coastguard Worker
338*61046927SAndroid Build Coastguard Worker if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) {
339*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
340*61046927SAndroid Build Coastguard Worker process_mux_deps(state, n, inst->alu.mul.a.mux);
341*61046927SAndroid Build Coastguard Worker } else {
342*61046927SAndroid Build Coastguard Worker process_raddr_deps(state, n, inst->alu.mul.a.raddr,
343*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_c);
344*61046927SAndroid Build Coastguard Worker }
345*61046927SAndroid Build Coastguard Worker }
346*61046927SAndroid Build Coastguard Worker if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) {
347*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
348*61046927SAndroid Build Coastguard Worker process_mux_deps(state, n, inst->alu.mul.b.mux);
349*61046927SAndroid Build Coastguard Worker } else {
350*61046927SAndroid Build Coastguard Worker process_raddr_deps(state, n, inst->alu.mul.b.raddr,
351*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_d);
352*61046927SAndroid Build Coastguard Worker }
353*61046927SAndroid Build Coastguard Worker }
354*61046927SAndroid Build Coastguard Worker
355*61046927SAndroid Build Coastguard Worker switch (inst->alu.add.op) {
356*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_VPMSETUP:
357*61046927SAndroid Build Coastguard Worker /* Could distinguish read/write by unpacking the uniform. */
358*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_vpm, n);
359*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_vpm_read, n);
360*61046927SAndroid Build Coastguard Worker break;
361*61046927SAndroid Build Coastguard Worker
362*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_STVPMV:
363*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_STVPMD:
364*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_STVPMP:
365*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_vpm, n);
366*61046927SAndroid Build Coastguard Worker break;
367*61046927SAndroid Build Coastguard Worker
368*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_LDVPMV_IN:
369*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_LDVPMD_IN:
370*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_LDVPMG_IN:
371*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_LDVPMP:
372*61046927SAndroid Build Coastguard Worker if (!separate_vpm_segment)
373*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_vpm, n);
374*61046927SAndroid Build Coastguard Worker break;
375*61046927SAndroid Build Coastguard Worker
376*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_VPMWT:
377*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_vpm, n);
378*61046927SAndroid Build Coastguard Worker break;
379*61046927SAndroid Build Coastguard Worker
380*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_MSF:
381*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_tlb, n);
382*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_setmsf, n);
383*61046927SAndroid Build Coastguard Worker break;
384*61046927SAndroid Build Coastguard Worker
385*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_SETMSF:
386*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_setmsf, n);
387*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tmu_write, n);
388*61046927SAndroid Build Coastguard Worker FALLTHROUGH;
389*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_SETREVF:
390*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tlb, n);
391*61046927SAndroid Build Coastguard Worker break;
392*61046927SAndroid Build Coastguard Worker
393*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_BALLOT:
394*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_BCASTF:
395*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_ALLEQ:
396*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_ALLFEQ:
397*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_setmsf, n);
398*61046927SAndroid Build Coastguard Worker break;
399*61046927SAndroid Build Coastguard Worker
400*61046927SAndroid Build Coastguard Worker default:
401*61046927SAndroid Build Coastguard Worker break;
402*61046927SAndroid Build Coastguard Worker }
403*61046927SAndroid Build Coastguard Worker
404*61046927SAndroid Build Coastguard Worker switch (inst->alu.mul.op) {
405*61046927SAndroid Build Coastguard Worker case V3D_QPU_M_MULTOP:
406*61046927SAndroid Build Coastguard Worker case V3D_QPU_M_UMUL24:
407*61046927SAndroid Build Coastguard Worker /* MULTOP sets rtop, and UMUL24 implicitly reads rtop and
408*61046927SAndroid Build Coastguard Worker * resets it to 0. We could possibly reorder umul24s relative
409*61046927SAndroid Build Coastguard Worker * to each other, but for now just keep all the MUL parts in
410*61046927SAndroid Build Coastguard Worker * order.
411*61046927SAndroid Build Coastguard Worker */
412*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_rtop, n);
413*61046927SAndroid Build Coastguard Worker break;
414*61046927SAndroid Build Coastguard Worker default:
415*61046927SAndroid Build Coastguard Worker break;
416*61046927SAndroid Build Coastguard Worker }
417*61046927SAndroid Build Coastguard Worker
418*61046927SAndroid Build Coastguard Worker if (inst->alu.add.op != V3D_QPU_A_NOP) {
419*61046927SAndroid Build Coastguard Worker process_waddr_deps(state, n, inst->alu.add.waddr,
420*61046927SAndroid Build Coastguard Worker inst->alu.add.magic_write);
421*61046927SAndroid Build Coastguard Worker }
422*61046927SAndroid Build Coastguard Worker if (inst->alu.mul.op != V3D_QPU_M_NOP) {
423*61046927SAndroid Build Coastguard Worker process_waddr_deps(state, n, inst->alu.mul.waddr,
424*61046927SAndroid Build Coastguard Worker inst->alu.mul.magic_write);
425*61046927SAndroid Build Coastguard Worker }
426*61046927SAndroid Build Coastguard Worker if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
427*61046927SAndroid Build Coastguard Worker process_waddr_deps(state, n, inst->sig_addr,
428*61046927SAndroid Build Coastguard Worker inst->sig_magic);
429*61046927SAndroid Build Coastguard Worker }
430*61046927SAndroid Build Coastguard Worker
431*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_r3(devinfo, inst))
432*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_r[3], n);
433*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_r4(devinfo, inst))
434*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_r[4], n);
435*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_r5(devinfo, inst))
436*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_r[5], n);
437*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_rf0_implicitly(devinfo, inst))
438*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_rf[0], n);
439*61046927SAndroid Build Coastguard Worker
440*61046927SAndroid Build Coastguard Worker /* If we add any more dependencies here we should consider whether we
441*61046927SAndroid Build Coastguard Worker * also need to update qpu_inst_after_thrsw_valid_in_delay_slot.
442*61046927SAndroid Build Coastguard Worker */
443*61046927SAndroid Build Coastguard Worker if (inst->sig.thrsw) {
444*61046927SAndroid Build Coastguard Worker /* All accumulator contents and flags are undefined after the
445*61046927SAndroid Build Coastguard Worker * switch.
446*61046927SAndroid Build Coastguard Worker */
447*61046927SAndroid Build Coastguard Worker for (int i = 0; i < ARRAY_SIZE(state->last_r); i++)
448*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_r[i], n);
449*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_sf, n);
450*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_rtop, n);
451*61046927SAndroid Build Coastguard Worker
452*61046927SAndroid Build Coastguard Worker /* Scoreboard-locking operations have to stay after the last
453*61046927SAndroid Build Coastguard Worker * thread switch.
454*61046927SAndroid Build Coastguard Worker */
455*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tlb, n);
456*61046927SAndroid Build Coastguard Worker
457*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tmu_write, n);
458*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tmu_config, n);
459*61046927SAndroid Build Coastguard Worker }
460*61046927SAndroid Build Coastguard Worker
461*61046927SAndroid Build Coastguard Worker if (v3d_qpu_waits_on_tmu(inst)) {
462*61046927SAndroid Build Coastguard Worker /* TMU loads are coming from a FIFO, so ordering is important.
463*61046927SAndroid Build Coastguard Worker */
464*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tmu_read, n);
465*61046927SAndroid Build Coastguard Worker /* Keep TMU loads after their TMU lookup terminator */
466*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_tmu_config, n);
467*61046927SAndroid Build Coastguard Worker }
468*61046927SAndroid Build Coastguard Worker
469*61046927SAndroid Build Coastguard Worker /* Allow wrtmuc to be reordered with other instructions in the
470*61046927SAndroid Build Coastguard Worker * same TMU sequence by using a read dependency on the last TMU
471*61046927SAndroid Build Coastguard Worker * sequence terminator.
472*61046927SAndroid Build Coastguard Worker */
473*61046927SAndroid Build Coastguard Worker if (inst->sig.wrtmuc)
474*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_tmu_config, n);
475*61046927SAndroid Build Coastguard Worker
476*61046927SAndroid Build Coastguard Worker if (inst->sig.ldtlb | inst->sig.ldtlbu)
477*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_tlb, n);
478*61046927SAndroid Build Coastguard Worker
479*61046927SAndroid Build Coastguard Worker if (inst->sig.ldvpm) {
480*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_vpm_read, n);
481*61046927SAndroid Build Coastguard Worker
482*61046927SAndroid Build Coastguard Worker /* At least for now, we're doing shared I/O segments, so queue
483*61046927SAndroid Build Coastguard Worker * all writes after all reads.
484*61046927SAndroid Build Coastguard Worker */
485*61046927SAndroid Build Coastguard Worker if (!separate_vpm_segment)
486*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_vpm, n);
487*61046927SAndroid Build Coastguard Worker }
488*61046927SAndroid Build Coastguard Worker
489*61046927SAndroid Build Coastguard Worker /* inst->sig.ldunif or sideband uniform read */
490*61046927SAndroid Build Coastguard Worker if (vir_has_uniform(qinst))
491*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_unif, n);
492*61046927SAndroid Build Coastguard Worker
493*61046927SAndroid Build Coastguard Worker /* Both unifa and ldunifa must preserve ordering */
494*61046927SAndroid Build Coastguard Worker if (inst->sig.ldunifa || inst->sig.ldunifarf)
495*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_unifa, n);
496*61046927SAndroid Build Coastguard Worker
497*61046927SAndroid Build Coastguard Worker if (v3d_qpu_reads_flags(inst))
498*61046927SAndroid Build Coastguard Worker add_read_dep(state, state->last_sf, n);
499*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_flags(inst))
500*61046927SAndroid Build Coastguard Worker add_write_dep(state, &state->last_sf, n);
501*61046927SAndroid Build Coastguard Worker }
502*61046927SAndroid Build Coastguard Worker
503*61046927SAndroid Build Coastguard Worker static void
calculate_forward_deps(struct v3d_compile * c,struct dag * dag,struct list_head * schedule_list)504*61046927SAndroid Build Coastguard Worker calculate_forward_deps(struct v3d_compile *c, struct dag *dag,
505*61046927SAndroid Build Coastguard Worker struct list_head *schedule_list)
506*61046927SAndroid Build Coastguard Worker {
507*61046927SAndroid Build Coastguard Worker struct schedule_state state;
508*61046927SAndroid Build Coastguard Worker
509*61046927SAndroid Build Coastguard Worker memset(&state, 0, sizeof(state));
510*61046927SAndroid Build Coastguard Worker state.dag = dag;
511*61046927SAndroid Build Coastguard Worker state.devinfo = c->devinfo;
512*61046927SAndroid Build Coastguard Worker state.dir = F;
513*61046927SAndroid Build Coastguard Worker
514*61046927SAndroid Build Coastguard Worker list_for_each_entry(struct schedule_node, node, schedule_list, link)
515*61046927SAndroid Build Coastguard Worker calculate_deps(&state, node);
516*61046927SAndroid Build Coastguard Worker }
517*61046927SAndroid Build Coastguard Worker
518*61046927SAndroid Build Coastguard Worker static void
calculate_reverse_deps(struct v3d_compile * c,struct dag * dag,struct list_head * schedule_list)519*61046927SAndroid Build Coastguard Worker calculate_reverse_deps(struct v3d_compile *c, struct dag *dag,
520*61046927SAndroid Build Coastguard Worker struct list_head *schedule_list)
521*61046927SAndroid Build Coastguard Worker {
522*61046927SAndroid Build Coastguard Worker struct schedule_state state;
523*61046927SAndroid Build Coastguard Worker
524*61046927SAndroid Build Coastguard Worker memset(&state, 0, sizeof(state));
525*61046927SAndroid Build Coastguard Worker state.dag = dag;
526*61046927SAndroid Build Coastguard Worker state.devinfo = c->devinfo;
527*61046927SAndroid Build Coastguard Worker state.dir = R;
528*61046927SAndroid Build Coastguard Worker
529*61046927SAndroid Build Coastguard Worker list_for_each_entry_rev(struct schedule_node, node, schedule_list,
530*61046927SAndroid Build Coastguard Worker link) {
531*61046927SAndroid Build Coastguard Worker calculate_deps(&state, (struct schedule_node *)node);
532*61046927SAndroid Build Coastguard Worker }
533*61046927SAndroid Build Coastguard Worker }
534*61046927SAndroid Build Coastguard Worker
/* Bookkeeping consulted while choosing the next instruction. Several of the
 * last_*_tick fields are compared against the current tick to keep a minimum
 * distance between an instruction and a later one that would conflict with
 * it.
 */
struct choose_scoreboard {
        struct dag *dag;

        /* Current tick; the reference point for the last_*_tick checks. */
        int tick;

        /* Checked before an r4 read (mux_reads_too_soon()) and before an r4
         * write (writes_too_soon_after_write()).
         */
        int last_magic_sfu_write_tick;

        /* Register and tick consulted by read_stalls(). */
        int last_stallable_sfu_reg;
        int last_stallable_sfu_tick;

        /* Checked before an r5 read, before an rf0 read, and before an rf0
         * write that would clash with ldvary's delayed rf0 write.
         */
        int last_ldvary_tick;

        int last_unifa_write_tick;
        int last_uniforms_reset_tick;

        /* Tick and flags consulted by scoreboard_is_locked(). */
        int last_thrsw_tick;
        int last_branch_tick;
        int last_setmsf_tick;
        bool first_thrsw_emitted;
        bool last_thrsw_emitted;

        bool fixup_ldvary;
        int ldvary_count;
        int pending_ldtmu_count;
        bool first_ldtmu_after_thrsw;

        /* V3D 7.x */
        int last_implicit_rf0_write_tick;
        bool has_rf0_flops_conflict;
};
558*61046927SAndroid Build Coastguard Worker
559*61046927SAndroid Build Coastguard Worker static bool
mux_reads_too_soon(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst,enum v3d_qpu_mux mux)560*61046927SAndroid Build Coastguard Worker mux_reads_too_soon(struct choose_scoreboard *scoreboard,
561*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
562*61046927SAndroid Build Coastguard Worker {
563*61046927SAndroid Build Coastguard Worker switch (mux) {
564*61046927SAndroid Build Coastguard Worker case V3D_QPU_MUX_R4:
565*61046927SAndroid Build Coastguard Worker if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick <= 2)
566*61046927SAndroid Build Coastguard Worker return true;
567*61046927SAndroid Build Coastguard Worker break;
568*61046927SAndroid Build Coastguard Worker
569*61046927SAndroid Build Coastguard Worker case V3D_QPU_MUX_R5:
570*61046927SAndroid Build Coastguard Worker if (scoreboard->tick - scoreboard->last_ldvary_tick <= 1)
571*61046927SAndroid Build Coastguard Worker return true;
572*61046927SAndroid Build Coastguard Worker break;
573*61046927SAndroid Build Coastguard Worker default:
574*61046927SAndroid Build Coastguard Worker break;
575*61046927SAndroid Build Coastguard Worker }
576*61046927SAndroid Build Coastguard Worker
577*61046927SAndroid Build Coastguard Worker return false;
578*61046927SAndroid Build Coastguard Worker }
579*61046927SAndroid Build Coastguard Worker
580*61046927SAndroid Build Coastguard Worker static bool
reads_too_soon(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst,uint8_t raddr)581*61046927SAndroid Build Coastguard Worker reads_too_soon(struct choose_scoreboard *scoreboard,
582*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst, uint8_t raddr)
583*61046927SAndroid Build Coastguard Worker {
584*61046927SAndroid Build Coastguard Worker switch (raddr) {
585*61046927SAndroid Build Coastguard Worker case 0: /* ldvary delayed write of C coefficient to rf0 */
586*61046927SAndroid Build Coastguard Worker if (scoreboard->tick - scoreboard->last_ldvary_tick <= 1)
587*61046927SAndroid Build Coastguard Worker return true;
588*61046927SAndroid Build Coastguard Worker break;
589*61046927SAndroid Build Coastguard Worker default:
590*61046927SAndroid Build Coastguard Worker break;
591*61046927SAndroid Build Coastguard Worker }
592*61046927SAndroid Build Coastguard Worker
593*61046927SAndroid Build Coastguard Worker return false;
594*61046927SAndroid Build Coastguard Worker }
595*61046927SAndroid Build Coastguard Worker
596*61046927SAndroid Build Coastguard Worker static bool
reads_too_soon_after_write(const struct v3d_device_info * devinfo,struct choose_scoreboard * scoreboard,struct qinst * qinst)597*61046927SAndroid Build Coastguard Worker reads_too_soon_after_write(const struct v3d_device_info *devinfo,
598*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
599*61046927SAndroid Build Coastguard Worker struct qinst *qinst)
600*61046927SAndroid Build Coastguard Worker {
601*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst = &qinst->qpu;
602*61046927SAndroid Build Coastguard Worker
603*61046927SAndroid Build Coastguard Worker /* XXX: Branching off of raddr. */
604*61046927SAndroid Build Coastguard Worker if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
605*61046927SAndroid Build Coastguard Worker return false;
606*61046927SAndroid Build Coastguard Worker
607*61046927SAndroid Build Coastguard Worker assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
608*61046927SAndroid Build Coastguard Worker
609*61046927SAndroid Build Coastguard Worker if (inst->alu.add.op != V3D_QPU_A_NOP) {
610*61046927SAndroid Build Coastguard Worker if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) {
611*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
612*61046927SAndroid Build Coastguard Worker if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux))
613*61046927SAndroid Build Coastguard Worker return true;
614*61046927SAndroid Build Coastguard Worker } else {
615*61046927SAndroid Build Coastguard Worker if (reads_too_soon(scoreboard, inst, inst->alu.add.a.raddr))
616*61046927SAndroid Build Coastguard Worker return true;
617*61046927SAndroid Build Coastguard Worker }
618*61046927SAndroid Build Coastguard Worker }
619*61046927SAndroid Build Coastguard Worker if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) {
620*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
621*61046927SAndroid Build Coastguard Worker if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux))
622*61046927SAndroid Build Coastguard Worker return true;
623*61046927SAndroid Build Coastguard Worker } else {
624*61046927SAndroid Build Coastguard Worker if (reads_too_soon(scoreboard, inst, inst->alu.add.b.raddr))
625*61046927SAndroid Build Coastguard Worker return true;
626*61046927SAndroid Build Coastguard Worker }
627*61046927SAndroid Build Coastguard Worker }
628*61046927SAndroid Build Coastguard Worker }
629*61046927SAndroid Build Coastguard Worker
630*61046927SAndroid Build Coastguard Worker if (inst->alu.mul.op != V3D_QPU_M_NOP) {
631*61046927SAndroid Build Coastguard Worker if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) {
632*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
633*61046927SAndroid Build Coastguard Worker if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux))
634*61046927SAndroid Build Coastguard Worker return true;
635*61046927SAndroid Build Coastguard Worker } else {
636*61046927SAndroid Build Coastguard Worker if (reads_too_soon(scoreboard, inst, inst->alu.mul.a.raddr))
637*61046927SAndroid Build Coastguard Worker return true;
638*61046927SAndroid Build Coastguard Worker }
639*61046927SAndroid Build Coastguard Worker }
640*61046927SAndroid Build Coastguard Worker if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) {
641*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
642*61046927SAndroid Build Coastguard Worker if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux))
643*61046927SAndroid Build Coastguard Worker return true;
644*61046927SAndroid Build Coastguard Worker } else {
645*61046927SAndroid Build Coastguard Worker if (reads_too_soon(scoreboard, inst, inst->alu.mul.b.raddr))
646*61046927SAndroid Build Coastguard Worker return true;
647*61046927SAndroid Build Coastguard Worker }
648*61046927SAndroid Build Coastguard Worker }
649*61046927SAndroid Build Coastguard Worker }
650*61046927SAndroid Build Coastguard Worker
651*61046927SAndroid Build Coastguard Worker /* XXX: imm */
652*61046927SAndroid Build Coastguard Worker
653*61046927SAndroid Build Coastguard Worker return false;
654*61046927SAndroid Build Coastguard Worker }
655*61046927SAndroid Build Coastguard Worker
656*61046927SAndroid Build Coastguard Worker static bool
writes_too_soon_after_write(const struct v3d_device_info * devinfo,struct choose_scoreboard * scoreboard,struct qinst * qinst)657*61046927SAndroid Build Coastguard Worker writes_too_soon_after_write(const struct v3d_device_info *devinfo,
658*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
659*61046927SAndroid Build Coastguard Worker struct qinst *qinst)
660*61046927SAndroid Build Coastguard Worker {
661*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst = &qinst->qpu;
662*61046927SAndroid Build Coastguard Worker
663*61046927SAndroid Build Coastguard Worker /* Don't schedule any other r4 write too soon after an SFU write.
664*61046927SAndroid Build Coastguard Worker * This would normally be prevented by dependency tracking, but might
665*61046927SAndroid Build Coastguard Worker * occur if a dead SFU computation makes it to scheduling.
666*61046927SAndroid Build Coastguard Worker */
667*61046927SAndroid Build Coastguard Worker if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick < 2 &&
668*61046927SAndroid Build Coastguard Worker v3d_qpu_writes_r4(devinfo, inst))
669*61046927SAndroid Build Coastguard Worker return true;
670*61046927SAndroid Build Coastguard Worker
671*61046927SAndroid Build Coastguard Worker if (devinfo->ver == 42)
672*61046927SAndroid Build Coastguard Worker return false;
673*61046927SAndroid Build Coastguard Worker
674*61046927SAndroid Build Coastguard Worker /* Don't schedule anything that writes rf0 right after ldvary, since
675*61046927SAndroid Build Coastguard Worker * that would clash with the ldvary's delayed rf0 write (the exception
676*61046927SAndroid Build Coastguard Worker * is another ldvary, since its implicit rf0 write would also have
677*61046927SAndroid Build Coastguard Worker * one cycle of delay and would not clash).
678*61046927SAndroid Build Coastguard Worker */
679*61046927SAndroid Build Coastguard Worker if (scoreboard->last_ldvary_tick + 1 == scoreboard->tick &&
680*61046927SAndroid Build Coastguard Worker (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) ||
681*61046927SAndroid Build Coastguard Worker (v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
682*61046927SAndroid Build Coastguard Worker !inst->sig.ldvary))) {
683*61046927SAndroid Build Coastguard Worker return true;
684*61046927SAndroid Build Coastguard Worker }
685*61046927SAndroid Build Coastguard Worker
686*61046927SAndroid Build Coastguard Worker return false;
687*61046927SAndroid Build Coastguard Worker }
688*61046927SAndroid Build Coastguard Worker
689*61046927SAndroid Build Coastguard Worker static bool
scoreboard_is_locked(struct choose_scoreboard * scoreboard,bool lock_scoreboard_on_first_thrsw)690*61046927SAndroid Build Coastguard Worker scoreboard_is_locked(struct choose_scoreboard *scoreboard,
691*61046927SAndroid Build Coastguard Worker bool lock_scoreboard_on_first_thrsw)
692*61046927SAndroid Build Coastguard Worker {
693*61046927SAndroid Build Coastguard Worker if (lock_scoreboard_on_first_thrsw) {
694*61046927SAndroid Build Coastguard Worker return scoreboard->first_thrsw_emitted &&
695*61046927SAndroid Build Coastguard Worker scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
696*61046927SAndroid Build Coastguard Worker }
697*61046927SAndroid Build Coastguard Worker
698*61046927SAndroid Build Coastguard Worker return scoreboard->last_thrsw_emitted &&
699*61046927SAndroid Build Coastguard Worker scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
700*61046927SAndroid Build Coastguard Worker }
701*61046927SAndroid Build Coastguard Worker
702*61046927SAndroid Build Coastguard Worker static bool
pixel_scoreboard_too_soon(struct v3d_compile * c,struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst)703*61046927SAndroid Build Coastguard Worker pixel_scoreboard_too_soon(struct v3d_compile *c,
704*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
705*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst)
706*61046927SAndroid Build Coastguard Worker {
707*61046927SAndroid Build Coastguard Worker return qpu_inst_is_tlb(inst) &&
708*61046927SAndroid Build Coastguard Worker !scoreboard_is_locked(scoreboard,
709*61046927SAndroid Build Coastguard Worker c->lock_scoreboard_on_first_thrsw);
710*61046927SAndroid Build Coastguard Worker }
711*61046927SAndroid Build Coastguard Worker
712*61046927SAndroid Build Coastguard Worker static bool
qpu_instruction_uses_rf(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst,uint32_t waddr)713*61046927SAndroid Build Coastguard Worker qpu_instruction_uses_rf(const struct v3d_device_info *devinfo,
714*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst,
715*61046927SAndroid Build Coastguard Worker uint32_t waddr) {
716*61046927SAndroid Build Coastguard Worker
717*61046927SAndroid Build Coastguard Worker if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
718*61046927SAndroid Build Coastguard Worker return false;
719*61046927SAndroid Build Coastguard Worker
720*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71) {
721*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
722*61046927SAndroid Build Coastguard Worker inst->raddr_a == waddr)
723*61046927SAndroid Build Coastguard Worker return true;
724*61046927SAndroid Build Coastguard Worker
725*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
726*61046927SAndroid Build Coastguard Worker !inst->sig.small_imm_b && (inst->raddr_b == waddr))
727*61046927SAndroid Build Coastguard Worker return true;
728*61046927SAndroid Build Coastguard Worker } else {
729*61046927SAndroid Build Coastguard Worker if (v3d71_qpu_reads_raddr(inst, waddr))
730*61046927SAndroid Build Coastguard Worker return true;
731*61046927SAndroid Build Coastguard Worker }
732*61046927SAndroid Build Coastguard Worker
733*61046927SAndroid Build Coastguard Worker return false;
734*61046927SAndroid Build Coastguard Worker }
735*61046927SAndroid Build Coastguard Worker
736*61046927SAndroid Build Coastguard Worker static bool
read_stalls(const struct v3d_device_info * devinfo,struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst)737*61046927SAndroid Build Coastguard Worker read_stalls(const struct v3d_device_info *devinfo,
738*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
739*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst)
740*61046927SAndroid Build Coastguard Worker {
741*61046927SAndroid Build Coastguard Worker return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 &&
742*61046927SAndroid Build Coastguard Worker qpu_instruction_uses_rf(devinfo, inst,
743*61046927SAndroid Build Coastguard Worker scoreboard->last_stallable_sfu_reg);
744*61046927SAndroid Build Coastguard Worker }
745*61046927SAndroid Build Coastguard Worker
/* Maximum schedule priority. When an instruction stalls, this maximum is
 * subtracted from its priority, which yields a negative value, so
 * instructions that stall rank below regular instructions.
 */
#define MAX_SCHEDULE_PRIORITY 16
750*61046927SAndroid Build Coastguard Worker
751*61046927SAndroid Build Coastguard Worker static int
get_instruction_priority(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)752*61046927SAndroid Build Coastguard Worker get_instruction_priority(const struct v3d_device_info *devinfo,
753*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst)
754*61046927SAndroid Build Coastguard Worker {
755*61046927SAndroid Build Coastguard Worker uint32_t baseline_score;
756*61046927SAndroid Build Coastguard Worker uint32_t next_score = 0;
757*61046927SAndroid Build Coastguard Worker
758*61046927SAndroid Build Coastguard Worker /* Schedule TLB operations as late as possible, to get more
759*61046927SAndroid Build Coastguard Worker * parallelism between shaders.
760*61046927SAndroid Build Coastguard Worker */
761*61046927SAndroid Build Coastguard Worker if (qpu_inst_is_tlb(inst))
762*61046927SAndroid Build Coastguard Worker return next_score;
763*61046927SAndroid Build Coastguard Worker next_score++;
764*61046927SAndroid Build Coastguard Worker
765*61046927SAndroid Build Coastguard Worker /* Empirical testing shows that using priorities to hide latency of
766*61046927SAndroid Build Coastguard Worker * TMU operations when scheduling QPU leads to slightly worse
767*61046927SAndroid Build Coastguard Worker * performance, even at 2 threads. We think this is because the thread
768*61046927SAndroid Build Coastguard Worker * switching is already quite effective at hiding latency and NIR
769*61046927SAndroid Build Coastguard Worker * scheduling (and possibly TMU pipelining too) are sufficient to hide
770*61046927SAndroid Build Coastguard Worker * TMU latency, so piling up on that here doesn't provide any benefits
771*61046927SAndroid Build Coastguard Worker * and instead may cause us to postpone critical paths that depend on
772*61046927SAndroid Build Coastguard Worker * the TMU results.
773*61046927SAndroid Build Coastguard Worker */
774*61046927SAndroid Build Coastguard Worker #if 0
775*61046927SAndroid Build Coastguard Worker /* Schedule texture read results collection late to hide latency. */
776*61046927SAndroid Build Coastguard Worker if (v3d_qpu_waits_on_tmu(inst))
777*61046927SAndroid Build Coastguard Worker return next_score;
778*61046927SAndroid Build Coastguard Worker next_score++;
779*61046927SAndroid Build Coastguard Worker #endif
780*61046927SAndroid Build Coastguard Worker
781*61046927SAndroid Build Coastguard Worker /* Default score for things that aren't otherwise special. */
782*61046927SAndroid Build Coastguard Worker baseline_score = next_score;
783*61046927SAndroid Build Coastguard Worker next_score++;
784*61046927SAndroid Build Coastguard Worker
785*61046927SAndroid Build Coastguard Worker #if 0
786*61046927SAndroid Build Coastguard Worker /* Schedule texture read setup early to hide their latency better. */
787*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_tmu(devinfo, inst))
788*61046927SAndroid Build Coastguard Worker return next_score;
789*61046927SAndroid Build Coastguard Worker next_score++;
790*61046927SAndroid Build Coastguard Worker #endif
791*61046927SAndroid Build Coastguard Worker
792*61046927SAndroid Build Coastguard Worker /* We should increase the maximum if we assert here */
793*61046927SAndroid Build Coastguard Worker assert(next_score < MAX_SCHEDULE_PRIORITY);
794*61046927SAndroid Build Coastguard Worker
795*61046927SAndroid Build Coastguard Worker return baseline_score;
796*61046927SAndroid Build Coastguard Worker }
797*61046927SAndroid Build Coastguard Worker
/* One bit per kind of peripheral access an instruction can perform; the bits
 * are OR'ed together into the mask returned by qpu_peripherals().
 */
enum {
        V3D_PERIPHERAL_VPM_READ           = 0x001,
        V3D_PERIPHERAL_VPM_WRITE          = 0x002,
        V3D_PERIPHERAL_VPM_WAIT           = 0x004,
        V3D_PERIPHERAL_SFU                = 0x008,
        V3D_PERIPHERAL_TMU_WRITE          = 0x010,
        V3D_PERIPHERAL_TMU_READ           = 0x020,
        V3D_PERIPHERAL_TMU_WAIT           = 0x040,
        V3D_PERIPHERAL_TMU_WRTMUC_SIG     = 0x080,
        V3D_PERIPHERAL_TSY                = 0x100,
        V3D_PERIPHERAL_TLB_READ           = 0x200,
        V3D_PERIPHERAL_TLB_WRITE          = 0x400,
};
811*61046927SAndroid Build Coastguard Worker
812*61046927SAndroid Build Coastguard Worker static uint32_t
qpu_peripherals(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)813*61046927SAndroid Build Coastguard Worker qpu_peripherals(const struct v3d_device_info *devinfo,
814*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst)
815*61046927SAndroid Build Coastguard Worker {
816*61046927SAndroid Build Coastguard Worker uint32_t result = 0;
817*61046927SAndroid Build Coastguard Worker if (v3d_qpu_reads_vpm(inst))
818*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_VPM_READ;
819*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_vpm(inst))
820*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_VPM_WRITE;
821*61046927SAndroid Build Coastguard Worker if (v3d_qpu_waits_vpm(inst))
822*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_VPM_WAIT;
823*61046927SAndroid Build Coastguard Worker
824*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_tmu(devinfo, inst))
825*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_TMU_WRITE;
826*61046927SAndroid Build Coastguard Worker if (inst->sig.ldtmu)
827*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_TMU_READ;
828*61046927SAndroid Build Coastguard Worker if (inst->sig.wrtmuc)
829*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_TMU_WRTMUC_SIG;
830*61046927SAndroid Build Coastguard Worker
831*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_sfu(inst))
832*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_SFU;
833*61046927SAndroid Build Coastguard Worker
834*61046927SAndroid Build Coastguard Worker if (v3d_qpu_reads_tlb(inst))
835*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_TLB_READ;
836*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_tlb(inst))
837*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_TLB_WRITE;
838*61046927SAndroid Build Coastguard Worker
839*61046927SAndroid Build Coastguard Worker if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
840*61046927SAndroid Build Coastguard Worker if (inst->alu.add.op != V3D_QPU_A_NOP &&
841*61046927SAndroid Build Coastguard Worker inst->alu.add.magic_write &&
842*61046927SAndroid Build Coastguard Worker v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) {
843*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_TSY;
844*61046927SAndroid Build Coastguard Worker }
845*61046927SAndroid Build Coastguard Worker
846*61046927SAndroid Build Coastguard Worker if (inst->alu.add.op == V3D_QPU_A_TMUWT)
847*61046927SAndroid Build Coastguard Worker result |= V3D_PERIPHERAL_TMU_WAIT;
848*61046927SAndroid Build Coastguard Worker }
849*61046927SAndroid Build Coastguard Worker
850*61046927SAndroid Build Coastguard Worker return result;
851*61046927SAndroid Build Coastguard Worker }
852*61046927SAndroid Build Coastguard Worker
853*61046927SAndroid Build Coastguard Worker static bool
qpu_compatible_peripheral_access(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * a,const struct v3d_qpu_instr * b)854*61046927SAndroid Build Coastguard Worker qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
855*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *a,
856*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *b)
857*61046927SAndroid Build Coastguard Worker {
858*61046927SAndroid Build Coastguard Worker const uint32_t a_peripherals = qpu_peripherals(devinfo, a);
859*61046927SAndroid Build Coastguard Worker const uint32_t b_peripherals = qpu_peripherals(devinfo, b);
860*61046927SAndroid Build Coastguard Worker
861*61046927SAndroid Build Coastguard Worker /* We can always do one peripheral access per instruction. */
862*61046927SAndroid Build Coastguard Worker if (util_bitcount(a_peripherals) + util_bitcount(b_peripherals) <= 1)
863*61046927SAndroid Build Coastguard Worker return true;
864*61046927SAndroid Build Coastguard Worker
865*61046927SAndroid Build Coastguard Worker /* V3D 4.x can't do more than one peripheral access except in a
866*61046927SAndroid Build Coastguard Worker * few cases:
867*61046927SAndroid Build Coastguard Worker */
868*61046927SAndroid Build Coastguard Worker if (devinfo->ver == 42) {
869*61046927SAndroid Build Coastguard Worker /* WRTMUC signal with TMU register write (other than tmuc). */
870*61046927SAndroid Build Coastguard Worker if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
871*61046927SAndroid Build Coastguard Worker b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
872*61046927SAndroid Build Coastguard Worker return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
873*61046927SAndroid Build Coastguard Worker }
874*61046927SAndroid Build Coastguard Worker if (b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
875*61046927SAndroid Build Coastguard Worker a_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
876*61046927SAndroid Build Coastguard Worker return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
877*61046927SAndroid Build Coastguard Worker }
878*61046927SAndroid Build Coastguard Worker
879*61046927SAndroid Build Coastguard Worker /* TMU read with VPM read/write. */
880*61046927SAndroid Build Coastguard Worker if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
881*61046927SAndroid Build Coastguard Worker (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
882*61046927SAndroid Build Coastguard Worker b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
883*61046927SAndroid Build Coastguard Worker return true;
884*61046927SAndroid Build Coastguard Worker }
885*61046927SAndroid Build Coastguard Worker if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
886*61046927SAndroid Build Coastguard Worker (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
887*61046927SAndroid Build Coastguard Worker a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
888*61046927SAndroid Build Coastguard Worker return true;
889*61046927SAndroid Build Coastguard Worker }
890*61046927SAndroid Build Coastguard Worker
891*61046927SAndroid Build Coastguard Worker return false;
892*61046927SAndroid Build Coastguard Worker }
893*61046927SAndroid Build Coastguard Worker
894*61046927SAndroid Build Coastguard Worker /* V3D 7.x can't have more than one of these restricted peripherals */
895*61046927SAndroid Build Coastguard Worker const uint32_t restricted = V3D_PERIPHERAL_TMU_WRITE |
896*61046927SAndroid Build Coastguard Worker V3D_PERIPHERAL_TMU_WRTMUC_SIG |
897*61046927SAndroid Build Coastguard Worker V3D_PERIPHERAL_TSY |
898*61046927SAndroid Build Coastguard Worker V3D_PERIPHERAL_TLB_READ |
899*61046927SAndroid Build Coastguard Worker V3D_PERIPHERAL_SFU |
900*61046927SAndroid Build Coastguard Worker V3D_PERIPHERAL_VPM_READ |
901*61046927SAndroid Build Coastguard Worker V3D_PERIPHERAL_VPM_WRITE;
902*61046927SAndroid Build Coastguard Worker
903*61046927SAndroid Build Coastguard Worker const uint32_t a_restricted = a_peripherals & restricted;
904*61046927SAndroid Build Coastguard Worker const uint32_t b_restricted = b_peripherals & restricted;
905*61046927SAndroid Build Coastguard Worker if (a_restricted && b_restricted) {
906*61046927SAndroid Build Coastguard Worker /* WRTMUC signal with TMU register write (other than tmuc) is
907*61046927SAndroid Build Coastguard Worker * allowed though.
908*61046927SAndroid Build Coastguard Worker */
909*61046927SAndroid Build Coastguard Worker if (!((a_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
910*61046927SAndroid Build Coastguard Worker b_restricted == V3D_PERIPHERAL_TMU_WRITE &&
911*61046927SAndroid Build Coastguard Worker v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) ||
912*61046927SAndroid Build Coastguard Worker (b_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
913*61046927SAndroid Build Coastguard Worker a_restricted == V3D_PERIPHERAL_TMU_WRITE &&
914*61046927SAndroid Build Coastguard Worker v3d_qpu_writes_tmu_not_tmuc(devinfo, a)))) {
915*61046927SAndroid Build Coastguard Worker return false;
916*61046927SAndroid Build Coastguard Worker }
917*61046927SAndroid Build Coastguard Worker }
918*61046927SAndroid Build Coastguard Worker
919*61046927SAndroid Build Coastguard Worker /* Only one TMU read per instruction */
920*61046927SAndroid Build Coastguard Worker if ((a_peripherals & V3D_PERIPHERAL_TMU_READ) &&
921*61046927SAndroid Build Coastguard Worker (b_peripherals & V3D_PERIPHERAL_TMU_READ)) {
922*61046927SAndroid Build Coastguard Worker return false;
923*61046927SAndroid Build Coastguard Worker }
924*61046927SAndroid Build Coastguard Worker
925*61046927SAndroid Build Coastguard Worker /* Only one TLB access per instruction */
926*61046927SAndroid Build Coastguard Worker if ((a_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
927*61046927SAndroid Build Coastguard Worker V3D_PERIPHERAL_TLB_READ)) &&
928*61046927SAndroid Build Coastguard Worker (b_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
929*61046927SAndroid Build Coastguard Worker V3D_PERIPHERAL_TLB_READ))) {
930*61046927SAndroid Build Coastguard Worker return false;
931*61046927SAndroid Build Coastguard Worker }
932*61046927SAndroid Build Coastguard Worker
933*61046927SAndroid Build Coastguard Worker return true;
934*61046927SAndroid Build Coastguard Worker }
935*61046927SAndroid Build Coastguard Worker
936*61046927SAndroid Build Coastguard Worker /* Compute a bitmask of which rf registers are used between
937*61046927SAndroid Build Coastguard Worker * the two instructions.
938*61046927SAndroid Build Coastguard Worker */
939*61046927SAndroid Build Coastguard Worker static uint64_t
qpu_raddrs_used(const struct v3d_qpu_instr * a,const struct v3d_qpu_instr * b)940*61046927SAndroid Build Coastguard Worker qpu_raddrs_used(const struct v3d_qpu_instr *a,
941*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *b)
942*61046927SAndroid Build Coastguard Worker {
943*61046927SAndroid Build Coastguard Worker assert(a->type == V3D_QPU_INSTR_TYPE_ALU);
944*61046927SAndroid Build Coastguard Worker assert(b->type == V3D_QPU_INSTR_TYPE_ALU);
945*61046927SAndroid Build Coastguard Worker
946*61046927SAndroid Build Coastguard Worker uint64_t raddrs_used = 0;
947*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_mux(a, V3D_QPU_MUX_A))
948*61046927SAndroid Build Coastguard Worker raddrs_used |= (UINT64_C(1) << a->raddr_a);
949*61046927SAndroid Build Coastguard Worker if (!a->sig.small_imm_b && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
950*61046927SAndroid Build Coastguard Worker raddrs_used |= (UINT64_C(1) << a->raddr_b);
951*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_mux(b, V3D_QPU_MUX_A))
952*61046927SAndroid Build Coastguard Worker raddrs_used |= (UINT64_C(1) << b->raddr_a);
953*61046927SAndroid Build Coastguard Worker if (!b->sig.small_imm_b && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
954*61046927SAndroid Build Coastguard Worker raddrs_used |= (UINT64_C(1) << b->raddr_b);
955*61046927SAndroid Build Coastguard Worker
956*61046927SAndroid Build Coastguard Worker return raddrs_used;
957*61046927SAndroid Build Coastguard Worker }
958*61046927SAndroid Build Coastguard Worker
959*61046927SAndroid Build Coastguard Worker /* Takes two instructions and attempts to merge their raddr fields (including
960*61046927SAndroid Build Coastguard Worker * small immediates) into one merged instruction. For V3D 4.x, returns false
961*61046927SAndroid Build Coastguard Worker * if the two instructions access more than two different rf registers between
962*61046927SAndroid Build Coastguard Worker * them, or more than one rf register and one small immediate. For 7.x returns
963*61046927SAndroid Build Coastguard Worker * false if both instructions use small immediates.
964*61046927SAndroid Build Coastguard Worker */
965*61046927SAndroid Build Coastguard Worker static bool
qpu_merge_raddrs(struct v3d_qpu_instr * result,const struct v3d_qpu_instr * add_instr,const struct v3d_qpu_instr * mul_instr,const struct v3d_device_info * devinfo)966*61046927SAndroid Build Coastguard Worker qpu_merge_raddrs(struct v3d_qpu_instr *result,
967*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *add_instr,
968*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *mul_instr,
969*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo)
970*61046927SAndroid Build Coastguard Worker {
971*61046927SAndroid Build Coastguard Worker if (devinfo->ver >= 71) {
972*61046927SAndroid Build Coastguard Worker assert(add_instr->sig.small_imm_a +
973*61046927SAndroid Build Coastguard Worker add_instr->sig.small_imm_b <= 1);
974*61046927SAndroid Build Coastguard Worker assert(add_instr->sig.small_imm_c +
975*61046927SAndroid Build Coastguard Worker add_instr->sig.small_imm_d == 0);
976*61046927SAndroid Build Coastguard Worker assert(mul_instr->sig.small_imm_a +
977*61046927SAndroid Build Coastguard Worker mul_instr->sig.small_imm_b == 0);
978*61046927SAndroid Build Coastguard Worker assert(mul_instr->sig.small_imm_c +
979*61046927SAndroid Build Coastguard Worker mul_instr->sig.small_imm_d <= 1);
980*61046927SAndroid Build Coastguard Worker
981*61046927SAndroid Build Coastguard Worker result->sig.small_imm_a = add_instr->sig.small_imm_a;
982*61046927SAndroid Build Coastguard Worker result->sig.small_imm_b = add_instr->sig.small_imm_b;
983*61046927SAndroid Build Coastguard Worker result->sig.small_imm_c = mul_instr->sig.small_imm_c;
984*61046927SAndroid Build Coastguard Worker result->sig.small_imm_d = mul_instr->sig.small_imm_d;
985*61046927SAndroid Build Coastguard Worker
986*61046927SAndroid Build Coastguard Worker return (result->sig.small_imm_a +
987*61046927SAndroid Build Coastguard Worker result->sig.small_imm_b +
988*61046927SAndroid Build Coastguard Worker result->sig.small_imm_c +
989*61046927SAndroid Build Coastguard Worker result->sig.small_imm_d) <= 1;
990*61046927SAndroid Build Coastguard Worker }
991*61046927SAndroid Build Coastguard Worker
992*61046927SAndroid Build Coastguard Worker assert(devinfo->ver == 42);
993*61046927SAndroid Build Coastguard Worker
994*61046927SAndroid Build Coastguard Worker uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr);
995*61046927SAndroid Build Coastguard Worker int naddrs = util_bitcount64(raddrs_used);
996*61046927SAndroid Build Coastguard Worker
997*61046927SAndroid Build Coastguard Worker if (naddrs > 2)
998*61046927SAndroid Build Coastguard Worker return false;
999*61046927SAndroid Build Coastguard Worker
1000*61046927SAndroid Build Coastguard Worker if ((add_instr->sig.small_imm_b || mul_instr->sig.small_imm_b)) {
1001*61046927SAndroid Build Coastguard Worker if (naddrs > 1)
1002*61046927SAndroid Build Coastguard Worker return false;
1003*61046927SAndroid Build Coastguard Worker
1004*61046927SAndroid Build Coastguard Worker if (add_instr->sig.small_imm_b && mul_instr->sig.small_imm_b)
1005*61046927SAndroid Build Coastguard Worker if (add_instr->raddr_b != mul_instr->raddr_b)
1006*61046927SAndroid Build Coastguard Worker return false;
1007*61046927SAndroid Build Coastguard Worker
1008*61046927SAndroid Build Coastguard Worker result->sig.small_imm_b = true;
1009*61046927SAndroid Build Coastguard Worker result->raddr_b = add_instr->sig.small_imm_b ?
1010*61046927SAndroid Build Coastguard Worker add_instr->raddr_b : mul_instr->raddr_b;
1011*61046927SAndroid Build Coastguard Worker }
1012*61046927SAndroid Build Coastguard Worker
1013*61046927SAndroid Build Coastguard Worker if (naddrs == 0)
1014*61046927SAndroid Build Coastguard Worker return true;
1015*61046927SAndroid Build Coastguard Worker
1016*61046927SAndroid Build Coastguard Worker int raddr_a = ffsll(raddrs_used) - 1;
1017*61046927SAndroid Build Coastguard Worker raddrs_used &= ~(UINT64_C(1) << raddr_a);
1018*61046927SAndroid Build Coastguard Worker result->raddr_a = raddr_a;
1019*61046927SAndroid Build Coastguard Worker
1020*61046927SAndroid Build Coastguard Worker if (!result->sig.small_imm_b) {
1021*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) &&
1022*61046927SAndroid Build Coastguard Worker raddr_a == add_instr->raddr_b) {
1023*61046927SAndroid Build Coastguard Worker if (add_instr->alu.add.a.mux == V3D_QPU_MUX_B)
1024*61046927SAndroid Build Coastguard Worker result->alu.add.a.mux = V3D_QPU_MUX_A;
1025*61046927SAndroid Build Coastguard Worker if (add_instr->alu.add.b.mux == V3D_QPU_MUX_B &&
1026*61046927SAndroid Build Coastguard Worker v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) {
1027*61046927SAndroid Build Coastguard Worker result->alu.add.b.mux = V3D_QPU_MUX_A;
1028*61046927SAndroid Build Coastguard Worker }
1029*61046927SAndroid Build Coastguard Worker }
1030*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_B) &&
1031*61046927SAndroid Build Coastguard Worker raddr_a == mul_instr->raddr_b) {
1032*61046927SAndroid Build Coastguard Worker if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_B)
1033*61046927SAndroid Build Coastguard Worker result->alu.mul.a.mux = V3D_QPU_MUX_A;
1034*61046927SAndroid Build Coastguard Worker if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_B &&
1035*61046927SAndroid Build Coastguard Worker v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) {
1036*61046927SAndroid Build Coastguard Worker result->alu.mul.b.mux = V3D_QPU_MUX_A;
1037*61046927SAndroid Build Coastguard Worker }
1038*61046927SAndroid Build Coastguard Worker }
1039*61046927SAndroid Build Coastguard Worker }
1040*61046927SAndroid Build Coastguard Worker if (!raddrs_used)
1041*61046927SAndroid Build Coastguard Worker return true;
1042*61046927SAndroid Build Coastguard Worker
1043*61046927SAndroid Build Coastguard Worker int raddr_b = ffsll(raddrs_used) - 1;
1044*61046927SAndroid Build Coastguard Worker result->raddr_b = raddr_b;
1045*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_A) &&
1046*61046927SAndroid Build Coastguard Worker raddr_b == add_instr->raddr_a) {
1047*61046927SAndroid Build Coastguard Worker if (add_instr->alu.add.a.mux == V3D_QPU_MUX_A)
1048*61046927SAndroid Build Coastguard Worker result->alu.add.a.mux = V3D_QPU_MUX_B;
1049*61046927SAndroid Build Coastguard Worker if (add_instr->alu.add.b.mux == V3D_QPU_MUX_A &&
1050*61046927SAndroid Build Coastguard Worker v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) {
1051*61046927SAndroid Build Coastguard Worker result->alu.add.b.mux = V3D_QPU_MUX_B;
1052*61046927SAndroid Build Coastguard Worker }
1053*61046927SAndroid Build Coastguard Worker }
1054*61046927SAndroid Build Coastguard Worker if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_A) &&
1055*61046927SAndroid Build Coastguard Worker raddr_b == mul_instr->raddr_a) {
1056*61046927SAndroid Build Coastguard Worker if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_A)
1057*61046927SAndroid Build Coastguard Worker result->alu.mul.a.mux = V3D_QPU_MUX_B;
1058*61046927SAndroid Build Coastguard Worker if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_A &&
1059*61046927SAndroid Build Coastguard Worker v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) {
1060*61046927SAndroid Build Coastguard Worker result->alu.mul.b.mux = V3D_QPU_MUX_B;
1061*61046927SAndroid Build Coastguard Worker }
1062*61046927SAndroid Build Coastguard Worker }
1063*61046927SAndroid Build Coastguard Worker
1064*61046927SAndroid Build Coastguard Worker return true;
1065*61046927SAndroid Build Coastguard Worker }
1066*61046927SAndroid Build Coastguard Worker
1067*61046927SAndroid Build Coastguard Worker static bool
can_do_add_as_mul(enum v3d_qpu_add_op op)1068*61046927SAndroid Build Coastguard Worker can_do_add_as_mul(enum v3d_qpu_add_op op)
1069*61046927SAndroid Build Coastguard Worker {
1070*61046927SAndroid Build Coastguard Worker switch (op) {
1071*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_ADD:
1072*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_SUB:
1073*61046927SAndroid Build Coastguard Worker return true;
1074*61046927SAndroid Build Coastguard Worker default:
1075*61046927SAndroid Build Coastguard Worker return false;
1076*61046927SAndroid Build Coastguard Worker }
1077*61046927SAndroid Build Coastguard Worker }
1078*61046927SAndroid Build Coastguard Worker
1079*61046927SAndroid Build Coastguard Worker static enum v3d_qpu_mul_op
add_op_as_mul_op(enum v3d_qpu_add_op op)1080*61046927SAndroid Build Coastguard Worker add_op_as_mul_op(enum v3d_qpu_add_op op)
1081*61046927SAndroid Build Coastguard Worker {
1082*61046927SAndroid Build Coastguard Worker switch (op) {
1083*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_ADD:
1084*61046927SAndroid Build Coastguard Worker return V3D_QPU_M_ADD;
1085*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_SUB:
1086*61046927SAndroid Build Coastguard Worker return V3D_QPU_M_SUB;
1087*61046927SAndroid Build Coastguard Worker default:
1088*61046927SAndroid Build Coastguard Worker unreachable("unexpected add opcode");
1089*61046927SAndroid Build Coastguard Worker }
1090*61046927SAndroid Build Coastguard Worker }
1091*61046927SAndroid Build Coastguard Worker
1092*61046927SAndroid Build Coastguard Worker static void
qpu_convert_add_to_mul(const struct v3d_device_info * devinfo,struct v3d_qpu_instr * inst)1093*61046927SAndroid Build Coastguard Worker qpu_convert_add_to_mul(const struct v3d_device_info *devinfo,
1094*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr *inst)
1095*61046927SAndroid Build Coastguard Worker {
1096*61046927SAndroid Build Coastguard Worker STATIC_ASSERT(sizeof(inst->alu.mul) == sizeof(inst->alu.add));
1097*61046927SAndroid Build Coastguard Worker assert(inst->alu.add.op != V3D_QPU_A_NOP);
1098*61046927SAndroid Build Coastguard Worker assert(inst->alu.mul.op == V3D_QPU_M_NOP);
1099*61046927SAndroid Build Coastguard Worker
1100*61046927SAndroid Build Coastguard Worker memcpy(&inst->alu.mul, &inst->alu.add, sizeof(inst->alu.mul));
1101*61046927SAndroid Build Coastguard Worker inst->alu.mul.op = add_op_as_mul_op(inst->alu.add.op);
1102*61046927SAndroid Build Coastguard Worker inst->alu.add.op = V3D_QPU_A_NOP;
1103*61046927SAndroid Build Coastguard Worker
1104*61046927SAndroid Build Coastguard Worker inst->flags.mc = inst->flags.ac;
1105*61046927SAndroid Build Coastguard Worker inst->flags.mpf = inst->flags.apf;
1106*61046927SAndroid Build Coastguard Worker inst->flags.muf = inst->flags.auf;
1107*61046927SAndroid Build Coastguard Worker inst->flags.ac = V3D_QPU_COND_NONE;
1108*61046927SAndroid Build Coastguard Worker inst->flags.apf = V3D_QPU_PF_NONE;
1109*61046927SAndroid Build Coastguard Worker inst->flags.auf = V3D_QPU_UF_NONE;
1110*61046927SAndroid Build Coastguard Worker
1111*61046927SAndroid Build Coastguard Worker inst->alu.mul.output_pack = inst->alu.add.output_pack;
1112*61046927SAndroid Build Coastguard Worker
1113*61046927SAndroid Build Coastguard Worker inst->alu.mul.a.unpack = inst->alu.add.a.unpack;
1114*61046927SAndroid Build Coastguard Worker inst->alu.mul.b.unpack = inst->alu.add.b.unpack;
1115*61046927SAndroid Build Coastguard Worker inst->alu.add.output_pack = V3D_QPU_PACK_NONE;
1116*61046927SAndroid Build Coastguard Worker inst->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
1117*61046927SAndroid Build Coastguard Worker inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1118*61046927SAndroid Build Coastguard Worker
1119*61046927SAndroid Build Coastguard Worker if (devinfo->ver >= 71) {
1120*61046927SAndroid Build Coastguard Worker assert(!inst->sig.small_imm_c && !inst->sig.small_imm_d);
1121*61046927SAndroid Build Coastguard Worker assert(inst->sig.small_imm_a + inst->sig.small_imm_b <= 1);
1122*61046927SAndroid Build Coastguard Worker if (inst->sig.small_imm_a) {
1123*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_c = true;
1124*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_a = false;
1125*61046927SAndroid Build Coastguard Worker } else if (inst->sig.small_imm_b) {
1126*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_d = true;
1127*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_b = false;
1128*61046927SAndroid Build Coastguard Worker }
1129*61046927SAndroid Build Coastguard Worker }
1130*61046927SAndroid Build Coastguard Worker }
1131*61046927SAndroid Build Coastguard Worker
1132*61046927SAndroid Build Coastguard Worker static bool
can_do_mul_as_add(const struct v3d_device_info * devinfo,enum v3d_qpu_mul_op op)1133*61046927SAndroid Build Coastguard Worker can_do_mul_as_add(const struct v3d_device_info *devinfo, enum v3d_qpu_mul_op op)
1134*61046927SAndroid Build Coastguard Worker {
1135*61046927SAndroid Build Coastguard Worker switch (op) {
1136*61046927SAndroid Build Coastguard Worker case V3D_QPU_M_MOV:
1137*61046927SAndroid Build Coastguard Worker case V3D_QPU_M_FMOV:
1138*61046927SAndroid Build Coastguard Worker return devinfo->ver >= 71;
1139*61046927SAndroid Build Coastguard Worker default:
1140*61046927SAndroid Build Coastguard Worker return false;
1141*61046927SAndroid Build Coastguard Worker }
1142*61046927SAndroid Build Coastguard Worker }
1143*61046927SAndroid Build Coastguard Worker
1144*61046927SAndroid Build Coastguard Worker static enum v3d_qpu_mul_op
mul_op_as_add_op(enum v3d_qpu_mul_op op)1145*61046927SAndroid Build Coastguard Worker mul_op_as_add_op(enum v3d_qpu_mul_op op)
1146*61046927SAndroid Build Coastguard Worker {
1147*61046927SAndroid Build Coastguard Worker switch (op) {
1148*61046927SAndroid Build Coastguard Worker case V3D_QPU_M_MOV:
1149*61046927SAndroid Build Coastguard Worker return V3D_QPU_A_MOV;
1150*61046927SAndroid Build Coastguard Worker case V3D_QPU_M_FMOV:
1151*61046927SAndroid Build Coastguard Worker return V3D_QPU_A_FMOV;
1152*61046927SAndroid Build Coastguard Worker default:
1153*61046927SAndroid Build Coastguard Worker unreachable("unexpected mov opcode");
1154*61046927SAndroid Build Coastguard Worker }
1155*61046927SAndroid Build Coastguard Worker }
1156*61046927SAndroid Build Coastguard Worker
1157*61046927SAndroid Build Coastguard Worker static void
qpu_convert_mul_to_add(struct v3d_qpu_instr * inst)1158*61046927SAndroid Build Coastguard Worker qpu_convert_mul_to_add(struct v3d_qpu_instr *inst)
1159*61046927SAndroid Build Coastguard Worker {
1160*61046927SAndroid Build Coastguard Worker STATIC_ASSERT(sizeof(inst->alu.add) == sizeof(inst->alu.mul));
1161*61046927SAndroid Build Coastguard Worker assert(inst->alu.mul.op != V3D_QPU_M_NOP);
1162*61046927SAndroid Build Coastguard Worker assert(inst->alu.add.op == V3D_QPU_A_NOP);
1163*61046927SAndroid Build Coastguard Worker
1164*61046927SAndroid Build Coastguard Worker memcpy(&inst->alu.add, &inst->alu.mul, sizeof(inst->alu.add));
1165*61046927SAndroid Build Coastguard Worker inst->alu.add.op = mul_op_as_add_op(inst->alu.mul.op);
1166*61046927SAndroid Build Coastguard Worker inst->alu.mul.op = V3D_QPU_M_NOP;
1167*61046927SAndroid Build Coastguard Worker
1168*61046927SAndroid Build Coastguard Worker inst->flags.ac = inst->flags.mc;
1169*61046927SAndroid Build Coastguard Worker inst->flags.apf = inst->flags.mpf;
1170*61046927SAndroid Build Coastguard Worker inst->flags.auf = inst->flags.muf;
1171*61046927SAndroid Build Coastguard Worker inst->flags.mc = V3D_QPU_COND_NONE;
1172*61046927SAndroid Build Coastguard Worker inst->flags.mpf = V3D_QPU_PF_NONE;
1173*61046927SAndroid Build Coastguard Worker inst->flags.muf = V3D_QPU_UF_NONE;
1174*61046927SAndroid Build Coastguard Worker
1175*61046927SAndroid Build Coastguard Worker inst->alu.add.output_pack = inst->alu.mul.output_pack;
1176*61046927SAndroid Build Coastguard Worker inst->alu.add.a.unpack = inst->alu.mul.a.unpack;
1177*61046927SAndroid Build Coastguard Worker inst->alu.add.b.unpack = inst->alu.mul.b.unpack;
1178*61046927SAndroid Build Coastguard Worker inst->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1179*61046927SAndroid Build Coastguard Worker inst->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
1180*61046927SAndroid Build Coastguard Worker inst->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1181*61046927SAndroid Build Coastguard Worker
1182*61046927SAndroid Build Coastguard Worker assert(!inst->sig.small_imm_a && !inst->sig.small_imm_b);
1183*61046927SAndroid Build Coastguard Worker assert(inst->sig.small_imm_c + inst->sig.small_imm_d <= 1);
1184*61046927SAndroid Build Coastguard Worker if (inst->sig.small_imm_c) {
1185*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_a = true;
1186*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_c = false;
1187*61046927SAndroid Build Coastguard Worker } else if (inst->sig.small_imm_d) {
1188*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_b = true;
1189*61046927SAndroid Build Coastguard Worker inst->sig.small_imm_d = false;
1190*61046927SAndroid Build Coastguard Worker }
1191*61046927SAndroid Build Coastguard Worker }
1192*61046927SAndroid Build Coastguard Worker
1193*61046927SAndroid Build Coastguard Worker static bool
qpu_merge_inst(const struct v3d_device_info * devinfo,struct v3d_qpu_instr * result,const struct v3d_qpu_instr * a,const struct v3d_qpu_instr * b)1194*61046927SAndroid Build Coastguard Worker qpu_merge_inst(const struct v3d_device_info *devinfo,
1195*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr *result,
1196*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *a,
1197*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *b)
1198*61046927SAndroid Build Coastguard Worker {
1199*61046927SAndroid Build Coastguard Worker if (a->type != V3D_QPU_INSTR_TYPE_ALU ||
1200*61046927SAndroid Build Coastguard Worker b->type != V3D_QPU_INSTR_TYPE_ALU) {
1201*61046927SAndroid Build Coastguard Worker return false;
1202*61046927SAndroid Build Coastguard Worker }
1203*61046927SAndroid Build Coastguard Worker
1204*61046927SAndroid Build Coastguard Worker if (!qpu_compatible_peripheral_access(devinfo, a, b))
1205*61046927SAndroid Build Coastguard Worker return false;
1206*61046927SAndroid Build Coastguard Worker
1207*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr merge = *a;
1208*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *add_instr = NULL, *mul_instr = NULL;
1209*61046927SAndroid Build Coastguard Worker
1210*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr mul_inst;
1211*61046927SAndroid Build Coastguard Worker if (b->alu.add.op != V3D_QPU_A_NOP) {
1212*61046927SAndroid Build Coastguard Worker if (a->alu.add.op == V3D_QPU_A_NOP) {
1213*61046927SAndroid Build Coastguard Worker merge.alu.add = b->alu.add;
1214*61046927SAndroid Build Coastguard Worker
1215*61046927SAndroid Build Coastguard Worker merge.flags.ac = b->flags.ac;
1216*61046927SAndroid Build Coastguard Worker merge.flags.apf = b->flags.apf;
1217*61046927SAndroid Build Coastguard Worker merge.flags.auf = b->flags.auf;
1218*61046927SAndroid Build Coastguard Worker
1219*61046927SAndroid Build Coastguard Worker add_instr = b;
1220*61046927SAndroid Build Coastguard Worker mul_instr = a;
1221*61046927SAndroid Build Coastguard Worker }
1222*61046927SAndroid Build Coastguard Worker /* If a's add op is used but its mul op is not, then see if we
1223*61046927SAndroid Build Coastguard Worker * can convert either a's add op or b's add op to a mul op
1224*61046927SAndroid Build Coastguard Worker * so we can merge.
1225*61046927SAndroid Build Coastguard Worker */
1226*61046927SAndroid Build Coastguard Worker else if (a->alu.mul.op == V3D_QPU_M_NOP &&
1227*61046927SAndroid Build Coastguard Worker can_do_add_as_mul(b->alu.add.op)) {
1228*61046927SAndroid Build Coastguard Worker mul_inst = *b;
1229*61046927SAndroid Build Coastguard Worker qpu_convert_add_to_mul(devinfo, &mul_inst);
1230*61046927SAndroid Build Coastguard Worker
1231*61046927SAndroid Build Coastguard Worker merge.alu.mul = mul_inst.alu.mul;
1232*61046927SAndroid Build Coastguard Worker
1233*61046927SAndroid Build Coastguard Worker merge.flags.mc = mul_inst.flags.mc;
1234*61046927SAndroid Build Coastguard Worker merge.flags.mpf = mul_inst.flags.mpf;
1235*61046927SAndroid Build Coastguard Worker merge.flags.muf = mul_inst.flags.muf;
1236*61046927SAndroid Build Coastguard Worker
1237*61046927SAndroid Build Coastguard Worker add_instr = a;
1238*61046927SAndroid Build Coastguard Worker mul_instr = &mul_inst;
1239*61046927SAndroid Build Coastguard Worker } else if (a->alu.mul.op == V3D_QPU_M_NOP &&
1240*61046927SAndroid Build Coastguard Worker can_do_add_as_mul(a->alu.add.op)) {
1241*61046927SAndroid Build Coastguard Worker mul_inst = *a;
1242*61046927SAndroid Build Coastguard Worker qpu_convert_add_to_mul(devinfo, &mul_inst);
1243*61046927SAndroid Build Coastguard Worker
1244*61046927SAndroid Build Coastguard Worker merge = mul_inst;
1245*61046927SAndroid Build Coastguard Worker merge.alu.add = b->alu.add;
1246*61046927SAndroid Build Coastguard Worker
1247*61046927SAndroid Build Coastguard Worker merge.flags.ac = b->flags.ac;
1248*61046927SAndroid Build Coastguard Worker merge.flags.apf = b->flags.apf;
1249*61046927SAndroid Build Coastguard Worker merge.flags.auf = b->flags.auf;
1250*61046927SAndroid Build Coastguard Worker
1251*61046927SAndroid Build Coastguard Worker add_instr = b;
1252*61046927SAndroid Build Coastguard Worker mul_instr = &mul_inst;
1253*61046927SAndroid Build Coastguard Worker } else {
1254*61046927SAndroid Build Coastguard Worker return false;
1255*61046927SAndroid Build Coastguard Worker }
1256*61046927SAndroid Build Coastguard Worker }
1257*61046927SAndroid Build Coastguard Worker
1258*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr add_inst;
1259*61046927SAndroid Build Coastguard Worker if (b->alu.mul.op != V3D_QPU_M_NOP) {
1260*61046927SAndroid Build Coastguard Worker if (a->alu.mul.op == V3D_QPU_M_NOP) {
1261*61046927SAndroid Build Coastguard Worker merge.alu.mul = b->alu.mul;
1262*61046927SAndroid Build Coastguard Worker
1263*61046927SAndroid Build Coastguard Worker merge.flags.mc = b->flags.mc;
1264*61046927SAndroid Build Coastguard Worker merge.flags.mpf = b->flags.mpf;
1265*61046927SAndroid Build Coastguard Worker merge.flags.muf = b->flags.muf;
1266*61046927SAndroid Build Coastguard Worker
1267*61046927SAndroid Build Coastguard Worker mul_instr = b;
1268*61046927SAndroid Build Coastguard Worker add_instr = a;
1269*61046927SAndroid Build Coastguard Worker }
1270*61046927SAndroid Build Coastguard Worker /* If a's mul op is used but its add op is not, then see if we
1271*61046927SAndroid Build Coastguard Worker * can convert either a's mul op or b's mul op to an add op
1272*61046927SAndroid Build Coastguard Worker * so we can merge.
1273*61046927SAndroid Build Coastguard Worker */
1274*61046927SAndroid Build Coastguard Worker else if (a->alu.add.op == V3D_QPU_A_NOP &&
1275*61046927SAndroid Build Coastguard Worker can_do_mul_as_add(devinfo, b->alu.mul.op)) {
1276*61046927SAndroid Build Coastguard Worker add_inst = *b;
1277*61046927SAndroid Build Coastguard Worker qpu_convert_mul_to_add(&add_inst);
1278*61046927SAndroid Build Coastguard Worker
1279*61046927SAndroid Build Coastguard Worker merge.alu.add = add_inst.alu.add;
1280*61046927SAndroid Build Coastguard Worker
1281*61046927SAndroid Build Coastguard Worker merge.flags.ac = add_inst.flags.ac;
1282*61046927SAndroid Build Coastguard Worker merge.flags.apf = add_inst.flags.apf;
1283*61046927SAndroid Build Coastguard Worker merge.flags.auf = add_inst.flags.auf;
1284*61046927SAndroid Build Coastguard Worker
1285*61046927SAndroid Build Coastguard Worker mul_instr = a;
1286*61046927SAndroid Build Coastguard Worker add_instr = &add_inst;
1287*61046927SAndroid Build Coastguard Worker } else if (a->alu.add.op == V3D_QPU_A_NOP &&
1288*61046927SAndroid Build Coastguard Worker can_do_mul_as_add(devinfo, a->alu.mul.op)) {
1289*61046927SAndroid Build Coastguard Worker add_inst = *a;
1290*61046927SAndroid Build Coastguard Worker qpu_convert_mul_to_add(&add_inst);
1291*61046927SAndroid Build Coastguard Worker
1292*61046927SAndroid Build Coastguard Worker merge = add_inst;
1293*61046927SAndroid Build Coastguard Worker merge.alu.mul = b->alu.mul;
1294*61046927SAndroid Build Coastguard Worker
1295*61046927SAndroid Build Coastguard Worker merge.flags.mc = b->flags.mc;
1296*61046927SAndroid Build Coastguard Worker merge.flags.mpf = b->flags.mpf;
1297*61046927SAndroid Build Coastguard Worker merge.flags.muf = b->flags.muf;
1298*61046927SAndroid Build Coastguard Worker
1299*61046927SAndroid Build Coastguard Worker mul_instr = b;
1300*61046927SAndroid Build Coastguard Worker add_instr = &add_inst;
1301*61046927SAndroid Build Coastguard Worker } else {
1302*61046927SAndroid Build Coastguard Worker return false;
1303*61046927SAndroid Build Coastguard Worker }
1304*61046927SAndroid Build Coastguard Worker }
1305*61046927SAndroid Build Coastguard Worker
1306*61046927SAndroid Build Coastguard Worker /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and
1307*61046927SAndroid Build Coastguard Worker * they have restrictions on the number of raddrs that can be addressed
1308*61046927SAndroid Build Coastguard Worker * in a single instruction. In V3D 7.x, we don't have that restriction,
1309*61046927SAndroid Build Coastguard Worker * but we are still limited to a single small immediate per instruction.
1310*61046927SAndroid Build Coastguard Worker */
1311*61046927SAndroid Build Coastguard Worker if (add_instr && mul_instr &&
1312*61046927SAndroid Build Coastguard Worker !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) {
1313*61046927SAndroid Build Coastguard Worker return false;
1314*61046927SAndroid Build Coastguard Worker }
1315*61046927SAndroid Build Coastguard Worker
1316*61046927SAndroid Build Coastguard Worker merge.sig.thrsw |= b->sig.thrsw;
1317*61046927SAndroid Build Coastguard Worker merge.sig.ldunif |= b->sig.ldunif;
1318*61046927SAndroid Build Coastguard Worker merge.sig.ldunifrf |= b->sig.ldunifrf;
1319*61046927SAndroid Build Coastguard Worker merge.sig.ldunifa |= b->sig.ldunifa;
1320*61046927SAndroid Build Coastguard Worker merge.sig.ldunifarf |= b->sig.ldunifarf;
1321*61046927SAndroid Build Coastguard Worker merge.sig.ldtmu |= b->sig.ldtmu;
1322*61046927SAndroid Build Coastguard Worker merge.sig.ldvary |= b->sig.ldvary;
1323*61046927SAndroid Build Coastguard Worker merge.sig.ldvpm |= b->sig.ldvpm;
1324*61046927SAndroid Build Coastguard Worker merge.sig.ldtlb |= b->sig.ldtlb;
1325*61046927SAndroid Build Coastguard Worker merge.sig.ldtlbu |= b->sig.ldtlbu;
1326*61046927SAndroid Build Coastguard Worker merge.sig.ucb |= b->sig.ucb;
1327*61046927SAndroid Build Coastguard Worker merge.sig.rotate |= b->sig.rotate;
1328*61046927SAndroid Build Coastguard Worker merge.sig.wrtmuc |= b->sig.wrtmuc;
1329*61046927SAndroid Build Coastguard Worker
1330*61046927SAndroid Build Coastguard Worker if (v3d_qpu_sig_writes_address(devinfo, &a->sig) &&
1331*61046927SAndroid Build Coastguard Worker v3d_qpu_sig_writes_address(devinfo, &b->sig))
1332*61046927SAndroid Build Coastguard Worker return false;
1333*61046927SAndroid Build Coastguard Worker merge.sig_addr |= b->sig_addr;
1334*61046927SAndroid Build Coastguard Worker merge.sig_magic |= b->sig_magic;
1335*61046927SAndroid Build Coastguard Worker
1336*61046927SAndroid Build Coastguard Worker uint64_t packed;
1337*61046927SAndroid Build Coastguard Worker bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
1338*61046927SAndroid Build Coastguard Worker
1339*61046927SAndroid Build Coastguard Worker *result = merge;
1340*61046927SAndroid Build Coastguard Worker /* No modifying the real instructions on failure. */
1341*61046927SAndroid Build Coastguard Worker assert(ok || (a != result && b != result));
1342*61046927SAndroid Build Coastguard Worker
1343*61046927SAndroid Build Coastguard Worker return ok;
1344*61046927SAndroid Build Coastguard Worker }
1345*61046927SAndroid Build Coastguard Worker
1346*61046927SAndroid Build Coastguard Worker static inline bool
try_skip_for_ldvary_pipelining(const struct v3d_qpu_instr * inst)1347*61046927SAndroid Build Coastguard Worker try_skip_for_ldvary_pipelining(const struct v3d_qpu_instr *inst)
1348*61046927SAndroid Build Coastguard Worker {
1349*61046927SAndroid Build Coastguard Worker return inst->sig.ldunif || inst->sig.ldunifrf;
1350*61046927SAndroid Build Coastguard Worker }
1351*61046927SAndroid Build Coastguard Worker
/* Forward declaration: choose_instruction_to_schedule() calls this to check
 * whether an instruction may be placed in a thrsw delay slot.
 */
static bool
qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile *c,
                                         struct choose_scoreboard *scoreboard,
                                         const struct qinst *qinst);
1356*61046927SAndroid Build Coastguard Worker
1357*61046927SAndroid Build Coastguard Worker static struct schedule_node *
choose_instruction_to_schedule(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct schedule_node * prev_inst)1358*61046927SAndroid Build Coastguard Worker choose_instruction_to_schedule(struct v3d_compile *c,
1359*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
1360*61046927SAndroid Build Coastguard Worker struct schedule_node *prev_inst)
1361*61046927SAndroid Build Coastguard Worker {
1362*61046927SAndroid Build Coastguard Worker struct schedule_node *chosen = NULL;
1363*61046927SAndroid Build Coastguard Worker int chosen_prio = 0;
1364*61046927SAndroid Build Coastguard Worker
1365*61046927SAndroid Build Coastguard Worker /* Don't pair up anything with a thread switch signal -- emit_thrsw()
1366*61046927SAndroid Build Coastguard Worker * will handle pairing it along with filling the delay slots.
1367*61046927SAndroid Build Coastguard Worker */
1368*61046927SAndroid Build Coastguard Worker if (prev_inst) {
1369*61046927SAndroid Build Coastguard Worker if (prev_inst->inst->qpu.sig.thrsw)
1370*61046927SAndroid Build Coastguard Worker return NULL;
1371*61046927SAndroid Build Coastguard Worker }
1372*61046927SAndroid Build Coastguard Worker
1373*61046927SAndroid Build Coastguard Worker bool ldvary_pipelining = c->s->info.stage == MESA_SHADER_FRAGMENT &&
1374*61046927SAndroid Build Coastguard Worker scoreboard->ldvary_count < c->num_inputs;
1375*61046927SAndroid Build Coastguard Worker bool skipped_insts_for_ldvary_pipelining = false;
1376*61046927SAndroid Build Coastguard Worker retry:
1377*61046927SAndroid Build Coastguard Worker list_for_each_entry(struct schedule_node, n, &scoreboard->dag->heads,
1378*61046927SAndroid Build Coastguard Worker dag.link) {
1379*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst = &n->inst->qpu;
1380*61046927SAndroid Build Coastguard Worker
1381*61046927SAndroid Build Coastguard Worker if (ldvary_pipelining && try_skip_for_ldvary_pipelining(inst)) {
1382*61046927SAndroid Build Coastguard Worker skipped_insts_for_ldvary_pipelining = true;
1383*61046927SAndroid Build Coastguard Worker continue;
1384*61046927SAndroid Build Coastguard Worker }
1385*61046927SAndroid Build Coastguard Worker
1386*61046927SAndroid Build Coastguard Worker /* Don't choose the branch instruction until it's the last one
1387*61046927SAndroid Build Coastguard Worker * left. We'll move it up to fit its delay slots after we
1388*61046927SAndroid Build Coastguard Worker * choose it.
1389*61046927SAndroid Build Coastguard Worker */
1390*61046927SAndroid Build Coastguard Worker if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH &&
1391*61046927SAndroid Build Coastguard Worker !list_is_singular(&scoreboard->dag->heads)) {
1392*61046927SAndroid Build Coastguard Worker continue;
1393*61046927SAndroid Build Coastguard Worker }
1394*61046927SAndroid Build Coastguard Worker
1395*61046927SAndroid Build Coastguard Worker /* We need to have 3 delay slots between a write to unifa and
1396*61046927SAndroid Build Coastguard Worker * a follow-up ldunifa.
1397*61046927SAndroid Build Coastguard Worker */
1398*61046927SAndroid Build Coastguard Worker if ((inst->sig.ldunifa || inst->sig.ldunifarf) &&
1399*61046927SAndroid Build Coastguard Worker scoreboard->tick - scoreboard->last_unifa_write_tick <= 3)
1400*61046927SAndroid Build Coastguard Worker continue;
1401*61046927SAndroid Build Coastguard Worker
1402*61046927SAndroid Build Coastguard Worker /* "An instruction must not read from a location in physical
1403*61046927SAndroid Build Coastguard Worker * regfile A or B that was written to by the previous
1404*61046927SAndroid Build Coastguard Worker * instruction."
1405*61046927SAndroid Build Coastguard Worker */
1406*61046927SAndroid Build Coastguard Worker if (reads_too_soon_after_write(c->devinfo, scoreboard, n->inst))
1407*61046927SAndroid Build Coastguard Worker continue;
1408*61046927SAndroid Build Coastguard Worker
1409*61046927SAndroid Build Coastguard Worker if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst))
1410*61046927SAndroid Build Coastguard Worker continue;
1411*61046927SAndroid Build Coastguard Worker
1412*61046927SAndroid Build Coastguard Worker /* "Before doing a TLB access a scoreboard wait must have been
1413*61046927SAndroid Build Coastguard Worker * done. This happens either on the first or last thread
1414*61046927SAndroid Build Coastguard Worker * switch, depending on a setting (scb_wait_on_first_thrsw) in
1415*61046927SAndroid Build Coastguard Worker * the shader state."
1416*61046927SAndroid Build Coastguard Worker */
1417*61046927SAndroid Build Coastguard Worker if (pixel_scoreboard_too_soon(c, scoreboard, inst))
1418*61046927SAndroid Build Coastguard Worker continue;
1419*61046927SAndroid Build Coastguard Worker
1420*61046927SAndroid Build Coastguard Worker /* ldunif and ldvary both write the same register (r5 for v42
1421*61046927SAndroid Build Coastguard Worker * and below, rf0 for v71), but ldunif does so a tick sooner.
1422*61046927SAndroid Build Coastguard Worker * If the ldvary's register wasn't used, then ldunif might
1423*61046927SAndroid Build Coastguard Worker * otherwise get scheduled so ldunif and ldvary try to update
1424*61046927SAndroid Build Coastguard Worker * the register in the same tick.
1425*61046927SAndroid Build Coastguard Worker */
1426*61046927SAndroid Build Coastguard Worker if ((inst->sig.ldunif || inst->sig.ldunifa) &&
1427*61046927SAndroid Build Coastguard Worker scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
1428*61046927SAndroid Build Coastguard Worker continue;
1429*61046927SAndroid Build Coastguard Worker }
1430*61046927SAndroid Build Coastguard Worker
1431*61046927SAndroid Build Coastguard Worker /* If we are in a thrsw delay slot check that this instruction
1432*61046927SAndroid Build Coastguard Worker * is valid for that.
1433*61046927SAndroid Build Coastguard Worker */
1434*61046927SAndroid Build Coastguard Worker if (scoreboard->last_thrsw_tick + 2 >= scoreboard->tick &&
1435*61046927SAndroid Build Coastguard Worker !qpu_inst_after_thrsw_valid_in_delay_slot(c, scoreboard,
1436*61046927SAndroid Build Coastguard Worker n->inst)) {
1437*61046927SAndroid Build Coastguard Worker continue;
1438*61046927SAndroid Build Coastguard Worker }
1439*61046927SAndroid Build Coastguard Worker
1440*61046927SAndroid Build Coastguard Worker if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
1441*61046927SAndroid Build Coastguard Worker /* Don't try to put a branch in the delay slots of another
1442*61046927SAndroid Build Coastguard Worker * branch or a unifa write.
1443*61046927SAndroid Build Coastguard Worker */
1444*61046927SAndroid Build Coastguard Worker if (scoreboard->last_branch_tick + 3 >= scoreboard->tick)
1445*61046927SAndroid Build Coastguard Worker continue;
1446*61046927SAndroid Build Coastguard Worker if (scoreboard->last_unifa_write_tick + 3 >= scoreboard->tick)
1447*61046927SAndroid Build Coastguard Worker continue;
1448*61046927SAndroid Build Coastguard Worker
1449*61046927SAndroid Build Coastguard Worker /* No branch with cond != 0,2,3 and msfign != 0 after
1450*61046927SAndroid Build Coastguard Worker * setmsf.
1451*61046927SAndroid Build Coastguard Worker */
1452*61046927SAndroid Build Coastguard Worker if (scoreboard->last_setmsf_tick == scoreboard->tick - 1 &&
1453*61046927SAndroid Build Coastguard Worker inst->branch.msfign != V3D_QPU_MSFIGN_NONE &&
1454*61046927SAndroid Build Coastguard Worker inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS &&
1455*61046927SAndroid Build Coastguard Worker inst->branch.cond != V3D_QPU_BRANCH_COND_A0 &&
1456*61046927SAndroid Build Coastguard Worker inst->branch.cond != V3D_QPU_BRANCH_COND_NA0) {
1457*61046927SAndroid Build Coastguard Worker continue;
1458*61046927SAndroid Build Coastguard Worker }
1459*61046927SAndroid Build Coastguard Worker }
1460*61046927SAndroid Build Coastguard Worker
1461*61046927SAndroid Build Coastguard Worker /* If we're trying to pair with another instruction, check
1462*61046927SAndroid Build Coastguard Worker * that they're compatible.
1463*61046927SAndroid Build Coastguard Worker */
1464*61046927SAndroid Build Coastguard Worker if (prev_inst) {
1465*61046927SAndroid Build Coastguard Worker /* Don't pair up a thread switch signal -- we'll
1466*61046927SAndroid Build Coastguard Worker * handle pairing it when we pick it on its own.
1467*61046927SAndroid Build Coastguard Worker */
1468*61046927SAndroid Build Coastguard Worker if (inst->sig.thrsw)
1469*61046927SAndroid Build Coastguard Worker continue;
1470*61046927SAndroid Build Coastguard Worker
1471*61046927SAndroid Build Coastguard Worker if (prev_inst->inst->uniform != -1 &&
1472*61046927SAndroid Build Coastguard Worker n->inst->uniform != -1)
1473*61046927SAndroid Build Coastguard Worker continue;
1474*61046927SAndroid Build Coastguard Worker
1475*61046927SAndroid Build Coastguard Worker /* Simulator complains if we have two uniforms loaded in
1476*61046927SAndroid Build Coastguard Worker * the the same instruction, which could happen if we
1477*61046927SAndroid Build Coastguard Worker * have a ldunif or sideband uniform and we pair that
1478*61046927SAndroid Build Coastguard Worker * with ldunifa.
1479*61046927SAndroid Build Coastguard Worker */
1480*61046927SAndroid Build Coastguard Worker if (vir_has_uniform(prev_inst->inst) &&
1481*61046927SAndroid Build Coastguard Worker (inst->sig.ldunifa || inst->sig.ldunifarf)) {
1482*61046927SAndroid Build Coastguard Worker continue;
1483*61046927SAndroid Build Coastguard Worker }
1484*61046927SAndroid Build Coastguard Worker
1485*61046927SAndroid Build Coastguard Worker if ((prev_inst->inst->qpu.sig.ldunifa ||
1486*61046927SAndroid Build Coastguard Worker prev_inst->inst->qpu.sig.ldunifarf) &&
1487*61046927SAndroid Build Coastguard Worker vir_has_uniform(n->inst)) {
1488*61046927SAndroid Build Coastguard Worker continue;
1489*61046927SAndroid Build Coastguard Worker }
1490*61046927SAndroid Build Coastguard Worker
1491*61046927SAndroid Build Coastguard Worker /* Don't merge TLB instructions before we have acquired
1492*61046927SAndroid Build Coastguard Worker * the scoreboard lock.
1493*61046927SAndroid Build Coastguard Worker */
1494*61046927SAndroid Build Coastguard Worker if (pixel_scoreboard_too_soon(c, scoreboard, inst))
1495*61046927SAndroid Build Coastguard Worker continue;
1496*61046927SAndroid Build Coastguard Worker
1497*61046927SAndroid Build Coastguard Worker /* When we successfully pair up an ldvary we then try
1498*61046927SAndroid Build Coastguard Worker * to merge it into the previous instruction if
1499*61046927SAndroid Build Coastguard Worker * possible to improve pipelining. Don't pick up the
1500*61046927SAndroid Build Coastguard Worker * ldvary now if the follow-up fixup would place
1501*61046927SAndroid Build Coastguard Worker * it in the delay slots of a thrsw, which is not
1502*61046927SAndroid Build Coastguard Worker * allowed and would prevent the fixup from being
1503*61046927SAndroid Build Coastguard Worker * successful. In V3D 7.x we can allow this to happen
1504*61046927SAndroid Build Coastguard Worker * as long as it is not the last delay slot.
1505*61046927SAndroid Build Coastguard Worker */
1506*61046927SAndroid Build Coastguard Worker if (inst->sig.ldvary) {
1507*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver == 42 &&
1508*61046927SAndroid Build Coastguard Worker scoreboard->last_thrsw_tick + 2 >=
1509*61046927SAndroid Build Coastguard Worker scoreboard->tick - 1) {
1510*61046927SAndroid Build Coastguard Worker continue;
1511*61046927SAndroid Build Coastguard Worker }
1512*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver >= 71 &&
1513*61046927SAndroid Build Coastguard Worker scoreboard->last_thrsw_tick + 2 ==
1514*61046927SAndroid Build Coastguard Worker scoreboard->tick - 1) {
1515*61046927SAndroid Build Coastguard Worker continue;
1516*61046927SAndroid Build Coastguard Worker }
1517*61046927SAndroid Build Coastguard Worker }
1518*61046927SAndroid Build Coastguard Worker
1519*61046927SAndroid Build Coastguard Worker /* We can emit a new tmu lookup with a previous ldtmu
1520*61046927SAndroid Build Coastguard Worker * if doing this would free just enough space in the
1521*61046927SAndroid Build Coastguard Worker * TMU output fifo so we don't overflow, however, this
1522*61046927SAndroid Build Coastguard Worker * is only safe if the ldtmu cannot stall.
1523*61046927SAndroid Build Coastguard Worker *
1524*61046927SAndroid Build Coastguard Worker * A ldtmu can stall if it is not the first following a
1525*61046927SAndroid Build Coastguard Worker * thread switch and corresponds to the first word of a
1526*61046927SAndroid Build Coastguard Worker * read request.
1527*61046927SAndroid Build Coastguard Worker *
1528*61046927SAndroid Build Coastguard Worker * FIXME: For now we forbid pairing up a new lookup
1529*61046927SAndroid Build Coastguard Worker * with a previous ldtmu that is not the first after a
1530*61046927SAndroid Build Coastguard Worker * thrsw if that could overflow the TMU output fifo
1531*61046927SAndroid Build Coastguard Worker * regardless of whether the ldtmu is reading the first
1532*61046927SAndroid Build Coastguard Worker * word of a TMU result or not, since we don't track
1533*61046927SAndroid Build Coastguard Worker * this aspect in the compiler yet.
1534*61046927SAndroid Build Coastguard Worker */
1535*61046927SAndroid Build Coastguard Worker if (prev_inst->inst->qpu.sig.ldtmu &&
1536*61046927SAndroid Build Coastguard Worker !scoreboard->first_ldtmu_after_thrsw &&
1537*61046927SAndroid Build Coastguard Worker (scoreboard->pending_ldtmu_count +
1538*61046927SAndroid Build Coastguard Worker n->inst->ldtmu_count > 16 / c->threads)) {
1539*61046927SAndroid Build Coastguard Worker continue;
1540*61046927SAndroid Build Coastguard Worker }
1541*61046927SAndroid Build Coastguard Worker
1542*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr merged_inst;
1543*61046927SAndroid Build Coastguard Worker if (!qpu_merge_inst(c->devinfo, &merged_inst,
1544*61046927SAndroid Build Coastguard Worker &prev_inst->inst->qpu, inst)) {
1545*61046927SAndroid Build Coastguard Worker continue;
1546*61046927SAndroid Build Coastguard Worker }
1547*61046927SAndroid Build Coastguard Worker }
1548*61046927SAndroid Build Coastguard Worker
1549*61046927SAndroid Build Coastguard Worker int prio = get_instruction_priority(c->devinfo, inst);
1550*61046927SAndroid Build Coastguard Worker
1551*61046927SAndroid Build Coastguard Worker if (read_stalls(c->devinfo, scoreboard, inst)) {
1552*61046927SAndroid Build Coastguard Worker /* Don't merge an instruction that stalls */
1553*61046927SAndroid Build Coastguard Worker if (prev_inst)
1554*61046927SAndroid Build Coastguard Worker continue;
1555*61046927SAndroid Build Coastguard Worker else {
1556*61046927SAndroid Build Coastguard Worker /* Any instruction that don't stall will have
1557*61046927SAndroid Build Coastguard Worker * higher scheduling priority */
1558*61046927SAndroid Build Coastguard Worker prio -= MAX_SCHEDULE_PRIORITY;
1559*61046927SAndroid Build Coastguard Worker assert(prio < 0);
1560*61046927SAndroid Build Coastguard Worker }
1561*61046927SAndroid Build Coastguard Worker }
1562*61046927SAndroid Build Coastguard Worker
1563*61046927SAndroid Build Coastguard Worker /* Found a valid instruction. If nothing better comes along,
1564*61046927SAndroid Build Coastguard Worker * this one works.
1565*61046927SAndroid Build Coastguard Worker */
1566*61046927SAndroid Build Coastguard Worker if (!chosen) {
1567*61046927SAndroid Build Coastguard Worker chosen = n;
1568*61046927SAndroid Build Coastguard Worker chosen_prio = prio;
1569*61046927SAndroid Build Coastguard Worker continue;
1570*61046927SAndroid Build Coastguard Worker }
1571*61046927SAndroid Build Coastguard Worker
1572*61046927SAndroid Build Coastguard Worker if (prio > chosen_prio) {
1573*61046927SAndroid Build Coastguard Worker chosen = n;
1574*61046927SAndroid Build Coastguard Worker chosen_prio = prio;
1575*61046927SAndroid Build Coastguard Worker } else if (prio < chosen_prio) {
1576*61046927SAndroid Build Coastguard Worker continue;
1577*61046927SAndroid Build Coastguard Worker }
1578*61046927SAndroid Build Coastguard Worker
1579*61046927SAndroid Build Coastguard Worker if (n->delay > chosen->delay) {
1580*61046927SAndroid Build Coastguard Worker chosen = n;
1581*61046927SAndroid Build Coastguard Worker chosen_prio = prio;
1582*61046927SAndroid Build Coastguard Worker } else if (n->delay < chosen->delay) {
1583*61046927SAndroid Build Coastguard Worker continue;
1584*61046927SAndroid Build Coastguard Worker }
1585*61046927SAndroid Build Coastguard Worker }
1586*61046927SAndroid Build Coastguard Worker
1587*61046927SAndroid Build Coastguard Worker /* If we did not find any instruction to schedule but we discarded
1588*61046927SAndroid Build Coastguard Worker * some of them to prioritize ldvary pipelining, try again.
1589*61046927SAndroid Build Coastguard Worker */
1590*61046927SAndroid Build Coastguard Worker if (!chosen && !prev_inst && skipped_insts_for_ldvary_pipelining) {
1591*61046927SAndroid Build Coastguard Worker skipped_insts_for_ldvary_pipelining = false;
1592*61046927SAndroid Build Coastguard Worker ldvary_pipelining = false;
1593*61046927SAndroid Build Coastguard Worker goto retry;
1594*61046927SAndroid Build Coastguard Worker }
1595*61046927SAndroid Build Coastguard Worker
1596*61046927SAndroid Build Coastguard Worker if (chosen && chosen->inst->qpu.sig.ldvary) {
1597*61046927SAndroid Build Coastguard Worker scoreboard->ldvary_count++;
1598*61046927SAndroid Build Coastguard Worker /* If we are pairing an ldvary, flag it so we can fix it up for
1599*61046927SAndroid Build Coastguard Worker * optimal pipelining of ldvary sequences.
1600*61046927SAndroid Build Coastguard Worker */
1601*61046927SAndroid Build Coastguard Worker if (prev_inst)
1602*61046927SAndroid Build Coastguard Worker scoreboard->fixup_ldvary = true;
1603*61046927SAndroid Build Coastguard Worker }
1604*61046927SAndroid Build Coastguard Worker
1605*61046927SAndroid Build Coastguard Worker return chosen;
1606*61046927SAndroid Build Coastguard Worker }
1607*61046927SAndroid Build Coastguard Worker
1608*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_for_magic_waddr(struct choose_scoreboard * scoreboard,enum v3d_qpu_waddr waddr,const struct v3d_device_info * devinfo)1609*61046927SAndroid Build Coastguard Worker update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
1610*61046927SAndroid Build Coastguard Worker enum v3d_qpu_waddr waddr,
1611*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo)
1612*61046927SAndroid Build Coastguard Worker {
1613*61046927SAndroid Build Coastguard Worker if (v3d_qpu_magic_waddr_is_sfu(waddr))
1614*61046927SAndroid Build Coastguard Worker scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
1615*61046927SAndroid Build Coastguard Worker else if (waddr == V3D_QPU_WADDR_UNIFA)
1616*61046927SAndroid Build Coastguard Worker scoreboard->last_unifa_write_tick = scoreboard->tick;
1617*61046927SAndroid Build Coastguard Worker }
1618*61046927SAndroid Build Coastguard Worker
1619*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst)1620*61046927SAndroid Build Coastguard Worker update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard *scoreboard,
1621*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst)
1622*61046927SAndroid Build Coastguard Worker {
1623*61046927SAndroid Build Coastguard Worker if (v3d_qpu_instr_is_sfu(inst)) {
1624*61046927SAndroid Build Coastguard Worker scoreboard->last_stallable_sfu_reg = inst->alu.add.waddr;
1625*61046927SAndroid Build Coastguard Worker scoreboard->last_stallable_sfu_tick = scoreboard->tick;
1626*61046927SAndroid Build Coastguard Worker }
1627*61046927SAndroid Build Coastguard Worker }
1628*61046927SAndroid Build Coastguard Worker
1629*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_tmu_tracking(struct choose_scoreboard * scoreboard,const struct qinst * inst)1630*61046927SAndroid Build Coastguard Worker update_scoreboard_tmu_tracking(struct choose_scoreboard *scoreboard,
1631*61046927SAndroid Build Coastguard Worker const struct qinst *inst)
1632*61046927SAndroid Build Coastguard Worker {
1633*61046927SAndroid Build Coastguard Worker /* Track if the have seen any ldtmu after the last thread switch */
1634*61046927SAndroid Build Coastguard Worker if (scoreboard->tick == scoreboard->last_thrsw_tick + 2)
1635*61046927SAndroid Build Coastguard Worker scoreboard->first_ldtmu_after_thrsw = true;
1636*61046927SAndroid Build Coastguard Worker
1637*61046927SAndroid Build Coastguard Worker /* Track the number of pending ldtmu instructions for outstanding
1638*61046927SAndroid Build Coastguard Worker * TMU lookups.
1639*61046927SAndroid Build Coastguard Worker */
1640*61046927SAndroid Build Coastguard Worker scoreboard->pending_ldtmu_count += inst->ldtmu_count;
1641*61046927SAndroid Build Coastguard Worker if (inst->qpu.sig.ldtmu) {
1642*61046927SAndroid Build Coastguard Worker assert(scoreboard->pending_ldtmu_count > 0);
1643*61046927SAndroid Build Coastguard Worker scoreboard->pending_ldtmu_count--;
1644*61046927SAndroid Build Coastguard Worker scoreboard->first_ldtmu_after_thrsw = false;
1645*61046927SAndroid Build Coastguard Worker }
1646*61046927SAndroid Build Coastguard Worker }
1647*61046927SAndroid Build Coastguard Worker
1648*61046927SAndroid Build Coastguard Worker static void
set_has_rf0_flops_conflict(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst,const struct v3d_device_info * devinfo)1649*61046927SAndroid Build Coastguard Worker set_has_rf0_flops_conflict(struct choose_scoreboard *scoreboard,
1650*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst,
1651*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo)
1652*61046927SAndroid Build Coastguard Worker {
1653*61046927SAndroid Build Coastguard Worker if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick &&
1654*61046927SAndroid Build Coastguard Worker v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
1655*61046927SAndroid Build Coastguard Worker !inst->sig_magic) {
1656*61046927SAndroid Build Coastguard Worker scoreboard->has_rf0_flops_conflict = true;
1657*61046927SAndroid Build Coastguard Worker }
1658*61046927SAndroid Build Coastguard Worker }
1659*61046927SAndroid Build Coastguard Worker
1660*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_for_rf0_flops(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst,const struct v3d_device_info * devinfo)1661*61046927SAndroid Build Coastguard Worker update_scoreboard_for_rf0_flops(struct choose_scoreboard *scoreboard,
1662*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst,
1663*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo)
1664*61046927SAndroid Build Coastguard Worker {
1665*61046927SAndroid Build Coastguard Worker if (devinfo->ver < 71)
1666*61046927SAndroid Build Coastguard Worker return;
1667*61046927SAndroid Build Coastguard Worker
1668*61046927SAndroid Build Coastguard Worker /* Thread switch restrictions:
1669*61046927SAndroid Build Coastguard Worker *
1670*61046927SAndroid Build Coastguard Worker * At the point of a thread switch or thread end (when the actual
1671*61046927SAndroid Build Coastguard Worker * thread switch or thread end happens, not when the signalling
1672*61046927SAndroid Build Coastguard Worker * instruction is processed):
1673*61046927SAndroid Build Coastguard Worker *
1674*61046927SAndroid Build Coastguard Worker * - If the most recent write to rf0 was from a ldunif, ldunifa, or
1675*61046927SAndroid Build Coastguard Worker * ldvary instruction in which another signal also wrote to the
1676*61046927SAndroid Build Coastguard Worker * register file, and the final instruction of the thread section
1677*61046927SAndroid Build Coastguard Worker * contained a signal which wrote to the register file, then the
1678*61046927SAndroid Build Coastguard Worker * value of rf0 is undefined at the start of the new section
1679*61046927SAndroid Build Coastguard Worker *
1680*61046927SAndroid Build Coastguard Worker * Here we use the scoreboard to track if our last rf0 implicit write
1681*61046927SAndroid Build Coastguard Worker * happens at the same time that another signal writes the register
1682*61046927SAndroid Build Coastguard Worker * file (has_rf0_flops_conflict). We will use that information when
1683*61046927SAndroid Build Coastguard Worker * scheduling thrsw instructions to avoid putting anything in their
1684*61046927SAndroid Build Coastguard Worker * last delay slot which has a signal that writes to the register file.
1685*61046927SAndroid Build Coastguard Worker */
1686*61046927SAndroid Build Coastguard Worker
1687*61046927SAndroid Build Coastguard Worker /* Reset tracking if we have an explicit rf0 write or we are starting
1688*61046927SAndroid Build Coastguard Worker * a new thread section.
1689*61046927SAndroid Build Coastguard Worker */
1690*61046927SAndroid Build Coastguard Worker if (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) ||
1691*61046927SAndroid Build Coastguard Worker scoreboard->tick - scoreboard->last_thrsw_tick == 3) {
1692*61046927SAndroid Build Coastguard Worker scoreboard->last_implicit_rf0_write_tick = -10;
1693*61046927SAndroid Build Coastguard Worker scoreboard->has_rf0_flops_conflict = false;
1694*61046927SAndroid Build Coastguard Worker }
1695*61046927SAndroid Build Coastguard Worker
1696*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_rf0_implicitly(devinfo, inst)) {
1697*61046927SAndroid Build Coastguard Worker scoreboard->last_implicit_rf0_write_tick = inst->sig.ldvary ?
1698*61046927SAndroid Build Coastguard Worker scoreboard->tick + 1 : scoreboard->tick;
1699*61046927SAndroid Build Coastguard Worker }
1700*61046927SAndroid Build Coastguard Worker
1701*61046927SAndroid Build Coastguard Worker set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
1702*61046927SAndroid Build Coastguard Worker }
1703*61046927SAndroid Build Coastguard Worker
1704*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_for_chosen(struct choose_scoreboard * scoreboard,const struct qinst * qinst,const struct v3d_device_info * devinfo)1705*61046927SAndroid Build Coastguard Worker update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
1706*61046927SAndroid Build Coastguard Worker const struct qinst *qinst,
1707*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo)
1708*61046927SAndroid Build Coastguard Worker {
1709*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst = &qinst->qpu;
1710*61046927SAndroid Build Coastguard Worker
1711*61046927SAndroid Build Coastguard Worker if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
1712*61046927SAndroid Build Coastguard Worker return;
1713*61046927SAndroid Build Coastguard Worker
1714*61046927SAndroid Build Coastguard Worker assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
1715*61046927SAndroid Build Coastguard Worker
1716*61046927SAndroid Build Coastguard Worker if (inst->alu.add.op != V3D_QPU_A_NOP) {
1717*61046927SAndroid Build Coastguard Worker if (inst->alu.add.magic_write) {
1718*61046927SAndroid Build Coastguard Worker update_scoreboard_for_magic_waddr(scoreboard,
1719*61046927SAndroid Build Coastguard Worker inst->alu.add.waddr,
1720*61046927SAndroid Build Coastguard Worker devinfo);
1721*61046927SAndroid Build Coastguard Worker } else {
1722*61046927SAndroid Build Coastguard Worker update_scoreboard_for_sfu_stall_waddr(scoreboard,
1723*61046927SAndroid Build Coastguard Worker inst);
1724*61046927SAndroid Build Coastguard Worker }
1725*61046927SAndroid Build Coastguard Worker
1726*61046927SAndroid Build Coastguard Worker if (inst->alu.add.op == V3D_QPU_A_SETMSF)
1727*61046927SAndroid Build Coastguard Worker scoreboard->last_setmsf_tick = scoreboard->tick;
1728*61046927SAndroid Build Coastguard Worker }
1729*61046927SAndroid Build Coastguard Worker
1730*61046927SAndroid Build Coastguard Worker if (inst->alu.mul.op != V3D_QPU_M_NOP) {
1731*61046927SAndroid Build Coastguard Worker if (inst->alu.mul.magic_write) {
1732*61046927SAndroid Build Coastguard Worker update_scoreboard_for_magic_waddr(scoreboard,
1733*61046927SAndroid Build Coastguard Worker inst->alu.mul.waddr,
1734*61046927SAndroid Build Coastguard Worker devinfo);
1735*61046927SAndroid Build Coastguard Worker }
1736*61046927SAndroid Build Coastguard Worker }
1737*61046927SAndroid Build Coastguard Worker
1738*61046927SAndroid Build Coastguard Worker if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && inst->sig_magic) {
1739*61046927SAndroid Build Coastguard Worker update_scoreboard_for_magic_waddr(scoreboard,
1740*61046927SAndroid Build Coastguard Worker inst->sig_addr,
1741*61046927SAndroid Build Coastguard Worker devinfo);
1742*61046927SAndroid Build Coastguard Worker }
1743*61046927SAndroid Build Coastguard Worker
1744*61046927SAndroid Build Coastguard Worker if (inst->sig.ldvary)
1745*61046927SAndroid Build Coastguard Worker scoreboard->last_ldvary_tick = scoreboard->tick;
1746*61046927SAndroid Build Coastguard Worker
1747*61046927SAndroid Build Coastguard Worker update_scoreboard_for_rf0_flops(scoreboard, inst, devinfo);
1748*61046927SAndroid Build Coastguard Worker
1749*61046927SAndroid Build Coastguard Worker update_scoreboard_tmu_tracking(scoreboard, qinst);
1750*61046927SAndroid Build Coastguard Worker }
1751*61046927SAndroid Build Coastguard Worker
1752*61046927SAndroid Build Coastguard Worker static void
dump_state(const struct v3d_device_info * devinfo,struct dag * dag)1753*61046927SAndroid Build Coastguard Worker dump_state(const struct v3d_device_info *devinfo, struct dag *dag)
1754*61046927SAndroid Build Coastguard Worker {
1755*61046927SAndroid Build Coastguard Worker list_for_each_entry(struct schedule_node, n, &dag->heads, dag.link) {
1756*61046927SAndroid Build Coastguard Worker fprintf(stderr, " t=%4d: ", n->unblocked_time);
1757*61046927SAndroid Build Coastguard Worker v3d_qpu_dump(devinfo, &n->inst->qpu);
1758*61046927SAndroid Build Coastguard Worker fprintf(stderr, "\n");
1759*61046927SAndroid Build Coastguard Worker
1760*61046927SAndroid Build Coastguard Worker util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
1761*61046927SAndroid Build Coastguard Worker struct schedule_node *child =
1762*61046927SAndroid Build Coastguard Worker (struct schedule_node *)edge->child;
1763*61046927SAndroid Build Coastguard Worker if (!child)
1764*61046927SAndroid Build Coastguard Worker continue;
1765*61046927SAndroid Build Coastguard Worker
1766*61046927SAndroid Build Coastguard Worker fprintf(stderr, " - ");
1767*61046927SAndroid Build Coastguard Worker v3d_qpu_dump(devinfo, &child->inst->qpu);
1768*61046927SAndroid Build Coastguard Worker fprintf(stderr, " (%d parents, %c)\n",
1769*61046927SAndroid Build Coastguard Worker child->dag.parent_count,
1770*61046927SAndroid Build Coastguard Worker edge->data ? 'w' : 'r');
1771*61046927SAndroid Build Coastguard Worker }
1772*61046927SAndroid Build Coastguard Worker }
1773*61046927SAndroid Build Coastguard Worker }
1774*61046927SAndroid Build Coastguard Worker
/**
 * Returns the scheduling latency to assume between an instruction that writes
 * the magic write address `waddr` and the dependent instruction `after`.
 *
 * The result is 100 for a TMU write whose consumer waits on the TMU, 3 for an
 * SFU write, and 1 for everything else.
 */
static uint32_t
magic_waddr_latency(const struct v3d_device_info *devinfo,
                    enum v3d_qpu_waddr waddr,
                    const struct v3d_qpu_instr *after)
{
        /* Texture fetch requests get a huge latency before their results
         * are expected back.
         *
         * FIXME: This heuristic is rather bogus.  Given
         *
         * mov tmu0_s, a
         * <a bit of math>
         * mov tmu0_s, b
         * load_tmu0
         * <more math>
         * load_tmu0
         *
         * we score it as worse than
         *
         * mov tmu0_s, a
         * mov tmu0_s, b
         * <lots of math>
         * load_tmu0
         * <more math>
         * load_tmu0
         *
         * because the first load_tmu0 gets associated with the *second*
         * tmu0_s write.
         */
        const bool tmu_round_trip =
                v3d_qpu_magic_waddr_is_tmu(devinfo, waddr) &&
                v3d_qpu_waits_on_tmu(after);

        if (tmu_round_trip)
                return 100;

        /* Any dependent of an SFU write is assumed to consume the SFU
         * result.
         */
        return v3d_qpu_magic_waddr_is_sfu(waddr) ? 3 : 1;
}
1813*61046927SAndroid Build Coastguard Worker
1814*61046927SAndroid Build Coastguard Worker static uint32_t
instruction_latency(const struct v3d_device_info * devinfo,struct schedule_node * before,struct schedule_node * after)1815*61046927SAndroid Build Coastguard Worker instruction_latency(const struct v3d_device_info *devinfo,
1816*61046927SAndroid Build Coastguard Worker struct schedule_node *before, struct schedule_node *after)
1817*61046927SAndroid Build Coastguard Worker {
1818*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *before_inst = &before->inst->qpu;
1819*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *after_inst = &after->inst->qpu;
1820*61046927SAndroid Build Coastguard Worker uint32_t latency = 1;
1821*61046927SAndroid Build Coastguard Worker
1822*61046927SAndroid Build Coastguard Worker if (before_inst->type != V3D_QPU_INSTR_TYPE_ALU ||
1823*61046927SAndroid Build Coastguard Worker after_inst->type != V3D_QPU_INSTR_TYPE_ALU)
1824*61046927SAndroid Build Coastguard Worker return latency;
1825*61046927SAndroid Build Coastguard Worker
1826*61046927SAndroid Build Coastguard Worker if (v3d_qpu_instr_is_sfu(before_inst))
1827*61046927SAndroid Build Coastguard Worker return 2;
1828*61046927SAndroid Build Coastguard Worker
1829*61046927SAndroid Build Coastguard Worker if (before_inst->alu.add.op != V3D_QPU_A_NOP &&
1830*61046927SAndroid Build Coastguard Worker before_inst->alu.add.magic_write) {
1831*61046927SAndroid Build Coastguard Worker latency = MAX2(latency,
1832*61046927SAndroid Build Coastguard Worker magic_waddr_latency(devinfo,
1833*61046927SAndroid Build Coastguard Worker before_inst->alu.add.waddr,
1834*61046927SAndroid Build Coastguard Worker after_inst));
1835*61046927SAndroid Build Coastguard Worker }
1836*61046927SAndroid Build Coastguard Worker
1837*61046927SAndroid Build Coastguard Worker if (before_inst->alu.mul.op != V3D_QPU_M_NOP &&
1838*61046927SAndroid Build Coastguard Worker before_inst->alu.mul.magic_write) {
1839*61046927SAndroid Build Coastguard Worker latency = MAX2(latency,
1840*61046927SAndroid Build Coastguard Worker magic_waddr_latency(devinfo,
1841*61046927SAndroid Build Coastguard Worker before_inst->alu.mul.waddr,
1842*61046927SAndroid Build Coastguard Worker after_inst));
1843*61046927SAndroid Build Coastguard Worker }
1844*61046927SAndroid Build Coastguard Worker
1845*61046927SAndroid Build Coastguard Worker return latency;
1846*61046927SAndroid Build Coastguard Worker }
1847*61046927SAndroid Build Coastguard Worker
1848*61046927SAndroid Build Coastguard Worker /** Recursive computation of the delay member of a node. */
1849*61046927SAndroid Build Coastguard Worker static void
compute_delay(struct dag_node * node,void * state)1850*61046927SAndroid Build Coastguard Worker compute_delay(struct dag_node *node, void *state)
1851*61046927SAndroid Build Coastguard Worker {
1852*61046927SAndroid Build Coastguard Worker struct schedule_node *n = (struct schedule_node *)node;
1853*61046927SAndroid Build Coastguard Worker struct v3d_compile *c = (struct v3d_compile *) state;
1854*61046927SAndroid Build Coastguard Worker
1855*61046927SAndroid Build Coastguard Worker n->delay = 1;
1856*61046927SAndroid Build Coastguard Worker
1857*61046927SAndroid Build Coastguard Worker util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
1858*61046927SAndroid Build Coastguard Worker struct schedule_node *child =
1859*61046927SAndroid Build Coastguard Worker (struct schedule_node *)edge->child;
1860*61046927SAndroid Build Coastguard Worker
1861*61046927SAndroid Build Coastguard Worker n->delay = MAX2(n->delay, (child->delay +
1862*61046927SAndroid Build Coastguard Worker instruction_latency(c->devinfo, n,
1863*61046927SAndroid Build Coastguard Worker child)));
1864*61046927SAndroid Build Coastguard Worker }
1865*61046927SAndroid Build Coastguard Worker }
1866*61046927SAndroid Build Coastguard Worker
1867*61046927SAndroid Build Coastguard Worker /* Removes a DAG head, but removing only the WAR edges. (dag_prune_head()
1868*61046927SAndroid Build Coastguard Worker * should be called on it later to finish pruning the other edges).
1869*61046927SAndroid Build Coastguard Worker */
1870*61046927SAndroid Build Coastguard Worker static void
pre_remove_head(struct dag * dag,struct schedule_node * n)1871*61046927SAndroid Build Coastguard Worker pre_remove_head(struct dag *dag, struct schedule_node *n)
1872*61046927SAndroid Build Coastguard Worker {
1873*61046927SAndroid Build Coastguard Worker list_delinit(&n->dag.link);
1874*61046927SAndroid Build Coastguard Worker
1875*61046927SAndroid Build Coastguard Worker util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
1876*61046927SAndroid Build Coastguard Worker if (edge->data)
1877*61046927SAndroid Build Coastguard Worker dag_remove_edge(dag, edge);
1878*61046927SAndroid Build Coastguard Worker }
1879*61046927SAndroid Build Coastguard Worker }
1880*61046927SAndroid Build Coastguard Worker
1881*61046927SAndroid Build Coastguard Worker static void
mark_instruction_scheduled(const struct v3d_device_info * devinfo,struct dag * dag,uint32_t time,struct schedule_node * node)1882*61046927SAndroid Build Coastguard Worker mark_instruction_scheduled(const struct v3d_device_info *devinfo,
1883*61046927SAndroid Build Coastguard Worker struct dag *dag,
1884*61046927SAndroid Build Coastguard Worker uint32_t time,
1885*61046927SAndroid Build Coastguard Worker struct schedule_node *node)
1886*61046927SAndroid Build Coastguard Worker {
1887*61046927SAndroid Build Coastguard Worker if (!node)
1888*61046927SAndroid Build Coastguard Worker return;
1889*61046927SAndroid Build Coastguard Worker
1890*61046927SAndroid Build Coastguard Worker util_dynarray_foreach(&node->dag.edges, struct dag_edge, edge) {
1891*61046927SAndroid Build Coastguard Worker struct schedule_node *child =
1892*61046927SAndroid Build Coastguard Worker (struct schedule_node *)edge->child;
1893*61046927SAndroid Build Coastguard Worker
1894*61046927SAndroid Build Coastguard Worker if (!child)
1895*61046927SAndroid Build Coastguard Worker continue;
1896*61046927SAndroid Build Coastguard Worker
1897*61046927SAndroid Build Coastguard Worker uint32_t latency = instruction_latency(devinfo, node, child);
1898*61046927SAndroid Build Coastguard Worker
1899*61046927SAndroid Build Coastguard Worker child->unblocked_time = MAX2(child->unblocked_time,
1900*61046927SAndroid Build Coastguard Worker time + latency);
1901*61046927SAndroid Build Coastguard Worker }
1902*61046927SAndroid Build Coastguard Worker dag_prune_head(dag, &node->dag);
1903*61046927SAndroid Build Coastguard Worker }
1904*61046927SAndroid Build Coastguard Worker
1905*61046927SAndroid Build Coastguard Worker static void
insert_scheduled_instruction(struct v3d_compile * c,struct qblock * block,struct choose_scoreboard * scoreboard,struct qinst * inst)1906*61046927SAndroid Build Coastguard Worker insert_scheduled_instruction(struct v3d_compile *c,
1907*61046927SAndroid Build Coastguard Worker struct qblock *block,
1908*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
1909*61046927SAndroid Build Coastguard Worker struct qinst *inst)
1910*61046927SAndroid Build Coastguard Worker {
1911*61046927SAndroid Build Coastguard Worker list_addtail(&inst->link, &block->instructions);
1912*61046927SAndroid Build Coastguard Worker
1913*61046927SAndroid Build Coastguard Worker update_scoreboard_for_chosen(scoreboard, inst, c->devinfo);
1914*61046927SAndroid Build Coastguard Worker c->qpu_inst_count++;
1915*61046927SAndroid Build Coastguard Worker scoreboard->tick++;
1916*61046927SAndroid Build Coastguard Worker }
1917*61046927SAndroid Build Coastguard Worker
1918*61046927SAndroid Build Coastguard Worker static struct qinst *
vir_nop()1919*61046927SAndroid Build Coastguard Worker vir_nop()
1920*61046927SAndroid Build Coastguard Worker {
1921*61046927SAndroid Build Coastguard Worker struct qreg undef = vir_nop_reg();
1922*61046927SAndroid Build Coastguard Worker struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
1923*61046927SAndroid Build Coastguard Worker
1924*61046927SAndroid Build Coastguard Worker return qinst;
1925*61046927SAndroid Build Coastguard Worker }
1926*61046927SAndroid Build Coastguard Worker
/**
 * Creates a fresh NOP instruction and inserts it into `block` as the next
 * scheduled instruction, updating the scoreboard accordingly.
 */
static void
emit_nop(struct v3d_compile *c, struct qblock *block,
         struct choose_scoreboard *scoreboard)
{
        struct qinst *nop = vir_nop();

        insert_scheduled_instruction(c, block, scoreboard, nop);
}
1933*61046927SAndroid Build Coastguard Worker
1934*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_valid_in_thrend_slot(struct v3d_compile * c,const struct qinst * qinst,int slot)1935*61046927SAndroid Build Coastguard Worker qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
1936*61046927SAndroid Build Coastguard Worker const struct qinst *qinst, int slot)
1937*61046927SAndroid Build Coastguard Worker {
1938*61046927SAndroid Build Coastguard Worker const struct v3d_qpu_instr *inst = &qinst->qpu;
1939*61046927SAndroid Build Coastguard Worker
1940*61046927SAndroid Build Coastguard Worker if (slot == 2 && qinst->is_tlb_z_write)
1941*61046927SAndroid Build Coastguard Worker return false;
1942*61046927SAndroid Build Coastguard Worker
1943*61046927SAndroid Build Coastguard Worker if (slot > 0 && qinst->uniform != ~0)
1944*61046927SAndroid Build Coastguard Worker return false;
1945*61046927SAndroid Build Coastguard Worker
1946*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver == 42 && v3d_qpu_waits_vpm(inst))
1947*61046927SAndroid Build Coastguard Worker return false;
1948*61046927SAndroid Build Coastguard Worker
1949*61046927SAndroid Build Coastguard Worker if (inst->sig.ldvary)
1950*61046927SAndroid Build Coastguard Worker return false;
1951*61046927SAndroid Build Coastguard Worker
1952*61046927SAndroid Build Coastguard Worker if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
1953*61046927SAndroid Build Coastguard Worker /* GFXH-1625: TMUWT not allowed in the final instruction. */
1954*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver == 42 && slot == 2 &&
1955*61046927SAndroid Build Coastguard Worker inst->alu.add.op == V3D_QPU_A_TMUWT) {
1956*61046927SAndroid Build Coastguard Worker return false;
1957*61046927SAndroid Build Coastguard Worker }
1958*61046927SAndroid Build Coastguard Worker
1959*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver == 42) {
1960*61046927SAndroid Build Coastguard Worker /* No writing physical registers at the end. */
1961*61046927SAndroid Build Coastguard Worker bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
1962*61046927SAndroid Build Coastguard Worker bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP;
1963*61046927SAndroid Build Coastguard Worker if ((!add_is_nop && !inst->alu.add.magic_write) ||
1964*61046927SAndroid Build Coastguard Worker (!mul_is_nop && !inst->alu.mul.magic_write)) {
1965*61046927SAndroid Build Coastguard Worker return false;
1966*61046927SAndroid Build Coastguard Worker }
1967*61046927SAndroid Build Coastguard Worker
1968*61046927SAndroid Build Coastguard Worker if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
1969*61046927SAndroid Build Coastguard Worker !inst->sig_magic) {
1970*61046927SAndroid Build Coastguard Worker return false;
1971*61046927SAndroid Build Coastguard Worker }
1972*61046927SAndroid Build Coastguard Worker }
1973*61046927SAndroid Build Coastguard Worker
1974*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver >= 71) {
1975*61046927SAndroid Build Coastguard Worker /* The thread end instruction must not write to the
1976*61046927SAndroid Build Coastguard Worker * register file via the add/mul ALUs.
1977*61046927SAndroid Build Coastguard Worker */
1978*61046927SAndroid Build Coastguard Worker if (slot == 0 &&
1979*61046927SAndroid Build Coastguard Worker (!inst->alu.add.magic_write ||
1980*61046927SAndroid Build Coastguard Worker !inst->alu.mul.magic_write)) {
1981*61046927SAndroid Build Coastguard Worker return false;
1982*61046927SAndroid Build Coastguard Worker }
1983*61046927SAndroid Build Coastguard Worker }
1984*61046927SAndroid Build Coastguard Worker
1985*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver == 42) {
1986*61046927SAndroid Build Coastguard Worker /* RF0-2 might be overwritten during the delay slots by
1987*61046927SAndroid Build Coastguard Worker * fragment shader setup.
1988*61046927SAndroid Build Coastguard Worker */
1989*61046927SAndroid Build Coastguard Worker if (inst->raddr_a < 3 && v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A))
1990*61046927SAndroid Build Coastguard Worker return false;
1991*61046927SAndroid Build Coastguard Worker
1992*61046927SAndroid Build Coastguard Worker if (inst->raddr_b < 3 &&
1993*61046927SAndroid Build Coastguard Worker !inst->sig.small_imm_b &&
1994*61046927SAndroid Build Coastguard Worker v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
1995*61046927SAndroid Build Coastguard Worker return false;
1996*61046927SAndroid Build Coastguard Worker }
1997*61046927SAndroid Build Coastguard Worker }
1998*61046927SAndroid Build Coastguard Worker
1999*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver >= 71) {
2000*61046927SAndroid Build Coastguard Worker /* RF2-3 might be overwritten during the delay slots by
2001*61046927SAndroid Build Coastguard Worker * fragment shader setup.
2002*61046927SAndroid Build Coastguard Worker */
2003*61046927SAndroid Build Coastguard Worker if (v3d71_qpu_reads_raddr(inst, 2) ||
2004*61046927SAndroid Build Coastguard Worker v3d71_qpu_reads_raddr(inst, 3)) {
2005*61046927SAndroid Build Coastguard Worker return false;
2006*61046927SAndroid Build Coastguard Worker }
2007*61046927SAndroid Build Coastguard Worker
2008*61046927SAndroid Build Coastguard Worker if (v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 2) ||
2009*61046927SAndroid Build Coastguard Worker v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 3)) {
2010*61046927SAndroid Build Coastguard Worker return false;
2011*61046927SAndroid Build Coastguard Worker }
2012*61046927SAndroid Build Coastguard Worker }
2013*61046927SAndroid Build Coastguard Worker }
2014*61046927SAndroid Build Coastguard Worker
2015*61046927SAndroid Build Coastguard Worker return true;
2016*61046927SAndroid Build Coastguard Worker }
2017*61046927SAndroid Build Coastguard Worker
2018*61046927SAndroid Build Coastguard Worker /**
2019*61046927SAndroid Build Coastguard Worker * This is called when trying to merge a thrsw back into the instruction stream
2020*61046927SAndroid Build Coastguard Worker * of instructions that were scheduled *before* the thrsw signal to fill its
2021*61046927SAndroid Build Coastguard Worker * delay slots. Because the actual execution of the thrsw happens after the
2022*61046927SAndroid Build Coastguard Worker * delay slots, it is usually safe to do this, but there are some cases that
2023*61046927SAndroid Build Coastguard Worker * need special care.
2024*61046927SAndroid Build Coastguard Worker */
2025*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile * c,struct choose_scoreboard * scoreboard,const struct qinst * qinst,uint32_t slot)2026*61046927SAndroid Build Coastguard Worker qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
2027*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
2028*61046927SAndroid Build Coastguard Worker const struct qinst *qinst,
2029*61046927SAndroid Build Coastguard Worker uint32_t slot)
2030*61046927SAndroid Build Coastguard Worker {
2031*61046927SAndroid Build Coastguard Worker /* No scheduling SFU when the result would land in the other
2032*61046927SAndroid Build Coastguard Worker * thread. The simulator complains for safety, though it
2033*61046927SAndroid Build Coastguard Worker * would only occur for dead code in our case.
2034*61046927SAndroid Build Coastguard Worker */
2035*61046927SAndroid Build Coastguard Worker if (slot > 0) {
2036*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver == 42 && v3d_qpu_instr_is_legacy_sfu(&qinst->qpu))
2037*61046927SAndroid Build Coastguard Worker return false;
2038*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver >= 71 && v3d_qpu_instr_is_sfu(&qinst->qpu))
2039*61046927SAndroid Build Coastguard Worker return false;
2040*61046927SAndroid Build Coastguard Worker }
2041*61046927SAndroid Build Coastguard Worker
2042*61046927SAndroid Build Coastguard Worker if (qinst->qpu.sig.ldvary) {
2043*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver == 42 && slot > 0)
2044*61046927SAndroid Build Coastguard Worker return false;
2045*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver >= 71 && slot == 2)
2046*61046927SAndroid Build Coastguard Worker return false;
2047*61046927SAndroid Build Coastguard Worker }
2048*61046927SAndroid Build Coastguard Worker
2049*61046927SAndroid Build Coastguard Worker /* unifa and the following 3 instructions can't overlap a
2050*61046927SAndroid Build Coastguard Worker * thread switch/end. The docs further clarify that this means
2051*61046927SAndroid Build Coastguard Worker * the cycle at which the actual thread switch/end happens
2052*61046927SAndroid Build Coastguard Worker * and not when the thrsw instruction is processed, which would
2053*61046927SAndroid Build Coastguard Worker * be after the 2 delay slots following the thrsw instruction.
2054*61046927SAndroid Build Coastguard Worker * This means that we can move up a thrsw up to the instruction
2055*61046927SAndroid Build Coastguard Worker * right after unifa:
2056*61046927SAndroid Build Coastguard Worker *
2057*61046927SAndroid Build Coastguard Worker * unifa, r5
2058*61046927SAndroid Build Coastguard Worker * thrsw
2059*61046927SAndroid Build Coastguard Worker * delay slot 1
2060*61046927SAndroid Build Coastguard Worker * delay slot 2
2061*61046927SAndroid Build Coastguard Worker * Thread switch happens here, 4 instructions away from unifa
2062*61046927SAndroid Build Coastguard Worker */
2063*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_unifa(c->devinfo, &qinst->qpu))
2064*61046927SAndroid Build Coastguard Worker return false;
2065*61046927SAndroid Build Coastguard Worker
2066*61046927SAndroid Build Coastguard Worker /* See comment when we set has_rf0_flops_conflict for details */
2067*61046927SAndroid Build Coastguard Worker if (c->devinfo->ver >= 71 &&
2068*61046927SAndroid Build Coastguard Worker slot == 2 &&
2069*61046927SAndroid Build Coastguard Worker v3d_qpu_sig_writes_address(c->devinfo, &qinst->qpu.sig) &&
2070*61046927SAndroid Build Coastguard Worker !qinst->qpu.sig_magic) {
2071*61046927SAndroid Build Coastguard Worker if (scoreboard->has_rf0_flops_conflict)
2072*61046927SAndroid Build Coastguard Worker return false;
2073*61046927SAndroid Build Coastguard Worker if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick)
2074*61046927SAndroid Build Coastguard Worker return false;
2075*61046927SAndroid Build Coastguard Worker }
2076*61046927SAndroid Build Coastguard Worker
2077*61046927SAndroid Build Coastguard Worker return true;
2078*61046927SAndroid Build Coastguard Worker }
2079*61046927SAndroid Build Coastguard Worker
2080*61046927SAndroid Build Coastguard Worker /**
2081*61046927SAndroid Build Coastguard Worker * This is called for instructions scheduled *after* a thrsw signal that may
2082*61046927SAndroid Build Coastguard Worker * land in the delay slots of the thrsw. Because these instructions were
2083*61046927SAndroid Build Coastguard Worker * scheduled after the thrsw, we need to be careful when placing them into
2084*61046927SAndroid Build Coastguard Worker * the delay slots, since that means that we are moving them ahead of the
2085*61046927SAndroid Build Coastguard Worker * thread switch and we need to ensure that is not a problem.
2086*61046927SAndroid Build Coastguard Worker */
2087*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile * c,struct choose_scoreboard * scoreboard,const struct qinst * qinst)2088*61046927SAndroid Build Coastguard Worker qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile *c,
2089*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
2090*61046927SAndroid Build Coastguard Worker const struct qinst *qinst)
2091*61046927SAndroid Build Coastguard Worker {
2092*61046927SAndroid Build Coastguard Worker const uint32_t slot = scoreboard->tick - scoreboard->last_thrsw_tick;
2093*61046927SAndroid Build Coastguard Worker assert(slot <= 2);
2094*61046927SAndroid Build Coastguard Worker
2095*61046927SAndroid Build Coastguard Worker /* We merge thrsw instructions back into the instruction stream
2096*61046927SAndroid Build Coastguard Worker * manually, so any instructions scheduled after a thrsw should be
2097*61046927SAndroid Build Coastguard Worker * in the actual delay slots and not in the same slot as the thrsw.
2098*61046927SAndroid Build Coastguard Worker */
2099*61046927SAndroid Build Coastguard Worker assert(slot >= 1);
2100*61046927SAndroid Build Coastguard Worker
2101*61046927SAndroid Build Coastguard Worker /* No emitting a thrsw while the previous thrsw hasn't happened yet. */
2102*61046927SAndroid Build Coastguard Worker if (qinst->qpu.sig.thrsw)
2103*61046927SAndroid Build Coastguard Worker return false;
2104*61046927SAndroid Build Coastguard Worker
2105*61046927SAndroid Build Coastguard Worker /* The restrictions for instructions scheduled before the the thrsw
2106*61046927SAndroid Build Coastguard Worker * also apply to instructions scheduled after the thrsw that we want
2107*61046927SAndroid Build Coastguard Worker * to place in its delay slots.
2108*61046927SAndroid Build Coastguard Worker */
2109*61046927SAndroid Build Coastguard Worker if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard, qinst, slot))
2110*61046927SAndroid Build Coastguard Worker return false;
2111*61046927SAndroid Build Coastguard Worker
2112*61046927SAndroid Build Coastguard Worker /* TLB access is disallowed until scoreboard wait is executed, which
2113*61046927SAndroid Build Coastguard Worker * we do on the last thread switch.
2114*61046927SAndroid Build Coastguard Worker */
2115*61046927SAndroid Build Coastguard Worker if (qpu_inst_is_tlb(&qinst->qpu))
2116*61046927SAndroid Build Coastguard Worker return false;
2117*61046927SAndroid Build Coastguard Worker
2118*61046927SAndroid Build Coastguard Worker /* Instruction sequence restrictions: Branch is not allowed in delay
2119*61046927SAndroid Build Coastguard Worker * slots of a thrsw.
2120*61046927SAndroid Build Coastguard Worker */
2121*61046927SAndroid Build Coastguard Worker if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
2122*61046927SAndroid Build Coastguard Worker return false;
2123*61046927SAndroid Build Coastguard Worker
2124*61046927SAndroid Build Coastguard Worker /* Miscellaneous restrictions: At the point of a thrsw we need to have
2125*61046927SAndroid Build Coastguard Worker * at least one outstanding lookup or TSY wait.
2126*61046927SAndroid Build Coastguard Worker *
2127*61046927SAndroid Build Coastguard Worker * So avoid placing TMU instructions scheduled after the thrsw into
2128*61046927SAndroid Build Coastguard Worker * its delay slots or we may be compromising the integrity of our TMU
2129*61046927SAndroid Build Coastguard Worker * sequences. Also, notice that if we moved these instructions into
2130*61046927SAndroid Build Coastguard Worker * the delay slots of a previous thrsw we could overflow our TMU output
2131*61046927SAndroid Build Coastguard Worker * fifo, since we could be effectively pipelining a lookup scheduled
2132*61046927SAndroid Build Coastguard Worker * after the thrsw into the sequence before the thrsw.
2133*61046927SAndroid Build Coastguard Worker */
2134*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_tmu(c->devinfo, &qinst->qpu) ||
2135*61046927SAndroid Build Coastguard Worker qinst->qpu.sig.wrtmuc) {
2136*61046927SAndroid Build Coastguard Worker return false;
2137*61046927SAndroid Build Coastguard Worker }
2138*61046927SAndroid Build Coastguard Worker
2139*61046927SAndroid Build Coastguard Worker /* Don't move instructions that wait on the TMU before the thread switch
2140*61046927SAndroid Build Coastguard Worker * happens since that would make the current thread stall before the
2141*61046927SAndroid Build Coastguard Worker * switch, which is exactly what we want to avoid with the thrsw
2142*61046927SAndroid Build Coastguard Worker * instruction.
2143*61046927SAndroid Build Coastguard Worker */
2144*61046927SAndroid Build Coastguard Worker if (v3d_qpu_waits_on_tmu(&qinst->qpu))
2145*61046927SAndroid Build Coastguard Worker return false;
2146*61046927SAndroid Build Coastguard Worker
2147*61046927SAndroid Build Coastguard Worker /* A thread switch invalidates all accumulators, so don't place any
2148*61046927SAndroid Build Coastguard Worker * instructions that write accumulators into the delay slots.
2149*61046927SAndroid Build Coastguard Worker */
2150*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_accum(c->devinfo, &qinst->qpu))
2151*61046927SAndroid Build Coastguard Worker return false;
2152*61046927SAndroid Build Coastguard Worker
2153*61046927SAndroid Build Coastguard Worker /* Multop has an implicit write to the rtop register which is an
2154*61046927SAndroid Build Coastguard Worker * specialized accumulator that is only used with this instruction.
2155*61046927SAndroid Build Coastguard Worker */
2156*61046927SAndroid Build Coastguard Worker if (qinst->qpu.alu.mul.op == V3D_QPU_M_MULTOP)
2157*61046927SAndroid Build Coastguard Worker return false;
2158*61046927SAndroid Build Coastguard Worker
2159*61046927SAndroid Build Coastguard Worker /* Flags are invalidated across a thread switch, so dont' place
2160*61046927SAndroid Build Coastguard Worker * instructions that write flags into delay slots.
2161*61046927SAndroid Build Coastguard Worker */
2162*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_flags(&qinst->qpu))
2163*61046927SAndroid Build Coastguard Worker return false;
2164*61046927SAndroid Build Coastguard Worker
2165*61046927SAndroid Build Coastguard Worker /* TSY sync ops materialize at the point of the next thread switch,
2166*61046927SAndroid Build Coastguard Worker * therefore, if we have a TSY sync right after a thread switch, we
2167*61046927SAndroid Build Coastguard Worker * cannot place it in its delay slots, or we would be moving the sync
2168*61046927SAndroid Build Coastguard Worker * to the thrsw before it instead.
2169*61046927SAndroid Build Coastguard Worker */
2170*61046927SAndroid Build Coastguard Worker if (qinst->qpu.alu.add.op == V3D_QPU_A_BARRIERID)
2171*61046927SAndroid Build Coastguard Worker return false;
2172*61046927SAndroid Build Coastguard Worker
2173*61046927SAndroid Build Coastguard Worker return true;
2174*61046927SAndroid Build Coastguard Worker }
2175*61046927SAndroid Build Coastguard Worker
2176*61046927SAndroid Build Coastguard Worker static bool
valid_thrsw_sequence(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct qinst * qinst,int instructions_in_sequence,bool is_thrend)2177*61046927SAndroid Build Coastguard Worker valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard,
2178*61046927SAndroid Build Coastguard Worker struct qinst *qinst, int instructions_in_sequence,
2179*61046927SAndroid Build Coastguard Worker bool is_thrend)
2180*61046927SAndroid Build Coastguard Worker {
2181*61046927SAndroid Build Coastguard Worker for (int slot = 0; slot < instructions_in_sequence; slot++) {
2182*61046927SAndroid Build Coastguard Worker if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard,
2183*61046927SAndroid Build Coastguard Worker qinst, slot)) {
2184*61046927SAndroid Build Coastguard Worker return false;
2185*61046927SAndroid Build Coastguard Worker }
2186*61046927SAndroid Build Coastguard Worker
2187*61046927SAndroid Build Coastguard Worker if (is_thrend &&
2188*61046927SAndroid Build Coastguard Worker !qpu_inst_valid_in_thrend_slot(c, qinst, slot)) {
2189*61046927SAndroid Build Coastguard Worker return false;
2190*61046927SAndroid Build Coastguard Worker }
2191*61046927SAndroid Build Coastguard Worker
2192*61046927SAndroid Build Coastguard Worker /* Note that the list is circular, so we can only do this up
2193*61046927SAndroid Build Coastguard Worker * to instructions_in_sequence.
2194*61046927SAndroid Build Coastguard Worker */
2195*61046927SAndroid Build Coastguard Worker qinst = (struct qinst *)qinst->link.next;
2196*61046927SAndroid Build Coastguard Worker }
2197*61046927SAndroid Build Coastguard Worker
2198*61046927SAndroid Build Coastguard Worker return true;
2199*61046927SAndroid Build Coastguard Worker }
2200*61046927SAndroid Build Coastguard Worker
2201*61046927SAndroid Build Coastguard Worker /**
2202*61046927SAndroid Build Coastguard Worker * Emits a THRSW signal in the stream, trying to move it up to pair with
2203*61046927SAndroid Build Coastguard Worker * another instruction.
2204*61046927SAndroid Build Coastguard Worker */
2205*61046927SAndroid Build Coastguard Worker static int
emit_thrsw(struct v3d_compile * c,struct qblock * block,struct choose_scoreboard * scoreboard,struct qinst * inst,bool is_thrend)2206*61046927SAndroid Build Coastguard Worker emit_thrsw(struct v3d_compile *c,
2207*61046927SAndroid Build Coastguard Worker struct qblock *block,
2208*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
2209*61046927SAndroid Build Coastguard Worker struct qinst *inst,
2210*61046927SAndroid Build Coastguard Worker bool is_thrend)
2211*61046927SAndroid Build Coastguard Worker {
2212*61046927SAndroid Build Coastguard Worker int time = 0;
2213*61046927SAndroid Build Coastguard Worker
2214*61046927SAndroid Build Coastguard Worker /* There should be nothing in a thrsw inst being scheduled other than
2215*61046927SAndroid Build Coastguard Worker * the signal bits.
2216*61046927SAndroid Build Coastguard Worker */
2217*61046927SAndroid Build Coastguard Worker assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
2218*61046927SAndroid Build Coastguard Worker assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP);
2219*61046927SAndroid Build Coastguard Worker assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP);
2220*61046927SAndroid Build Coastguard Worker
2221*61046927SAndroid Build Coastguard Worker /* Don't try to emit a thrsw in the delay slots of a previous thrsw
2222*61046927SAndroid Build Coastguard Worker * or branch.
2223*61046927SAndroid Build Coastguard Worker */
2224*61046927SAndroid Build Coastguard Worker while (scoreboard->last_thrsw_tick + 2 >= scoreboard->tick) {
2225*61046927SAndroid Build Coastguard Worker emit_nop(c, block, scoreboard);
2226*61046927SAndroid Build Coastguard Worker time++;
2227*61046927SAndroid Build Coastguard Worker }
2228*61046927SAndroid Build Coastguard Worker while (scoreboard->last_branch_tick + 3 >= scoreboard->tick) {
2229*61046927SAndroid Build Coastguard Worker emit_nop(c, block, scoreboard);
2230*61046927SAndroid Build Coastguard Worker time++;
2231*61046927SAndroid Build Coastguard Worker }
2232*61046927SAndroid Build Coastguard Worker
2233*61046927SAndroid Build Coastguard Worker /* Find how far back into previous instructions we can put the THRSW. */
2234*61046927SAndroid Build Coastguard Worker int slots_filled = 0;
2235*61046927SAndroid Build Coastguard Worker int invalid_sig_count = 0;
2236*61046927SAndroid Build Coastguard Worker int invalid_seq_count = 0;
2237*61046927SAndroid Build Coastguard Worker bool last_thrsw_after_invalid_ok = false;
2238*61046927SAndroid Build Coastguard Worker struct qinst *merge_inst = NULL;
2239*61046927SAndroid Build Coastguard Worker vir_for_each_inst_rev(prev_inst, block) {
2240*61046927SAndroid Build Coastguard Worker /* No emitting our thrsw while the previous thrsw hasn't
2241*61046927SAndroid Build Coastguard Worker * happened yet.
2242*61046927SAndroid Build Coastguard Worker */
2243*61046927SAndroid Build Coastguard Worker if (scoreboard->last_thrsw_tick + 3 >
2244*61046927SAndroid Build Coastguard Worker scoreboard->tick - (slots_filled + 1)) {
2245*61046927SAndroid Build Coastguard Worker break;
2246*61046927SAndroid Build Coastguard Worker }
2247*61046927SAndroid Build Coastguard Worker
2248*61046927SAndroid Build Coastguard Worker
2249*61046927SAndroid Build Coastguard Worker if (!valid_thrsw_sequence(c, scoreboard,
2250*61046927SAndroid Build Coastguard Worker prev_inst, slots_filled + 1,
2251*61046927SAndroid Build Coastguard Worker is_thrend)) {
2252*61046927SAndroid Build Coastguard Worker /* Even if the current sequence isn't valid, we may
2253*61046927SAndroid Build Coastguard Worker * be able to get a valid sequence by trying to move the
2254*61046927SAndroid Build Coastguard Worker * thrsw earlier, so keep going.
2255*61046927SAndroid Build Coastguard Worker */
2256*61046927SAndroid Build Coastguard Worker invalid_seq_count++;
2257*61046927SAndroid Build Coastguard Worker goto cont_block;
2258*61046927SAndroid Build Coastguard Worker }
2259*61046927SAndroid Build Coastguard Worker
2260*61046927SAndroid Build Coastguard Worker struct v3d_qpu_sig sig = prev_inst->qpu.sig;
2261*61046927SAndroid Build Coastguard Worker sig.thrsw = true;
2262*61046927SAndroid Build Coastguard Worker uint32_t packed_sig;
2263*61046927SAndroid Build Coastguard Worker if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig)) {
2264*61046927SAndroid Build Coastguard Worker /* If we can't merge the thrsw here because of signal
2265*61046927SAndroid Build Coastguard Worker * incompatibility, keep going, we might be able to
2266*61046927SAndroid Build Coastguard Worker * merge it in an earlier instruction.
2267*61046927SAndroid Build Coastguard Worker */
2268*61046927SAndroid Build Coastguard Worker invalid_sig_count++;
2269*61046927SAndroid Build Coastguard Worker goto cont_block;
2270*61046927SAndroid Build Coastguard Worker }
2271*61046927SAndroid Build Coastguard Worker
2272*61046927SAndroid Build Coastguard Worker /* For last thrsw we need 2 consecutive slots that are
2273*61046927SAndroid Build Coastguard Worker * thrsw compatible, so if we have previously jumped over
2274*61046927SAndroid Build Coastguard Worker * an incompatible signal, flag that we have found the first
2275*61046927SAndroid Build Coastguard Worker * valid slot here and keep going.
2276*61046927SAndroid Build Coastguard Worker */
2277*61046927SAndroid Build Coastguard Worker if (inst->is_last_thrsw && invalid_sig_count > 0 &&
2278*61046927SAndroid Build Coastguard Worker !last_thrsw_after_invalid_ok) {
2279*61046927SAndroid Build Coastguard Worker last_thrsw_after_invalid_ok = true;
2280*61046927SAndroid Build Coastguard Worker invalid_sig_count++;
2281*61046927SAndroid Build Coastguard Worker goto cont_block;
2282*61046927SAndroid Build Coastguard Worker }
2283*61046927SAndroid Build Coastguard Worker
2284*61046927SAndroid Build Coastguard Worker /* We can merge the thrsw in this instruction */
2285*61046927SAndroid Build Coastguard Worker last_thrsw_after_invalid_ok = false;
2286*61046927SAndroid Build Coastguard Worker invalid_sig_count = 0;
2287*61046927SAndroid Build Coastguard Worker invalid_seq_count = 0;
2288*61046927SAndroid Build Coastguard Worker merge_inst = prev_inst;
2289*61046927SAndroid Build Coastguard Worker
2290*61046927SAndroid Build Coastguard Worker cont_block:
2291*61046927SAndroid Build Coastguard Worker if (++slots_filled == 3)
2292*61046927SAndroid Build Coastguard Worker break;
2293*61046927SAndroid Build Coastguard Worker }
2294*61046927SAndroid Build Coastguard Worker
2295*61046927SAndroid Build Coastguard Worker /* If we jumped over a signal incompatibility and did not manage to
2296*61046927SAndroid Build Coastguard Worker * merge the thrsw in the end, we need to adjust slots filled to match
2297*61046927SAndroid Build Coastguard Worker * the last valid merge point.
2298*61046927SAndroid Build Coastguard Worker */
2299*61046927SAndroid Build Coastguard Worker assert((invalid_sig_count == 0 && invalid_seq_count == 0) ||
2300*61046927SAndroid Build Coastguard Worker slots_filled >= invalid_sig_count + invalid_seq_count);
2301*61046927SAndroid Build Coastguard Worker if (invalid_sig_count > 0)
2302*61046927SAndroid Build Coastguard Worker slots_filled -= invalid_sig_count;
2303*61046927SAndroid Build Coastguard Worker if (invalid_seq_count > 0)
2304*61046927SAndroid Build Coastguard Worker slots_filled -= invalid_seq_count;
2305*61046927SAndroid Build Coastguard Worker
2306*61046927SAndroid Build Coastguard Worker bool needs_free = false;
2307*61046927SAndroid Build Coastguard Worker if (merge_inst) {
2308*61046927SAndroid Build Coastguard Worker merge_inst->qpu.sig.thrsw = true;
2309*61046927SAndroid Build Coastguard Worker needs_free = true;
2310*61046927SAndroid Build Coastguard Worker scoreboard->last_thrsw_tick = scoreboard->tick - slots_filled;
2311*61046927SAndroid Build Coastguard Worker } else {
2312*61046927SAndroid Build Coastguard Worker scoreboard->last_thrsw_tick = scoreboard->tick;
2313*61046927SAndroid Build Coastguard Worker insert_scheduled_instruction(c, block, scoreboard, inst);
2314*61046927SAndroid Build Coastguard Worker time++;
2315*61046927SAndroid Build Coastguard Worker slots_filled++;
2316*61046927SAndroid Build Coastguard Worker merge_inst = inst;
2317*61046927SAndroid Build Coastguard Worker }
2318*61046927SAndroid Build Coastguard Worker
2319*61046927SAndroid Build Coastguard Worker scoreboard->first_thrsw_emitted = true;
2320*61046927SAndroid Build Coastguard Worker
2321*61046927SAndroid Build Coastguard Worker /* If we're emitting the last THRSW (other than program end), then
2322*61046927SAndroid Build Coastguard Worker * signal that to the HW by emitting two THRSWs in a row.
2323*61046927SAndroid Build Coastguard Worker */
2324*61046927SAndroid Build Coastguard Worker if (inst->is_last_thrsw) {
2325*61046927SAndroid Build Coastguard Worker if (slots_filled <= 1) {
2326*61046927SAndroid Build Coastguard Worker emit_nop(c, block, scoreboard);
2327*61046927SAndroid Build Coastguard Worker time++;
2328*61046927SAndroid Build Coastguard Worker }
2329*61046927SAndroid Build Coastguard Worker struct qinst *second_inst =
2330*61046927SAndroid Build Coastguard Worker (struct qinst *)merge_inst->link.next;
2331*61046927SAndroid Build Coastguard Worker second_inst->qpu.sig.thrsw = true;
2332*61046927SAndroid Build Coastguard Worker scoreboard->last_thrsw_emitted = true;
2333*61046927SAndroid Build Coastguard Worker }
2334*61046927SAndroid Build Coastguard Worker
2335*61046927SAndroid Build Coastguard Worker /* Make sure the thread end executes within the program lifespan */
2336*61046927SAndroid Build Coastguard Worker if (is_thrend) {
2337*61046927SAndroid Build Coastguard Worker for (int i = 0; i < 3 - slots_filled; i++) {
2338*61046927SAndroid Build Coastguard Worker emit_nop(c, block, scoreboard);
2339*61046927SAndroid Build Coastguard Worker time++;
2340*61046927SAndroid Build Coastguard Worker }
2341*61046927SAndroid Build Coastguard Worker }
2342*61046927SAndroid Build Coastguard Worker
2343*61046927SAndroid Build Coastguard Worker /* If we put our THRSW into another instruction, free up the
2344*61046927SAndroid Build Coastguard Worker * instruction that didn't end up scheduled into the list.
2345*61046927SAndroid Build Coastguard Worker */
2346*61046927SAndroid Build Coastguard Worker if (needs_free)
2347*61046927SAndroid Build Coastguard Worker free(inst);
2348*61046927SAndroid Build Coastguard Worker
2349*61046927SAndroid Build Coastguard Worker return time;
2350*61046927SAndroid Build Coastguard Worker }
2351*61046927SAndroid Build Coastguard Worker
2352*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_valid_in_branch_delay_slot(struct v3d_compile * c,struct qinst * inst)2353*61046927SAndroid Build Coastguard Worker qpu_inst_valid_in_branch_delay_slot(struct v3d_compile *c, struct qinst *inst)
2354*61046927SAndroid Build Coastguard Worker {
2355*61046927SAndroid Build Coastguard Worker if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
2356*61046927SAndroid Build Coastguard Worker return false;
2357*61046927SAndroid Build Coastguard Worker
2358*61046927SAndroid Build Coastguard Worker if (inst->qpu.sig.thrsw)
2359*61046927SAndroid Build Coastguard Worker return false;
2360*61046927SAndroid Build Coastguard Worker
2361*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_unifa(c->devinfo, &inst->qpu))
2362*61046927SAndroid Build Coastguard Worker return false;
2363*61046927SAndroid Build Coastguard Worker
2364*61046927SAndroid Build Coastguard Worker if (vir_has_uniform(inst))
2365*61046927SAndroid Build Coastguard Worker return false;
2366*61046927SAndroid Build Coastguard Worker
2367*61046927SAndroid Build Coastguard Worker return true;
2368*61046927SAndroid Build Coastguard Worker }
2369*61046927SAndroid Build Coastguard Worker
2370*61046927SAndroid Build Coastguard Worker static void
emit_branch(struct v3d_compile * c,struct qblock * block,struct choose_scoreboard * scoreboard,struct qinst * inst)2371*61046927SAndroid Build Coastguard Worker emit_branch(struct v3d_compile *c,
2372*61046927SAndroid Build Coastguard Worker struct qblock *block,
2373*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
2374*61046927SAndroid Build Coastguard Worker struct qinst *inst)
2375*61046927SAndroid Build Coastguard Worker {
2376*61046927SAndroid Build Coastguard Worker assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
2377*61046927SAndroid Build Coastguard Worker
2378*61046927SAndroid Build Coastguard Worker /* We should've not picked up a branch for the delay slots of a previous
2379*61046927SAndroid Build Coastguard Worker * thrsw, branch or unifa write instruction.
2380*61046927SAndroid Build Coastguard Worker */
2381*61046927SAndroid Build Coastguard Worker int branch_tick = scoreboard->tick;
2382*61046927SAndroid Build Coastguard Worker assert(scoreboard->last_thrsw_tick + 2 < branch_tick);
2383*61046927SAndroid Build Coastguard Worker assert(scoreboard->last_branch_tick + 3 < branch_tick);
2384*61046927SAndroid Build Coastguard Worker assert(scoreboard->last_unifa_write_tick + 3 < branch_tick);
2385*61046927SAndroid Build Coastguard Worker
2386*61046927SAndroid Build Coastguard Worker /* V3D 4.x can't place a branch with msfign != 0 and cond != 0,2,3 after
2387*61046927SAndroid Build Coastguard Worker * setmsf.
2388*61046927SAndroid Build Coastguard Worker */
2389*61046927SAndroid Build Coastguard Worker bool is_safe_msf_branch =
2390*61046927SAndroid Build Coastguard Worker c->devinfo->ver >= 71 ||
2391*61046927SAndroid Build Coastguard Worker inst->qpu.branch.msfign == V3D_QPU_MSFIGN_NONE ||
2392*61046927SAndroid Build Coastguard Worker inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_ALWAYS ||
2393*61046927SAndroid Build Coastguard Worker inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_A0 ||
2394*61046927SAndroid Build Coastguard Worker inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_NA0;
2395*61046927SAndroid Build Coastguard Worker assert(scoreboard->last_setmsf_tick != branch_tick - 1 ||
2396*61046927SAndroid Build Coastguard Worker is_safe_msf_branch);
2397*61046927SAndroid Build Coastguard Worker
2398*61046927SAndroid Build Coastguard Worker /* Insert the branch instruction */
2399*61046927SAndroid Build Coastguard Worker insert_scheduled_instruction(c, block, scoreboard, inst);
2400*61046927SAndroid Build Coastguard Worker
2401*61046927SAndroid Build Coastguard Worker /* Now see if we can move the branch instruction back into the
2402*61046927SAndroid Build Coastguard Worker * instruction stream to fill its delay slots
2403*61046927SAndroid Build Coastguard Worker */
2404*61046927SAndroid Build Coastguard Worker int slots_filled = 0;
2405*61046927SAndroid Build Coastguard Worker while (slots_filled < 3 && block->instructions.next != &inst->link) {
2406*61046927SAndroid Build Coastguard Worker struct qinst *prev_inst = (struct qinst *) inst->link.prev;
2407*61046927SAndroid Build Coastguard Worker assert(prev_inst->qpu.type != V3D_QPU_INSTR_TYPE_BRANCH);
2408*61046927SAndroid Build Coastguard Worker
2409*61046927SAndroid Build Coastguard Worker /* Can't move the branch instruction if that would place it
2410*61046927SAndroid Build Coastguard Worker * in the delay slots of other instructions.
2411*61046927SAndroid Build Coastguard Worker */
2412*61046927SAndroid Build Coastguard Worker if (scoreboard->last_branch_tick + 3 >=
2413*61046927SAndroid Build Coastguard Worker branch_tick - slots_filled - 1) {
2414*61046927SAndroid Build Coastguard Worker break;
2415*61046927SAndroid Build Coastguard Worker }
2416*61046927SAndroid Build Coastguard Worker
2417*61046927SAndroid Build Coastguard Worker if (scoreboard->last_thrsw_tick + 2 >=
2418*61046927SAndroid Build Coastguard Worker branch_tick - slots_filled - 1) {
2419*61046927SAndroid Build Coastguard Worker break;
2420*61046927SAndroid Build Coastguard Worker }
2421*61046927SAndroid Build Coastguard Worker
2422*61046927SAndroid Build Coastguard Worker if (scoreboard->last_unifa_write_tick + 3 >=
2423*61046927SAndroid Build Coastguard Worker branch_tick - slots_filled - 1) {
2424*61046927SAndroid Build Coastguard Worker break;
2425*61046927SAndroid Build Coastguard Worker }
2426*61046927SAndroid Build Coastguard Worker
2427*61046927SAndroid Build Coastguard Worker /* Do not move up a branch if it can disrupt an ldvary sequence
2428*61046927SAndroid Build Coastguard Worker * as that can cause stomping of the r5 register.
2429*61046927SAndroid Build Coastguard Worker */
2430*61046927SAndroid Build Coastguard Worker if (scoreboard->last_ldvary_tick + 2 >=
2431*61046927SAndroid Build Coastguard Worker branch_tick - slots_filled) {
2432*61046927SAndroid Build Coastguard Worker break;
2433*61046927SAndroid Build Coastguard Worker }
2434*61046927SAndroid Build Coastguard Worker
2435*61046927SAndroid Build Coastguard Worker /* Can't move a conditional branch before the instruction
2436*61046927SAndroid Build Coastguard Worker * that writes the flags for its condition.
2437*61046927SAndroid Build Coastguard Worker */
2438*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_flags(&prev_inst->qpu) &&
2439*61046927SAndroid Build Coastguard Worker inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
2440*61046927SAndroid Build Coastguard Worker break;
2441*61046927SAndroid Build Coastguard Worker }
2442*61046927SAndroid Build Coastguard Worker
2443*61046927SAndroid Build Coastguard Worker if (!qpu_inst_valid_in_branch_delay_slot(c, prev_inst))
2444*61046927SAndroid Build Coastguard Worker break;
2445*61046927SAndroid Build Coastguard Worker
2446*61046927SAndroid Build Coastguard Worker if (!is_safe_msf_branch) {
2447*61046927SAndroid Build Coastguard Worker struct qinst *prev_prev_inst =
2448*61046927SAndroid Build Coastguard Worker (struct qinst *) prev_inst->link.prev;
2449*61046927SAndroid Build Coastguard Worker if (prev_prev_inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
2450*61046927SAndroid Build Coastguard Worker prev_prev_inst->qpu.alu.add.op == V3D_QPU_A_SETMSF) {
2451*61046927SAndroid Build Coastguard Worker break;
2452*61046927SAndroid Build Coastguard Worker }
2453*61046927SAndroid Build Coastguard Worker }
2454*61046927SAndroid Build Coastguard Worker
2455*61046927SAndroid Build Coastguard Worker list_del(&prev_inst->link);
2456*61046927SAndroid Build Coastguard Worker list_add(&prev_inst->link, &inst->link);
2457*61046927SAndroid Build Coastguard Worker slots_filled++;
2458*61046927SAndroid Build Coastguard Worker }
2459*61046927SAndroid Build Coastguard Worker
2460*61046927SAndroid Build Coastguard Worker block->branch_qpu_ip = c->qpu_inst_count - 1 - slots_filled;
2461*61046927SAndroid Build Coastguard Worker scoreboard->last_branch_tick = branch_tick - slots_filled;
2462*61046927SAndroid Build Coastguard Worker
2463*61046927SAndroid Build Coastguard Worker /* Fill any remaining delay slots.
2464*61046927SAndroid Build Coastguard Worker *
2465*61046927SAndroid Build Coastguard Worker * For unconditional branches we'll try to fill these with the
2466*61046927SAndroid Build Coastguard Worker * first instructions in the successor block after scheduling
2467*61046927SAndroid Build Coastguard Worker * all blocks when setting up branch targets.
2468*61046927SAndroid Build Coastguard Worker */
2469*61046927SAndroid Build Coastguard Worker for (int i = 0; i < 3 - slots_filled; i++)
2470*61046927SAndroid Build Coastguard Worker emit_nop(c, block, scoreboard);
2471*61046927SAndroid Build Coastguard Worker }
2472*61046927SAndroid Build Coastguard Worker
2473*61046927SAndroid Build Coastguard Worker static bool
alu_reads_register(const struct v3d_device_info * devinfo,struct v3d_qpu_instr * inst,bool add,bool magic,uint32_t index)2474*61046927SAndroid Build Coastguard Worker alu_reads_register(const struct v3d_device_info *devinfo,
2475*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr *inst,
2476*61046927SAndroid Build Coastguard Worker bool add, bool magic, uint32_t index)
2477*61046927SAndroid Build Coastguard Worker {
2478*61046927SAndroid Build Coastguard Worker uint32_t num_src;
2479*61046927SAndroid Build Coastguard Worker if (add)
2480*61046927SAndroid Build Coastguard Worker num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
2481*61046927SAndroid Build Coastguard Worker else
2482*61046927SAndroid Build Coastguard Worker num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
2483*61046927SAndroid Build Coastguard Worker
2484*61046927SAndroid Build Coastguard Worker if (devinfo->ver == 42) {
2485*61046927SAndroid Build Coastguard Worker enum v3d_qpu_mux mux_a, mux_b;
2486*61046927SAndroid Build Coastguard Worker if (add) {
2487*61046927SAndroid Build Coastguard Worker mux_a = inst->alu.add.a.mux;
2488*61046927SAndroid Build Coastguard Worker mux_b = inst->alu.add.b.mux;
2489*61046927SAndroid Build Coastguard Worker } else {
2490*61046927SAndroid Build Coastguard Worker mux_a = inst->alu.mul.a.mux;
2491*61046927SAndroid Build Coastguard Worker mux_b = inst->alu.mul.b.mux;
2492*61046927SAndroid Build Coastguard Worker }
2493*61046927SAndroid Build Coastguard Worker
2494*61046927SAndroid Build Coastguard Worker for (int i = 0; i < num_src; i++) {
2495*61046927SAndroid Build Coastguard Worker if (magic) {
2496*61046927SAndroid Build Coastguard Worker if (i == 0 && mux_a == index)
2497*61046927SAndroid Build Coastguard Worker return true;
2498*61046927SAndroid Build Coastguard Worker if (i == 1 && mux_b == index)
2499*61046927SAndroid Build Coastguard Worker return true;
2500*61046927SAndroid Build Coastguard Worker } else {
2501*61046927SAndroid Build Coastguard Worker if (i == 0 && mux_a == V3D_QPU_MUX_A &&
2502*61046927SAndroid Build Coastguard Worker inst->raddr_a == index) {
2503*61046927SAndroid Build Coastguard Worker return true;
2504*61046927SAndroid Build Coastguard Worker }
2505*61046927SAndroid Build Coastguard Worker if (i == 0 && mux_a == V3D_QPU_MUX_B &&
2506*61046927SAndroid Build Coastguard Worker inst->raddr_b == index) {
2507*61046927SAndroid Build Coastguard Worker return true;
2508*61046927SAndroid Build Coastguard Worker }
2509*61046927SAndroid Build Coastguard Worker if (i == 1 && mux_b == V3D_QPU_MUX_A &&
2510*61046927SAndroid Build Coastguard Worker inst->raddr_a == index) {
2511*61046927SAndroid Build Coastguard Worker return true;
2512*61046927SAndroid Build Coastguard Worker }
2513*61046927SAndroid Build Coastguard Worker if (i == 1 && mux_b == V3D_QPU_MUX_B &&
2514*61046927SAndroid Build Coastguard Worker inst->raddr_b == index) {
2515*61046927SAndroid Build Coastguard Worker return true;
2516*61046927SAndroid Build Coastguard Worker }
2517*61046927SAndroid Build Coastguard Worker }
2518*61046927SAndroid Build Coastguard Worker }
2519*61046927SAndroid Build Coastguard Worker
2520*61046927SAndroid Build Coastguard Worker return false;
2521*61046927SAndroid Build Coastguard Worker }
2522*61046927SAndroid Build Coastguard Worker
2523*61046927SAndroid Build Coastguard Worker assert(devinfo->ver >= 71);
2524*61046927SAndroid Build Coastguard Worker assert(!magic);
2525*61046927SAndroid Build Coastguard Worker
2526*61046927SAndroid Build Coastguard Worker uint32_t raddr_a, raddr_b;
2527*61046927SAndroid Build Coastguard Worker if (add) {
2528*61046927SAndroid Build Coastguard Worker raddr_a = inst->alu.add.a.raddr;
2529*61046927SAndroid Build Coastguard Worker raddr_b = inst->alu.add.b.raddr;
2530*61046927SAndroid Build Coastguard Worker } else {
2531*61046927SAndroid Build Coastguard Worker raddr_a = inst->alu.mul.a.raddr;
2532*61046927SAndroid Build Coastguard Worker raddr_b = inst->alu.mul.b.raddr;
2533*61046927SAndroid Build Coastguard Worker }
2534*61046927SAndroid Build Coastguard Worker
2535*61046927SAndroid Build Coastguard Worker for (int i = 0; i < num_src; i++) {
2536*61046927SAndroid Build Coastguard Worker if (i == 0 && raddr_a == index)
2537*61046927SAndroid Build Coastguard Worker return true;
2538*61046927SAndroid Build Coastguard Worker if (i == 1 && raddr_b == index)
2539*61046927SAndroid Build Coastguard Worker return true;
2540*61046927SAndroid Build Coastguard Worker }
2541*61046927SAndroid Build Coastguard Worker
2542*61046927SAndroid Build Coastguard Worker return false;
2543*61046927SAndroid Build Coastguard Worker }
2544*61046927SAndroid Build Coastguard Worker
2545*61046927SAndroid Build Coastguard Worker /**
2546*61046927SAndroid Build Coastguard Worker * This takes an ldvary signal merged into 'inst' and tries to move it up to
2547*61046927SAndroid Build Coastguard Worker * the previous instruction to get good pipelining of ldvary sequences,
2548*61046927SAndroid Build Coastguard Worker * transforming this:
2549*61046927SAndroid Build Coastguard Worker *
2550*61046927SAndroid Build Coastguard Worker * nop ; nop ; ldvary.r4
2551*61046927SAndroid Build Coastguard Worker * nop ; fmul r0, r4, rf0 ;
2552*61046927SAndroid Build Coastguard Worker * fadd rf13, r0, r5 ; nop; ; ldvary.r1 <-- inst
2553*61046927SAndroid Build Coastguard Worker *
2554*61046927SAndroid Build Coastguard Worker * into:
2555*61046927SAndroid Build Coastguard Worker *
2556*61046927SAndroid Build Coastguard Worker * nop ; nop ; ldvary.r4
2557*61046927SAndroid Build Coastguard Worker * nop ; fmul r0, r4, rf0 ; ldvary.r1
2558*61046927SAndroid Build Coastguard Worker * fadd rf13, r0, r5 ; nop; ; <-- inst
2559*61046927SAndroid Build Coastguard Worker *
2560*61046927SAndroid Build Coastguard Worker * If we manage to do this successfully (we return true here), then flagging
2561*61046927SAndroid Build Coastguard Worker * the ldvary as "scheduled" may promote the follow-up fmul to a DAG head that
2562*61046927SAndroid Build Coastguard Worker * we will be able to pick up to merge into 'inst', leading to code like this:
2563*61046927SAndroid Build Coastguard Worker *
2564*61046927SAndroid Build Coastguard Worker * nop ; nop ; ldvary.r4
2565*61046927SAndroid Build Coastguard Worker * nop ; fmul r0, r4, rf0 ; ldvary.r1
2566*61046927SAndroid Build Coastguard Worker * fadd rf13, r0, r5 ; fmul r2, r1, rf0 ; <-- inst
2567*61046927SAndroid Build Coastguard Worker */
2568*61046927SAndroid Build Coastguard Worker static bool
fixup_pipelined_ldvary(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct qblock * block,struct v3d_qpu_instr * inst)2569*61046927SAndroid Build Coastguard Worker fixup_pipelined_ldvary(struct v3d_compile *c,
2570*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
2571*61046927SAndroid Build Coastguard Worker struct qblock *block,
2572*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr *inst)
2573*61046927SAndroid Build Coastguard Worker {
2574*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo = c->devinfo;
2575*61046927SAndroid Build Coastguard Worker
2576*61046927SAndroid Build Coastguard Worker /* We only call this if we have successfully merged an ldvary into a
2577*61046927SAndroid Build Coastguard Worker * previous instruction.
2578*61046927SAndroid Build Coastguard Worker */
2579*61046927SAndroid Build Coastguard Worker assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
2580*61046927SAndroid Build Coastguard Worker assert(inst->sig.ldvary);
2581*61046927SAndroid Build Coastguard Worker uint32_t ldvary_magic = inst->sig_magic;
2582*61046927SAndroid Build Coastguard Worker uint32_t ldvary_index = inst->sig_addr;
2583*61046927SAndroid Build Coastguard Worker
2584*61046927SAndroid Build Coastguard Worker /* The instruction in which we merged the ldvary cannot read
2585*61046927SAndroid Build Coastguard Worker * the ldvary destination, if it does, then moving the ldvary before
2586*61046927SAndroid Build Coastguard Worker * it would overwrite it.
2587*61046927SAndroid Build Coastguard Worker */
2588*61046927SAndroid Build Coastguard Worker if (alu_reads_register(devinfo, inst, true, ldvary_magic, ldvary_index))
2589*61046927SAndroid Build Coastguard Worker return false;
2590*61046927SAndroid Build Coastguard Worker if (alu_reads_register(devinfo, inst, false, ldvary_magic, ldvary_index))
2591*61046927SAndroid Build Coastguard Worker return false;
2592*61046927SAndroid Build Coastguard Worker
2593*61046927SAndroid Build Coastguard Worker /* The implicit ldvary destination may not be written to by a signal
2594*61046927SAndroid Build Coastguard Worker * in the instruction following ldvary. Since we are planning to move
2595*61046927SAndroid Build Coastguard Worker * ldvary to the previous instruction, this means we need to check if
2596*61046927SAndroid Build Coastguard Worker * the current instruction has any other signal that could create this
2597*61046927SAndroid Build Coastguard Worker * conflict. The only other signal that can write to the implicit
2598*61046927SAndroid Build Coastguard Worker * ldvary destination that is compatible with ldvary in the same
2599*61046927SAndroid Build Coastguard Worker * instruction is ldunif.
2600*61046927SAndroid Build Coastguard Worker */
2601*61046927SAndroid Build Coastguard Worker if (inst->sig.ldunif)
2602*61046927SAndroid Build Coastguard Worker return false;
2603*61046927SAndroid Build Coastguard Worker
2604*61046927SAndroid Build Coastguard Worker /* The previous instruction can't write to the same destination as the
2605*61046927SAndroid Build Coastguard Worker * ldvary.
2606*61046927SAndroid Build Coastguard Worker */
2607*61046927SAndroid Build Coastguard Worker struct qinst *prev = (struct qinst *) block->instructions.prev;
2608*61046927SAndroid Build Coastguard Worker if (!prev || prev->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
2609*61046927SAndroid Build Coastguard Worker return false;
2610*61046927SAndroid Build Coastguard Worker
2611*61046927SAndroid Build Coastguard Worker if (prev->qpu.alu.add.op != V3D_QPU_A_NOP) {
2612*61046927SAndroid Build Coastguard Worker if (prev->qpu.alu.add.magic_write == ldvary_magic &&
2613*61046927SAndroid Build Coastguard Worker prev->qpu.alu.add.waddr == ldvary_index) {
2614*61046927SAndroid Build Coastguard Worker return false;
2615*61046927SAndroid Build Coastguard Worker }
2616*61046927SAndroid Build Coastguard Worker }
2617*61046927SAndroid Build Coastguard Worker
2618*61046927SAndroid Build Coastguard Worker if (prev->qpu.alu.mul.op != V3D_QPU_M_NOP) {
2619*61046927SAndroid Build Coastguard Worker if (prev->qpu.alu.mul.magic_write == ldvary_magic &&
2620*61046927SAndroid Build Coastguard Worker prev->qpu.alu.mul.waddr == ldvary_index) {
2621*61046927SAndroid Build Coastguard Worker return false;
2622*61046927SAndroid Build Coastguard Worker }
2623*61046927SAndroid Build Coastguard Worker }
2624*61046927SAndroid Build Coastguard Worker
2625*61046927SAndroid Build Coastguard Worker /* The previous instruction cannot have a conflicting signal */
2626*61046927SAndroid Build Coastguard Worker if (v3d_qpu_sig_writes_address(devinfo, &prev->qpu.sig))
2627*61046927SAndroid Build Coastguard Worker return false;
2628*61046927SAndroid Build Coastguard Worker
2629*61046927SAndroid Build Coastguard Worker uint32_t sig;
2630*61046927SAndroid Build Coastguard Worker struct v3d_qpu_sig new_sig = prev->qpu.sig;
2631*61046927SAndroid Build Coastguard Worker new_sig.ldvary = true;
2632*61046927SAndroid Build Coastguard Worker if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
2633*61046927SAndroid Build Coastguard Worker return false;
2634*61046927SAndroid Build Coastguard Worker
2635*61046927SAndroid Build Coastguard Worker /* The previous instruction cannot use flags since ldvary uses the
2636*61046927SAndroid Build Coastguard Worker * 'cond' instruction field to store the destination.
2637*61046927SAndroid Build Coastguard Worker */
2638*61046927SAndroid Build Coastguard Worker if (v3d_qpu_writes_flags(&prev->qpu))
2639*61046927SAndroid Build Coastguard Worker return false;
2640*61046927SAndroid Build Coastguard Worker if (v3d_qpu_reads_flags(&prev->qpu))
2641*61046927SAndroid Build Coastguard Worker return false;
2642*61046927SAndroid Build Coastguard Worker
2643*61046927SAndroid Build Coastguard Worker /* We can't put an ldvary in the delay slots of a thrsw. We should've
2644*61046927SAndroid Build Coastguard Worker * prevented this when pairing up the ldvary with another instruction
2645*61046927SAndroid Build Coastguard Worker * and flagging it for a fixup. In V3D 7.x this is limited only to the
2646*61046927SAndroid Build Coastguard Worker * second delay slot.
2647*61046927SAndroid Build Coastguard Worker */
2648*61046927SAndroid Build Coastguard Worker assert((devinfo->ver == 42 &&
2649*61046927SAndroid Build Coastguard Worker scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) ||
2650*61046927SAndroid Build Coastguard Worker (devinfo->ver >= 71 &&
2651*61046927SAndroid Build Coastguard Worker scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1));
2652*61046927SAndroid Build Coastguard Worker
2653*61046927SAndroid Build Coastguard Worker /* Move the ldvary to the previous instruction and remove it from the
2654*61046927SAndroid Build Coastguard Worker * current one.
2655*61046927SAndroid Build Coastguard Worker */
2656*61046927SAndroid Build Coastguard Worker prev->qpu.sig.ldvary = true;
2657*61046927SAndroid Build Coastguard Worker prev->qpu.sig_magic = ldvary_magic;
2658*61046927SAndroid Build Coastguard Worker prev->qpu.sig_addr = ldvary_index;
2659*61046927SAndroid Build Coastguard Worker scoreboard->last_ldvary_tick = scoreboard->tick - 1;
2660*61046927SAndroid Build Coastguard Worker
2661*61046927SAndroid Build Coastguard Worker inst->sig.ldvary = false;
2662*61046927SAndroid Build Coastguard Worker inst->sig_magic = false;
2663*61046927SAndroid Build Coastguard Worker inst->sig_addr = 0;
2664*61046927SAndroid Build Coastguard Worker
2665*61046927SAndroid Build Coastguard Worker /* Update rf0 flops tracking for new ldvary delayed rf0 write tick */
2666*61046927SAndroid Build Coastguard Worker if (devinfo->ver >= 71) {
2667*61046927SAndroid Build Coastguard Worker scoreboard->last_implicit_rf0_write_tick = scoreboard->tick;
2668*61046927SAndroid Build Coastguard Worker set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
2669*61046927SAndroid Build Coastguard Worker }
2670*61046927SAndroid Build Coastguard Worker
2671*61046927SAndroid Build Coastguard Worker /* By moving ldvary to the previous instruction we make it update r5
2672*61046927SAndroid Build Coastguard Worker * (rf0 for ver >= 71) in the current one, so nothing else in it
2673*61046927SAndroid Build Coastguard Worker * should write this register.
2674*61046927SAndroid Build Coastguard Worker *
2675*61046927SAndroid Build Coastguard Worker * This should've been prevented by our dependency tracking, which
2676*61046927SAndroid Build Coastguard Worker * would not allow ldvary to be paired up with an instruction that
2677*61046927SAndroid Build Coastguard Worker * writes r5/rf0 (since our dependency tracking doesn't know that the
2678*61046927SAndroid Build Coastguard Worker * ldvary write to r5/rf0 happens in the next instruction).
2679*61046927SAndroid Build Coastguard Worker */
2680*61046927SAndroid Build Coastguard Worker assert(!v3d_qpu_writes_r5(devinfo, inst));
2681*61046927SAndroid Build Coastguard Worker assert(devinfo->ver == 42 ||
2682*61046927SAndroid Build Coastguard Worker (!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
2683*61046927SAndroid Build Coastguard Worker !v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));
2684*61046927SAndroid Build Coastguard Worker
2685*61046927SAndroid Build Coastguard Worker return true;
2686*61046927SAndroid Build Coastguard Worker }
2687*61046927SAndroid Build Coastguard Worker
2688*61046927SAndroid Build Coastguard Worker static uint32_t
schedule_instructions(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct qblock * block,enum quniform_contents * orig_uniform_contents,uint32_t * orig_uniform_data,uint32_t * next_uniform)2689*61046927SAndroid Build Coastguard Worker schedule_instructions(struct v3d_compile *c,
2690*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
2691*61046927SAndroid Build Coastguard Worker struct qblock *block,
2692*61046927SAndroid Build Coastguard Worker enum quniform_contents *orig_uniform_contents,
2693*61046927SAndroid Build Coastguard Worker uint32_t *orig_uniform_data,
2694*61046927SAndroid Build Coastguard Worker uint32_t *next_uniform)
2695*61046927SAndroid Build Coastguard Worker {
2696*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo = c->devinfo;
2697*61046927SAndroid Build Coastguard Worker uint32_t time = 0;
2698*61046927SAndroid Build Coastguard Worker
2699*61046927SAndroid Build Coastguard Worker while (!list_is_empty(&scoreboard->dag->heads)) {
2700*61046927SAndroid Build Coastguard Worker struct schedule_node *chosen =
2701*61046927SAndroid Build Coastguard Worker choose_instruction_to_schedule(c, scoreboard, NULL);
2702*61046927SAndroid Build Coastguard Worker struct schedule_node *merge = NULL;
2703*61046927SAndroid Build Coastguard Worker
2704*61046927SAndroid Build Coastguard Worker /* If there are no valid instructions to schedule, drop a NOP
2705*61046927SAndroid Build Coastguard Worker * in.
2706*61046927SAndroid Build Coastguard Worker */
2707*61046927SAndroid Build Coastguard Worker struct qinst *qinst = chosen ? chosen->inst : vir_nop();
2708*61046927SAndroid Build Coastguard Worker struct v3d_qpu_instr *inst = &qinst->qpu;
2709*61046927SAndroid Build Coastguard Worker
2710*61046927SAndroid Build Coastguard Worker if (debug) {
2711*61046927SAndroid Build Coastguard Worker fprintf(stderr, "t=%4d: current list:\n",
2712*61046927SAndroid Build Coastguard Worker time);
2713*61046927SAndroid Build Coastguard Worker dump_state(devinfo, scoreboard->dag);
2714*61046927SAndroid Build Coastguard Worker fprintf(stderr, "t=%4d: chose: ", time);
2715*61046927SAndroid Build Coastguard Worker v3d_qpu_dump(devinfo, inst);
2716*61046927SAndroid Build Coastguard Worker fprintf(stderr, "\n");
2717*61046927SAndroid Build Coastguard Worker }
2718*61046927SAndroid Build Coastguard Worker
2719*61046927SAndroid Build Coastguard Worker /* We can't mark_instruction_scheduled() the chosen inst until
2720*61046927SAndroid Build Coastguard Worker * we're done identifying instructions to merge, so put the
2721*61046927SAndroid Build Coastguard Worker * merged instructions on a list for a moment.
2722*61046927SAndroid Build Coastguard Worker */
2723*61046927SAndroid Build Coastguard Worker struct list_head merged_list;
2724*61046927SAndroid Build Coastguard Worker list_inithead(&merged_list);
2725*61046927SAndroid Build Coastguard Worker
2726*61046927SAndroid Build Coastguard Worker /* Schedule this instruction onto the QPU list. Also try to
2727*61046927SAndroid Build Coastguard Worker * find an instruction to pair with it.
2728*61046927SAndroid Build Coastguard Worker */
2729*61046927SAndroid Build Coastguard Worker if (chosen) {
2730*61046927SAndroid Build Coastguard Worker time = MAX2(chosen->unblocked_time, time);
2731*61046927SAndroid Build Coastguard Worker pre_remove_head(scoreboard->dag, chosen);
2732*61046927SAndroid Build Coastguard Worker
2733*61046927SAndroid Build Coastguard Worker while ((merge =
2734*61046927SAndroid Build Coastguard Worker choose_instruction_to_schedule(c, scoreboard,
2735*61046927SAndroid Build Coastguard Worker chosen))) {
2736*61046927SAndroid Build Coastguard Worker time = MAX2(merge->unblocked_time, time);
2737*61046927SAndroid Build Coastguard Worker pre_remove_head(scoreboard->dag, merge);
2738*61046927SAndroid Build Coastguard Worker list_addtail(&merge->link, &merged_list);
2739*61046927SAndroid Build Coastguard Worker (void)qpu_merge_inst(devinfo, inst,
2740*61046927SAndroid Build Coastguard Worker inst, &merge->inst->qpu);
2741*61046927SAndroid Build Coastguard Worker if (merge->inst->uniform != -1) {
2742*61046927SAndroid Build Coastguard Worker chosen->inst->uniform =
2743*61046927SAndroid Build Coastguard Worker merge->inst->uniform;
2744*61046927SAndroid Build Coastguard Worker }
2745*61046927SAndroid Build Coastguard Worker
2746*61046927SAndroid Build Coastguard Worker chosen->inst->ldtmu_count +=
2747*61046927SAndroid Build Coastguard Worker merge->inst->ldtmu_count;
2748*61046927SAndroid Build Coastguard Worker
2749*61046927SAndroid Build Coastguard Worker if (debug) {
2750*61046927SAndroid Build Coastguard Worker fprintf(stderr, "t=%4d: merging: ",
2751*61046927SAndroid Build Coastguard Worker time);
2752*61046927SAndroid Build Coastguard Worker v3d_qpu_dump(devinfo, &merge->inst->qpu);
2753*61046927SAndroid Build Coastguard Worker fprintf(stderr, "\n");
2754*61046927SAndroid Build Coastguard Worker fprintf(stderr, " result: ");
2755*61046927SAndroid Build Coastguard Worker v3d_qpu_dump(devinfo, inst);
2756*61046927SAndroid Build Coastguard Worker fprintf(stderr, "\n");
2757*61046927SAndroid Build Coastguard Worker }
2758*61046927SAndroid Build Coastguard Worker
2759*61046927SAndroid Build Coastguard Worker if (scoreboard->fixup_ldvary) {
2760*61046927SAndroid Build Coastguard Worker scoreboard->fixup_ldvary = false;
2761*61046927SAndroid Build Coastguard Worker if (fixup_pipelined_ldvary(c, scoreboard, block, inst)) {
2762*61046927SAndroid Build Coastguard Worker /* Flag the ldvary as scheduled
2763*61046927SAndroid Build Coastguard Worker * now so we can try to merge the
2764*61046927SAndroid Build Coastguard Worker * follow-up instruction in the
2765*61046927SAndroid Build Coastguard Worker * the ldvary sequence into the
2766*61046927SAndroid Build Coastguard Worker * current instruction.
2767*61046927SAndroid Build Coastguard Worker */
2768*61046927SAndroid Build Coastguard Worker mark_instruction_scheduled(
2769*61046927SAndroid Build Coastguard Worker devinfo, scoreboard->dag,
2770*61046927SAndroid Build Coastguard Worker time, merge);
2771*61046927SAndroid Build Coastguard Worker }
2772*61046927SAndroid Build Coastguard Worker }
2773*61046927SAndroid Build Coastguard Worker }
2774*61046927SAndroid Build Coastguard Worker if (read_stalls(c->devinfo, scoreboard, inst))
2775*61046927SAndroid Build Coastguard Worker c->qpu_inst_stalled_count++;
2776*61046927SAndroid Build Coastguard Worker }
2777*61046927SAndroid Build Coastguard Worker
2778*61046927SAndroid Build Coastguard Worker /* Update the uniform index for the rewritten location --
2779*61046927SAndroid Build Coastguard Worker * branch target updating will still need to change
2780*61046927SAndroid Build Coastguard Worker * c->uniform_data[] using this index.
2781*61046927SAndroid Build Coastguard Worker */
2782*61046927SAndroid Build Coastguard Worker if (qinst->uniform != -1) {
2783*61046927SAndroid Build Coastguard Worker if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
2784*61046927SAndroid Build Coastguard Worker block->branch_uniform = *next_uniform;
2785*61046927SAndroid Build Coastguard Worker
2786*61046927SAndroid Build Coastguard Worker c->uniform_data[*next_uniform] =
2787*61046927SAndroid Build Coastguard Worker orig_uniform_data[qinst->uniform];
2788*61046927SAndroid Build Coastguard Worker c->uniform_contents[*next_uniform] =
2789*61046927SAndroid Build Coastguard Worker orig_uniform_contents[qinst->uniform];
2790*61046927SAndroid Build Coastguard Worker qinst->uniform = *next_uniform;
2791*61046927SAndroid Build Coastguard Worker (*next_uniform)++;
2792*61046927SAndroid Build Coastguard Worker }
2793*61046927SAndroid Build Coastguard Worker
2794*61046927SAndroid Build Coastguard Worker if (debug) {
2795*61046927SAndroid Build Coastguard Worker fprintf(stderr, "\n");
2796*61046927SAndroid Build Coastguard Worker }
2797*61046927SAndroid Build Coastguard Worker
2798*61046927SAndroid Build Coastguard Worker /* Now that we've scheduled a new instruction, some of its
2799*61046927SAndroid Build Coastguard Worker * children can be promoted to the list of instructions ready to
2800*61046927SAndroid Build Coastguard Worker * be scheduled. Update the children's unblocked time for this
2801*61046927SAndroid Build Coastguard Worker * DAG edge as we do so.
2802*61046927SAndroid Build Coastguard Worker */
2803*61046927SAndroid Build Coastguard Worker mark_instruction_scheduled(devinfo, scoreboard->dag, time, chosen);
2804*61046927SAndroid Build Coastguard Worker list_for_each_entry(struct schedule_node, merge, &merged_list,
2805*61046927SAndroid Build Coastguard Worker link) {
2806*61046927SAndroid Build Coastguard Worker mark_instruction_scheduled(devinfo, scoreboard->dag, time, merge);
2807*61046927SAndroid Build Coastguard Worker
2808*61046927SAndroid Build Coastguard Worker /* The merged VIR instruction doesn't get re-added to the
2809*61046927SAndroid Build Coastguard Worker * block, so free it now.
2810*61046927SAndroid Build Coastguard Worker */
2811*61046927SAndroid Build Coastguard Worker free(merge->inst);
2812*61046927SAndroid Build Coastguard Worker }
2813*61046927SAndroid Build Coastguard Worker
2814*61046927SAndroid Build Coastguard Worker if (inst->sig.thrsw) {
2815*61046927SAndroid Build Coastguard Worker time += emit_thrsw(c, block, scoreboard, qinst, false);
2816*61046927SAndroid Build Coastguard Worker } else if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
2817*61046927SAndroid Build Coastguard Worker emit_branch(c, block, scoreboard, qinst);
2818*61046927SAndroid Build Coastguard Worker } else {
2819*61046927SAndroid Build Coastguard Worker insert_scheduled_instruction(c, block,
2820*61046927SAndroid Build Coastguard Worker scoreboard, qinst);
2821*61046927SAndroid Build Coastguard Worker }
2822*61046927SAndroid Build Coastguard Worker }
2823*61046927SAndroid Build Coastguard Worker
2824*61046927SAndroid Build Coastguard Worker return time;
2825*61046927SAndroid Build Coastguard Worker }
2826*61046927SAndroid Build Coastguard Worker
2827*61046927SAndroid Build Coastguard Worker static uint32_t
qpu_schedule_instructions_block(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct qblock * block,enum quniform_contents * orig_uniform_contents,uint32_t * orig_uniform_data,uint32_t * next_uniform)2828*61046927SAndroid Build Coastguard Worker qpu_schedule_instructions_block(struct v3d_compile *c,
2829*61046927SAndroid Build Coastguard Worker struct choose_scoreboard *scoreboard,
2830*61046927SAndroid Build Coastguard Worker struct qblock *block,
2831*61046927SAndroid Build Coastguard Worker enum quniform_contents *orig_uniform_contents,
2832*61046927SAndroid Build Coastguard Worker uint32_t *orig_uniform_data,
2833*61046927SAndroid Build Coastguard Worker uint32_t *next_uniform)
2834*61046927SAndroid Build Coastguard Worker {
2835*61046927SAndroid Build Coastguard Worker void *mem_ctx = ralloc_context(NULL);
2836*61046927SAndroid Build Coastguard Worker scoreboard->dag = dag_create(mem_ctx);
2837*61046927SAndroid Build Coastguard Worker struct list_head setup_list;
2838*61046927SAndroid Build Coastguard Worker
2839*61046927SAndroid Build Coastguard Worker list_inithead(&setup_list);
2840*61046927SAndroid Build Coastguard Worker
2841*61046927SAndroid Build Coastguard Worker /* Wrap each instruction in a scheduler structure. */
2842*61046927SAndroid Build Coastguard Worker while (!list_is_empty(&block->instructions)) {
2843*61046927SAndroid Build Coastguard Worker struct qinst *qinst = (struct qinst *)block->instructions.next;
2844*61046927SAndroid Build Coastguard Worker struct schedule_node *n =
2845*61046927SAndroid Build Coastguard Worker rzalloc(mem_ctx, struct schedule_node);
2846*61046927SAndroid Build Coastguard Worker
2847*61046927SAndroid Build Coastguard Worker dag_init_node(scoreboard->dag, &n->dag);
2848*61046927SAndroid Build Coastguard Worker n->inst = qinst;
2849*61046927SAndroid Build Coastguard Worker
2850*61046927SAndroid Build Coastguard Worker list_del(&qinst->link);
2851*61046927SAndroid Build Coastguard Worker list_addtail(&n->link, &setup_list);
2852*61046927SAndroid Build Coastguard Worker }
2853*61046927SAndroid Build Coastguard Worker
2854*61046927SAndroid Build Coastguard Worker calculate_forward_deps(c, scoreboard->dag, &setup_list);
2855*61046927SAndroid Build Coastguard Worker calculate_reverse_deps(c, scoreboard->dag, &setup_list);
2856*61046927SAndroid Build Coastguard Worker
2857*61046927SAndroid Build Coastguard Worker dag_traverse_bottom_up(scoreboard->dag, compute_delay, c);
2858*61046927SAndroid Build Coastguard Worker
2859*61046927SAndroid Build Coastguard Worker uint32_t cycles = schedule_instructions(c, scoreboard, block,
2860*61046927SAndroid Build Coastguard Worker orig_uniform_contents,
2861*61046927SAndroid Build Coastguard Worker orig_uniform_data,
2862*61046927SAndroid Build Coastguard Worker next_uniform);
2863*61046927SAndroid Build Coastguard Worker
2864*61046927SAndroid Build Coastguard Worker ralloc_free(mem_ctx);
2865*61046927SAndroid Build Coastguard Worker scoreboard->dag = NULL;
2866*61046927SAndroid Build Coastguard Worker
2867*61046927SAndroid Build Coastguard Worker return cycles;
2868*61046927SAndroid Build Coastguard Worker }
2869*61046927SAndroid Build Coastguard Worker
2870*61046927SAndroid Build Coastguard Worker static void
qpu_set_branch_targets(struct v3d_compile * c)2871*61046927SAndroid Build Coastguard Worker qpu_set_branch_targets(struct v3d_compile *c)
2872*61046927SAndroid Build Coastguard Worker {
2873*61046927SAndroid Build Coastguard Worker vir_for_each_block(block, c) {
2874*61046927SAndroid Build Coastguard Worker /* The end block of the program has no branch. */
2875*61046927SAndroid Build Coastguard Worker if (!block->successors[0])
2876*61046927SAndroid Build Coastguard Worker continue;
2877*61046927SAndroid Build Coastguard Worker
2878*61046927SAndroid Build Coastguard Worker /* If there was no branch instruction, then the successor
2879*61046927SAndroid Build Coastguard Worker * block must follow immediately after this one.
2880*61046927SAndroid Build Coastguard Worker */
2881*61046927SAndroid Build Coastguard Worker if (block->branch_qpu_ip == ~0) {
2882*61046927SAndroid Build Coastguard Worker assert(block->end_qpu_ip + 1 ==
2883*61046927SAndroid Build Coastguard Worker block->successors[0]->start_qpu_ip);
2884*61046927SAndroid Build Coastguard Worker continue;
2885*61046927SAndroid Build Coastguard Worker }
2886*61046927SAndroid Build Coastguard Worker
2887*61046927SAndroid Build Coastguard Worker /* Walk back through the delay slots to find the branch
2888*61046927SAndroid Build Coastguard Worker * instr.
2889*61046927SAndroid Build Coastguard Worker */
2890*61046927SAndroid Build Coastguard Worker struct qinst *branch = NULL;
2891*61046927SAndroid Build Coastguard Worker struct list_head *entry = block->instructions.prev;
2892*61046927SAndroid Build Coastguard Worker int32_t delay_slot_count = -1;
2893*61046927SAndroid Build Coastguard Worker struct qinst *delay_slots_start = NULL;
2894*61046927SAndroid Build Coastguard Worker for (int i = 0; i < 3; i++) {
2895*61046927SAndroid Build Coastguard Worker entry = entry->prev;
2896*61046927SAndroid Build Coastguard Worker struct qinst *inst =
2897*61046927SAndroid Build Coastguard Worker container_of(entry, struct qinst, link);
2898*61046927SAndroid Build Coastguard Worker
2899*61046927SAndroid Build Coastguard Worker if (delay_slot_count == -1) {
2900*61046927SAndroid Build Coastguard Worker if (!v3d_qpu_is_nop(&inst->qpu))
2901*61046927SAndroid Build Coastguard Worker delay_slot_count = i;
2902*61046927SAndroid Build Coastguard Worker else
2903*61046927SAndroid Build Coastguard Worker delay_slots_start = inst;
2904*61046927SAndroid Build Coastguard Worker }
2905*61046927SAndroid Build Coastguard Worker
2906*61046927SAndroid Build Coastguard Worker if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
2907*61046927SAndroid Build Coastguard Worker branch = inst;
2908*61046927SAndroid Build Coastguard Worker break;
2909*61046927SAndroid Build Coastguard Worker }
2910*61046927SAndroid Build Coastguard Worker }
2911*61046927SAndroid Build Coastguard Worker assert(branch && branch->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
2912*61046927SAndroid Build Coastguard Worker assert(delay_slot_count >= 0 && delay_slot_count <= 3);
2913*61046927SAndroid Build Coastguard Worker assert(delay_slot_count == 0 || delay_slots_start != NULL);
2914*61046927SAndroid Build Coastguard Worker
2915*61046927SAndroid Build Coastguard Worker /* Make sure that the if-we-don't-jump
2916*61046927SAndroid Build Coastguard Worker * successor was scheduled just after the
2917*61046927SAndroid Build Coastguard Worker * delay slots.
2918*61046927SAndroid Build Coastguard Worker */
2919*61046927SAndroid Build Coastguard Worker assert(!block->successors[1] ||
2920*61046927SAndroid Build Coastguard Worker block->successors[1]->start_qpu_ip ==
2921*61046927SAndroid Build Coastguard Worker block->branch_qpu_ip + 4);
2922*61046927SAndroid Build Coastguard Worker
2923*61046927SAndroid Build Coastguard Worker branch->qpu.branch.offset =
2924*61046927SAndroid Build Coastguard Worker ((block->successors[0]->start_qpu_ip -
2925*61046927SAndroid Build Coastguard Worker (block->branch_qpu_ip + 4)) *
2926*61046927SAndroid Build Coastguard Worker sizeof(uint64_t));
2927*61046927SAndroid Build Coastguard Worker
2928*61046927SAndroid Build Coastguard Worker /* Set up the relative offset to jump in the
2929*61046927SAndroid Build Coastguard Worker * uniform stream.
2930*61046927SAndroid Build Coastguard Worker *
2931*61046927SAndroid Build Coastguard Worker * Use a temporary here, because
2932*61046927SAndroid Build Coastguard Worker * uniform_data[inst->uniform] may be shared
2933*61046927SAndroid Build Coastguard Worker * between multiple instructions.
2934*61046927SAndroid Build Coastguard Worker */
2935*61046927SAndroid Build Coastguard Worker assert(c->uniform_contents[branch->uniform] == QUNIFORM_CONSTANT);
2936*61046927SAndroid Build Coastguard Worker c->uniform_data[branch->uniform] =
2937*61046927SAndroid Build Coastguard Worker (block->successors[0]->start_uniform -
2938*61046927SAndroid Build Coastguard Worker (block->branch_uniform + 1)) * 4;
2939*61046927SAndroid Build Coastguard Worker
2940*61046927SAndroid Build Coastguard Worker /* If this is an unconditional branch, try to fill any remaining
2941*61046927SAndroid Build Coastguard Worker * delay slots with the initial instructions of the successor
2942*61046927SAndroid Build Coastguard Worker * block.
2943*61046927SAndroid Build Coastguard Worker *
2944*61046927SAndroid Build Coastguard Worker * FIXME: we can do the same for conditional branches if we
2945*61046927SAndroid Build Coastguard Worker * predicate the instructions to match the branch condition.
2946*61046927SAndroid Build Coastguard Worker */
2947*61046927SAndroid Build Coastguard Worker if (branch->qpu.branch.cond == V3D_QPU_BRANCH_COND_ALWAYS) {
2948*61046927SAndroid Build Coastguard Worker struct list_head *successor_insts =
2949*61046927SAndroid Build Coastguard Worker &block->successors[0]->instructions;
2950*61046927SAndroid Build Coastguard Worker delay_slot_count = MIN2(delay_slot_count,
2951*61046927SAndroid Build Coastguard Worker list_length(successor_insts));
2952*61046927SAndroid Build Coastguard Worker struct qinst *s_inst =
2953*61046927SAndroid Build Coastguard Worker (struct qinst *) successor_insts->next;
2954*61046927SAndroid Build Coastguard Worker struct qinst *slot = delay_slots_start;
2955*61046927SAndroid Build Coastguard Worker int slots_filled = 0;
2956*61046927SAndroid Build Coastguard Worker while (slots_filled < delay_slot_count &&
2957*61046927SAndroid Build Coastguard Worker qpu_inst_valid_in_branch_delay_slot(c, s_inst)) {
2958*61046927SAndroid Build Coastguard Worker memcpy(&slot->qpu, &s_inst->qpu,
2959*61046927SAndroid Build Coastguard Worker sizeof(slot->qpu));
2960*61046927SAndroid Build Coastguard Worker s_inst = (struct qinst *) s_inst->link.next;
2961*61046927SAndroid Build Coastguard Worker slot = (struct qinst *) slot->link.next;
2962*61046927SAndroid Build Coastguard Worker slots_filled++;
2963*61046927SAndroid Build Coastguard Worker }
2964*61046927SAndroid Build Coastguard Worker branch->qpu.branch.offset +=
2965*61046927SAndroid Build Coastguard Worker slots_filled * sizeof(uint64_t);
2966*61046927SAndroid Build Coastguard Worker }
2967*61046927SAndroid Build Coastguard Worker }
2968*61046927SAndroid Build Coastguard Worker }
2969*61046927SAndroid Build Coastguard Worker
2970*61046927SAndroid Build Coastguard Worker uint32_t
v3d_qpu_schedule_instructions(struct v3d_compile * c)2971*61046927SAndroid Build Coastguard Worker v3d_qpu_schedule_instructions(struct v3d_compile *c)
2972*61046927SAndroid Build Coastguard Worker {
2973*61046927SAndroid Build Coastguard Worker const struct v3d_device_info *devinfo = c->devinfo;
2974*61046927SAndroid Build Coastguard Worker struct qblock *end_block = list_last_entry(&c->blocks,
2975*61046927SAndroid Build Coastguard Worker struct qblock, link);
2976*61046927SAndroid Build Coastguard Worker
2977*61046927SAndroid Build Coastguard Worker /* We reorder the uniforms as we schedule instructions, so save the
2978*61046927SAndroid Build Coastguard Worker * old data off and replace it.
2979*61046927SAndroid Build Coastguard Worker */
2980*61046927SAndroid Build Coastguard Worker uint32_t *uniform_data = c->uniform_data;
2981*61046927SAndroid Build Coastguard Worker enum quniform_contents *uniform_contents = c->uniform_contents;
2982*61046927SAndroid Build Coastguard Worker c->uniform_contents = ralloc_array(c, enum quniform_contents,
2983*61046927SAndroid Build Coastguard Worker c->num_uniforms);
2984*61046927SAndroid Build Coastguard Worker c->uniform_data = ralloc_array(c, uint32_t, c->num_uniforms);
2985*61046927SAndroid Build Coastguard Worker c->uniform_array_size = c->num_uniforms;
2986*61046927SAndroid Build Coastguard Worker uint32_t next_uniform = 0;
2987*61046927SAndroid Build Coastguard Worker
2988*61046927SAndroid Build Coastguard Worker struct choose_scoreboard scoreboard;
2989*61046927SAndroid Build Coastguard Worker memset(&scoreboard, 0, sizeof(scoreboard));
2990*61046927SAndroid Build Coastguard Worker scoreboard.last_ldvary_tick = -10;
2991*61046927SAndroid Build Coastguard Worker scoreboard.last_unifa_write_tick = -10;
2992*61046927SAndroid Build Coastguard Worker scoreboard.last_magic_sfu_write_tick = -10;
2993*61046927SAndroid Build Coastguard Worker scoreboard.last_uniforms_reset_tick = -10;
2994*61046927SAndroid Build Coastguard Worker scoreboard.last_thrsw_tick = -10;
2995*61046927SAndroid Build Coastguard Worker scoreboard.last_branch_tick = -10;
2996*61046927SAndroid Build Coastguard Worker scoreboard.last_setmsf_tick = -10;
2997*61046927SAndroid Build Coastguard Worker scoreboard.last_stallable_sfu_tick = -10;
2998*61046927SAndroid Build Coastguard Worker scoreboard.first_ldtmu_after_thrsw = true;
2999*61046927SAndroid Build Coastguard Worker scoreboard.last_implicit_rf0_write_tick = - 10;
3000*61046927SAndroid Build Coastguard Worker
3001*61046927SAndroid Build Coastguard Worker if (debug) {
3002*61046927SAndroid Build Coastguard Worker fprintf(stderr, "Pre-schedule instructions\n");
3003*61046927SAndroid Build Coastguard Worker vir_for_each_block(block, c) {
3004*61046927SAndroid Build Coastguard Worker fprintf(stderr, "BLOCK %d\n", block->index);
3005*61046927SAndroid Build Coastguard Worker list_for_each_entry(struct qinst, qinst,
3006*61046927SAndroid Build Coastguard Worker &block->instructions, link) {
3007*61046927SAndroid Build Coastguard Worker v3d_qpu_dump(devinfo, &qinst->qpu);
3008*61046927SAndroid Build Coastguard Worker fprintf(stderr, "\n");
3009*61046927SAndroid Build Coastguard Worker }
3010*61046927SAndroid Build Coastguard Worker }
3011*61046927SAndroid Build Coastguard Worker fprintf(stderr, "\n");
3012*61046927SAndroid Build Coastguard Worker }
3013*61046927SAndroid Build Coastguard Worker
3014*61046927SAndroid Build Coastguard Worker uint32_t cycles = 0;
3015*61046927SAndroid Build Coastguard Worker vir_for_each_block(block, c) {
3016*61046927SAndroid Build Coastguard Worker block->start_qpu_ip = c->qpu_inst_count;
3017*61046927SAndroid Build Coastguard Worker block->branch_qpu_ip = ~0;
3018*61046927SAndroid Build Coastguard Worker block->start_uniform = next_uniform;
3019*61046927SAndroid Build Coastguard Worker
3020*61046927SAndroid Build Coastguard Worker cycles += qpu_schedule_instructions_block(c,
3021*61046927SAndroid Build Coastguard Worker &scoreboard,
3022*61046927SAndroid Build Coastguard Worker block,
3023*61046927SAndroid Build Coastguard Worker uniform_contents,
3024*61046927SAndroid Build Coastguard Worker uniform_data,
3025*61046927SAndroid Build Coastguard Worker &next_uniform);
3026*61046927SAndroid Build Coastguard Worker
3027*61046927SAndroid Build Coastguard Worker block->end_qpu_ip = c->qpu_inst_count - 1;
3028*61046927SAndroid Build Coastguard Worker }
3029*61046927SAndroid Build Coastguard Worker
3030*61046927SAndroid Build Coastguard Worker /* Emit the program-end THRSW instruction. */;
3031*61046927SAndroid Build Coastguard Worker struct qinst *thrsw = vir_nop();
3032*61046927SAndroid Build Coastguard Worker thrsw->qpu.sig.thrsw = true;
3033*61046927SAndroid Build Coastguard Worker emit_thrsw(c, end_block, &scoreboard, thrsw, true);
3034*61046927SAndroid Build Coastguard Worker
3035*61046927SAndroid Build Coastguard Worker qpu_set_branch_targets(c);
3036*61046927SAndroid Build Coastguard Worker
3037*61046927SAndroid Build Coastguard Worker assert(next_uniform == c->num_uniforms);
3038*61046927SAndroid Build Coastguard Worker
3039*61046927SAndroid Build Coastguard Worker return cycles;
3040*61046927SAndroid Build Coastguard Worker }
3041