xref: /aosp_15_r20/external/mesa3d/src/broadcom/compiler/qpu_schedule.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2010 Intel Corporation
3*61046927SAndroid Build Coastguard Worker  * Copyright © 2014-2017 Broadcom
4*61046927SAndroid Build Coastguard Worker  *
5*61046927SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
6*61046927SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
7*61046927SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
8*61046927SAndroid Build Coastguard Worker  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9*61046927SAndroid Build Coastguard Worker  * and/or sell copies of the Software, and to permit persons to whom the
10*61046927SAndroid Build Coastguard Worker  * Software is furnished to do so, subject to the following conditions:
11*61046927SAndroid Build Coastguard Worker  *
12*61046927SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice (including the next
13*61046927SAndroid Build Coastguard Worker  * paragraph) shall be included in all copies or substantial portions of the
14*61046927SAndroid Build Coastguard Worker  * Software.
15*61046927SAndroid Build Coastguard Worker  *
16*61046927SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*61046927SAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*61046927SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19*61046927SAndroid Build Coastguard Worker  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*61046927SAndroid Build Coastguard Worker  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21*61046927SAndroid Build Coastguard Worker  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22*61046927SAndroid Build Coastguard Worker  * IN THE SOFTWARE.
23*61046927SAndroid Build Coastguard Worker  */
24*61046927SAndroid Build Coastguard Worker 
25*61046927SAndroid Build Coastguard Worker /**
26*61046927SAndroid Build Coastguard Worker  * @file
27*61046927SAndroid Build Coastguard Worker  *
28*61046927SAndroid Build Coastguard Worker  * The basic model of the list scheduler is to take a basic block, compute a
29*61046927SAndroid Build Coastguard Worker  * DAG of the dependencies, and make a list of the DAG heads.  Heuristically
30*61046927SAndroid Build Coastguard Worker  * pick a DAG head, then put all the children that are now DAG heads into the
31*61046927SAndroid Build Coastguard Worker  * list of things to schedule.
32*61046927SAndroid Build Coastguard Worker  *
33*61046927SAndroid Build Coastguard Worker  * The goal of scheduling here is to pack pairs of operations together in a
34*61046927SAndroid Build Coastguard Worker  * single QPU instruction.
35*61046927SAndroid Build Coastguard Worker  */
36*61046927SAndroid Build Coastguard Worker 
37*61046927SAndroid Build Coastguard Worker #include "qpu/qpu_disasm.h"
38*61046927SAndroid Build Coastguard Worker #include "v3d_compiler.h"
39*61046927SAndroid Build Coastguard Worker #include "util/ralloc.h"
40*61046927SAndroid Build Coastguard Worker #include "util/dag.h"
41*61046927SAndroid Build Coastguard Worker 
42*61046927SAndroid Build Coastguard Worker static bool debug;
43*61046927SAndroid Build Coastguard Worker 
44*61046927SAndroid Build Coastguard Worker struct schedule_node_child;
45*61046927SAndroid Build Coastguard Worker 
46*61046927SAndroid Build Coastguard Worker struct schedule_node {
47*61046927SAndroid Build Coastguard Worker         struct dag_node dag;
48*61046927SAndroid Build Coastguard Worker         struct list_head link;
49*61046927SAndroid Build Coastguard Worker         struct qinst *inst;
50*61046927SAndroid Build Coastguard Worker 
51*61046927SAndroid Build Coastguard Worker         /* Longest cycles + instruction_latency() of any parent of this node. */
52*61046927SAndroid Build Coastguard Worker         uint32_t unblocked_time;
53*61046927SAndroid Build Coastguard Worker 
54*61046927SAndroid Build Coastguard Worker         /**
55*61046927SAndroid Build Coastguard Worker          * Minimum number of cycles from scheduling this instruction until the
56*61046927SAndroid Build Coastguard Worker          * end of the program, based on the slowest dependency chain through
57*61046927SAndroid Build Coastguard Worker          * the children.
58*61046927SAndroid Build Coastguard Worker          */
59*61046927SAndroid Build Coastguard Worker         uint32_t delay;
60*61046927SAndroid Build Coastguard Worker 
61*61046927SAndroid Build Coastguard Worker         /**
62*61046927SAndroid Build Coastguard Worker          * cycles between this instruction being scheduled and when its result
63*61046927SAndroid Build Coastguard Worker          * can be consumed.
64*61046927SAndroid Build Coastguard Worker          */
65*61046927SAndroid Build Coastguard Worker         uint32_t latency;
66*61046927SAndroid Build Coastguard Worker };
67*61046927SAndroid Build Coastguard Worker 
/* Direction in which the instructions are being walked.  In a forward walk
 * (F) add_dep() adds the DAG edge from "before" to "after"; when walking the
 * instructions in reverse (R) it swaps the two.
 */
enum direction { F, R };
72*61046927SAndroid Build Coastguard Worker 
73*61046927SAndroid Build Coastguard Worker struct schedule_state {
74*61046927SAndroid Build Coastguard Worker         const struct v3d_device_info *devinfo;
75*61046927SAndroid Build Coastguard Worker         struct dag *dag;
76*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_r[6];
77*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_rf[64];
78*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_sf;
79*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_vpm_read;
80*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_tmu_write;
81*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_tmu_config;
82*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_tmu_read;
83*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_tlb;
84*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_vpm;
85*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_unif;
86*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_rtop;
87*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_unifa;
88*61046927SAndroid Build Coastguard Worker         struct schedule_node *last_setmsf;
89*61046927SAndroid Build Coastguard Worker         enum direction dir;
90*61046927SAndroid Build Coastguard Worker         /* Estimated cycle when the current instruction would start. */
91*61046927SAndroid Build Coastguard Worker         uint32_t time;
92*61046927SAndroid Build Coastguard Worker };
93*61046927SAndroid Build Coastguard Worker 
94*61046927SAndroid Build Coastguard Worker static void
add_dep(struct schedule_state * state,struct schedule_node * before,struct schedule_node * after,bool write)95*61046927SAndroid Build Coastguard Worker add_dep(struct schedule_state *state,
96*61046927SAndroid Build Coastguard Worker         struct schedule_node *before,
97*61046927SAndroid Build Coastguard Worker         struct schedule_node *after,
98*61046927SAndroid Build Coastguard Worker         bool write)
99*61046927SAndroid Build Coastguard Worker {
100*61046927SAndroid Build Coastguard Worker         bool write_after_read = !write && state->dir == R;
101*61046927SAndroid Build Coastguard Worker         uintptr_t edge_data = write_after_read;
102*61046927SAndroid Build Coastguard Worker 
103*61046927SAndroid Build Coastguard Worker         if (!before || !after)
104*61046927SAndroid Build Coastguard Worker                 return;
105*61046927SAndroid Build Coastguard Worker 
106*61046927SAndroid Build Coastguard Worker         assert(before != after);
107*61046927SAndroid Build Coastguard Worker 
108*61046927SAndroid Build Coastguard Worker         if (state->dir == F)
109*61046927SAndroid Build Coastguard Worker                 dag_add_edge(&before->dag, &after->dag, edge_data);
110*61046927SAndroid Build Coastguard Worker         else
111*61046927SAndroid Build Coastguard Worker                 dag_add_edge(&after->dag, &before->dag, edge_data);
112*61046927SAndroid Build Coastguard Worker }
113*61046927SAndroid Build Coastguard Worker 
/**
 * Records that @after reads something produced by @before.
 *
 * Thin wrapper around add_dep() with write == false; unlike
 * add_write_dep(), it does not update any "last writer" slot.
 */
static void
add_read_dep(struct schedule_state *state,
              struct schedule_node *before,
              struct schedule_node *after)
{
        const bool is_write = false;

        add_dep(state, before, after, is_write);
}
121*61046927SAndroid Build Coastguard Worker 
/**
 * Records that @after writes a resource whose previous node is *@before,
 * then makes @after the new tracked node for that resource.
 *
 * @param state  scheduling state
 * @param before in/out slot holding the previously tracked node (the slot
 *               may hold NULL); overwritten with @after on return
 * @param after  node performing the write
 */
static void
add_write_dep(struct schedule_state *state,
              struct schedule_node **before,
              struct schedule_node *after)
{
        struct schedule_node *previous = *before;

        add_dep(state, previous, after, true);
        *before = after;
}
130*61046927SAndroid Build Coastguard Worker 
131*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_is_tlb(const struct v3d_qpu_instr * inst)132*61046927SAndroid Build Coastguard Worker qpu_inst_is_tlb(const struct v3d_qpu_instr *inst)
133*61046927SAndroid Build Coastguard Worker {
134*61046927SAndroid Build Coastguard Worker         if (inst->sig.ldtlb || inst->sig.ldtlbu)
135*61046927SAndroid Build Coastguard Worker                 return true;
136*61046927SAndroid Build Coastguard Worker 
137*61046927SAndroid Build Coastguard Worker         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
138*61046927SAndroid Build Coastguard Worker                 return false;
139*61046927SAndroid Build Coastguard Worker 
140*61046927SAndroid Build Coastguard Worker         if (inst->alu.add.op != V3D_QPU_A_NOP &&
141*61046927SAndroid Build Coastguard Worker             inst->alu.add.magic_write &&
142*61046927SAndroid Build Coastguard Worker             (inst->alu.add.waddr == V3D_QPU_WADDR_TLB ||
143*61046927SAndroid Build Coastguard Worker              inst->alu.add.waddr == V3D_QPU_WADDR_TLBU))
144*61046927SAndroid Build Coastguard Worker                 return true;
145*61046927SAndroid Build Coastguard Worker 
146*61046927SAndroid Build Coastguard Worker         if (inst->alu.mul.op != V3D_QPU_M_NOP &&
147*61046927SAndroid Build Coastguard Worker             inst->alu.mul.magic_write &&
148*61046927SAndroid Build Coastguard Worker             (inst->alu.mul.waddr == V3D_QPU_WADDR_TLB ||
149*61046927SAndroid Build Coastguard Worker              inst->alu.mul.waddr == V3D_QPU_WADDR_TLBU))
150*61046927SAndroid Build Coastguard Worker                 return true;
151*61046927SAndroid Build Coastguard Worker 
152*61046927SAndroid Build Coastguard Worker         return false;
153*61046927SAndroid Build Coastguard Worker }
154*61046927SAndroid Build Coastguard Worker 
155*61046927SAndroid Build Coastguard Worker static void
process_mux_deps(struct schedule_state * state,struct schedule_node * n,enum v3d_qpu_mux mux)156*61046927SAndroid Build Coastguard Worker process_mux_deps(struct schedule_state *state, struct schedule_node *n,
157*61046927SAndroid Build Coastguard Worker                  enum v3d_qpu_mux mux)
158*61046927SAndroid Build Coastguard Worker {
159*61046927SAndroid Build Coastguard Worker         assert(state->devinfo->ver < 71);
160*61046927SAndroid Build Coastguard Worker         switch (mux) {
161*61046927SAndroid Build Coastguard Worker         case V3D_QPU_MUX_A:
162*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
163*61046927SAndroid Build Coastguard Worker                 break;
164*61046927SAndroid Build Coastguard Worker         case V3D_QPU_MUX_B:
165*61046927SAndroid Build Coastguard Worker                 if (!n->inst->qpu.sig.small_imm_b) {
166*61046927SAndroid Build Coastguard Worker                         add_read_dep(state,
167*61046927SAndroid Build Coastguard Worker                                      state->last_rf[n->inst->qpu.raddr_b], n);
168*61046927SAndroid Build Coastguard Worker                 }
169*61046927SAndroid Build Coastguard Worker                 break;
170*61046927SAndroid Build Coastguard Worker         default:
171*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_r[mux - V3D_QPU_MUX_R0], n);
172*61046927SAndroid Build Coastguard Worker                 break;
173*61046927SAndroid Build Coastguard Worker         }
174*61046927SAndroid Build Coastguard Worker }
175*61046927SAndroid Build Coastguard Worker 
176*61046927SAndroid Build Coastguard Worker 
177*61046927SAndroid Build Coastguard Worker static void
process_raddr_deps(struct schedule_state * state,struct schedule_node * n,uint8_t raddr,bool is_small_imm)178*61046927SAndroid Build Coastguard Worker process_raddr_deps(struct schedule_state *state, struct schedule_node *n,
179*61046927SAndroid Build Coastguard Worker                    uint8_t raddr, bool is_small_imm)
180*61046927SAndroid Build Coastguard Worker {
181*61046927SAndroid Build Coastguard Worker         assert(state->devinfo->ver >= 71);
182*61046927SAndroid Build Coastguard Worker 
183*61046927SAndroid Build Coastguard Worker         if (!is_small_imm)
184*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_rf[raddr], n);
185*61046927SAndroid Build Coastguard Worker }
186*61046927SAndroid Build Coastguard Worker 
187*61046927SAndroid Build Coastguard Worker static bool
tmu_write_is_sequence_terminator(uint32_t waddr)188*61046927SAndroid Build Coastguard Worker tmu_write_is_sequence_terminator(uint32_t waddr)
189*61046927SAndroid Build Coastguard Worker {
190*61046927SAndroid Build Coastguard Worker         switch (waddr) {
191*61046927SAndroid Build Coastguard Worker         case V3D_QPU_WADDR_TMUS:
192*61046927SAndroid Build Coastguard Worker         case V3D_QPU_WADDR_TMUSCM:
193*61046927SAndroid Build Coastguard Worker         case V3D_QPU_WADDR_TMUSF:
194*61046927SAndroid Build Coastguard Worker         case V3D_QPU_WADDR_TMUSLOD:
195*61046927SAndroid Build Coastguard Worker         case V3D_QPU_WADDR_TMUA:
196*61046927SAndroid Build Coastguard Worker         case V3D_QPU_WADDR_TMUAU:
197*61046927SAndroid Build Coastguard Worker                 return true;
198*61046927SAndroid Build Coastguard Worker         default:
199*61046927SAndroid Build Coastguard Worker                 return false;
200*61046927SAndroid Build Coastguard Worker         }
201*61046927SAndroid Build Coastguard Worker }
202*61046927SAndroid Build Coastguard Worker 
203*61046927SAndroid Build Coastguard Worker static bool
can_reorder_tmu_write(const struct v3d_device_info * devinfo,uint32_t waddr)204*61046927SAndroid Build Coastguard Worker can_reorder_tmu_write(const struct v3d_device_info *devinfo, uint32_t waddr)
205*61046927SAndroid Build Coastguard Worker {
206*61046927SAndroid Build Coastguard Worker         if (tmu_write_is_sequence_terminator(waddr))
207*61046927SAndroid Build Coastguard Worker                 return false;
208*61046927SAndroid Build Coastguard Worker 
209*61046927SAndroid Build Coastguard Worker         if (waddr == V3D_QPU_WADDR_TMUD)
210*61046927SAndroid Build Coastguard Worker                 return false;
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker         return true;
213*61046927SAndroid Build Coastguard Worker }
214*61046927SAndroid Build Coastguard Worker 
215*61046927SAndroid Build Coastguard Worker static void
process_waddr_deps(struct schedule_state * state,struct schedule_node * n,uint32_t waddr,bool magic)216*61046927SAndroid Build Coastguard Worker process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
217*61046927SAndroid Build Coastguard Worker                    uint32_t waddr, bool magic)
218*61046927SAndroid Build Coastguard Worker {
219*61046927SAndroid Build Coastguard Worker         if (!magic) {
220*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_rf[waddr], n);
221*61046927SAndroid Build Coastguard Worker         } else if (v3d_qpu_magic_waddr_is_tmu(state->devinfo, waddr)) {
222*61046927SAndroid Build Coastguard Worker                 if (can_reorder_tmu_write(state->devinfo, waddr))
223*61046927SAndroid Build Coastguard Worker                         add_read_dep(state, state->last_tmu_write, n);
224*61046927SAndroid Build Coastguard Worker                 else
225*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_tmu_write, n);
226*61046927SAndroid Build Coastguard Worker 
227*61046927SAndroid Build Coastguard Worker                 if (tmu_write_is_sequence_terminator(waddr))
228*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_tmu_config, n);
229*61046927SAndroid Build Coastguard Worker         } else if (v3d_qpu_magic_waddr_is_sfu(waddr)) {
230*61046927SAndroid Build Coastguard Worker                 /* Handled by v3d_qpu_writes_r4() check. */
231*61046927SAndroid Build Coastguard Worker         } else {
232*61046927SAndroid Build Coastguard Worker                 switch (waddr) {
233*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_R0:
234*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_R1:
235*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_R2:
236*61046927SAndroid Build Coastguard Worker                         add_write_dep(state,
237*61046927SAndroid Build Coastguard Worker                                       &state->last_r[waddr - V3D_QPU_WADDR_R0],
238*61046927SAndroid Build Coastguard Worker                                       n);
239*61046927SAndroid Build Coastguard Worker                         break;
240*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_R3:
241*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_R4:
242*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_R5:
243*61046927SAndroid Build Coastguard Worker                         /* Handled by v3d_qpu_writes_r*() checks below. */
244*61046927SAndroid Build Coastguard Worker                         break;
245*61046927SAndroid Build Coastguard Worker 
246*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_VPM:
247*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_VPMU:
248*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_vpm, n);
249*61046927SAndroid Build Coastguard Worker                         break;
250*61046927SAndroid Build Coastguard Worker 
251*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_TLB:
252*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_TLBU:
253*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_tlb, n);
254*61046927SAndroid Build Coastguard Worker                         break;
255*61046927SAndroid Build Coastguard Worker 
256*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_SYNC:
257*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_SYNCB:
258*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_SYNCU:
259*61046927SAndroid Build Coastguard Worker                         /* For CS barrier(): Sync against any other memory
260*61046927SAndroid Build Coastguard Worker                          * accesses.  There doesn't appear to be any need for
261*61046927SAndroid Build Coastguard Worker                          * barriers to affect ALU operations.
262*61046927SAndroid Build Coastguard Worker                          */
263*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_tmu_write, n);
264*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_tmu_read, n);
265*61046927SAndroid Build Coastguard Worker                         break;
266*61046927SAndroid Build Coastguard Worker 
267*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_UNIFA:
268*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_unifa, n);
269*61046927SAndroid Build Coastguard Worker                         break;
270*61046927SAndroid Build Coastguard Worker 
271*61046927SAndroid Build Coastguard Worker                 case V3D_QPU_WADDR_NOP:
272*61046927SAndroid Build Coastguard Worker                         break;
273*61046927SAndroid Build Coastguard Worker 
274*61046927SAndroid Build Coastguard Worker                 default:
275*61046927SAndroid Build Coastguard Worker                         fprintf(stderr, "Unknown waddr %d\n", waddr);
276*61046927SAndroid Build Coastguard Worker                         abort();
277*61046927SAndroid Build Coastguard Worker                 }
278*61046927SAndroid Build Coastguard Worker         }
279*61046927SAndroid Build Coastguard Worker }
280*61046927SAndroid Build Coastguard Worker 
281*61046927SAndroid Build Coastguard Worker /**
282*61046927SAndroid Build Coastguard Worker  * Common code for dependencies that need to be tracked both forward and
283*61046927SAndroid Build Coastguard Worker  * backward.
284*61046927SAndroid Build Coastguard Worker  *
285*61046927SAndroid Build Coastguard Worker  * This is for things like "all reads of r4 have to happen between the r4
286*61046927SAndroid Build Coastguard Worker  * writes that surround them".
287*61046927SAndroid Build Coastguard Worker  */
288*61046927SAndroid Build Coastguard Worker static void
calculate_deps(struct schedule_state * state,struct schedule_node * n)289*61046927SAndroid Build Coastguard Worker calculate_deps(struct schedule_state *state, struct schedule_node *n)
290*61046927SAndroid Build Coastguard Worker {
291*61046927SAndroid Build Coastguard Worker         const struct v3d_device_info *devinfo = state->devinfo;
292*61046927SAndroid Build Coastguard Worker         struct qinst *qinst = n->inst;
293*61046927SAndroid Build Coastguard Worker         struct v3d_qpu_instr *inst = &qinst->qpu;
294*61046927SAndroid Build Coastguard Worker         /* If the input and output segments are shared, then all VPM reads to
295*61046927SAndroid Build Coastguard Worker          * a location need to happen before all writes.  We handle this by
296*61046927SAndroid Build Coastguard Worker          * serializing all VPM operations for now.
297*61046927SAndroid Build Coastguard Worker          *
298*61046927SAndroid Build Coastguard Worker          * FIXME: we are assuming that the segments are shared. That is
299*61046927SAndroid Build Coastguard Worker          * correct right now as we are only using shared, but technically you
300*61046927SAndroid Build Coastguard Worker          * can choose.
301*61046927SAndroid Build Coastguard Worker          */
302*61046927SAndroid Build Coastguard Worker         bool separate_vpm_segment = false;
303*61046927SAndroid Build Coastguard Worker 
304*61046927SAndroid Build Coastguard Worker         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
305*61046927SAndroid Build Coastguard Worker                 if (inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS)
306*61046927SAndroid Build Coastguard Worker                         add_read_dep(state, state->last_sf, n);
307*61046927SAndroid Build Coastguard Worker 
308*61046927SAndroid Build Coastguard Worker                 /* XXX: BDI */
309*61046927SAndroid Build Coastguard Worker                 /* XXX: BDU */
310*61046927SAndroid Build Coastguard Worker                 /* XXX: ub */
311*61046927SAndroid Build Coastguard Worker                 /* XXX: raddr_a */
312*61046927SAndroid Build Coastguard Worker 
313*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_unif, n);
314*61046927SAndroid Build Coastguard Worker                 return;
315*61046927SAndroid Build Coastguard Worker         }
316*61046927SAndroid Build Coastguard Worker 
317*61046927SAndroid Build Coastguard Worker         assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
318*61046927SAndroid Build Coastguard Worker 
319*61046927SAndroid Build Coastguard Worker         /* XXX: LOAD_IMM */
320*61046927SAndroid Build Coastguard Worker 
321*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) {
322*61046927SAndroid Build Coastguard Worker                 if (devinfo->ver < 71) {
323*61046927SAndroid Build Coastguard Worker                         process_mux_deps(state, n, inst->alu.add.a.mux);
324*61046927SAndroid Build Coastguard Worker                 } else {
325*61046927SAndroid Build Coastguard Worker                         process_raddr_deps(state, n, inst->alu.add.a.raddr,
326*61046927SAndroid Build Coastguard Worker                                            inst->sig.small_imm_a);
327*61046927SAndroid Build Coastguard Worker                 }
328*61046927SAndroid Build Coastguard Worker         }
329*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) {
330*61046927SAndroid Build Coastguard Worker                 if (devinfo->ver < 71) {
331*61046927SAndroid Build Coastguard Worker                         process_mux_deps(state, n, inst->alu.add.b.mux);
332*61046927SAndroid Build Coastguard Worker                 } else {
333*61046927SAndroid Build Coastguard Worker                         process_raddr_deps(state, n, inst->alu.add.b.raddr,
334*61046927SAndroid Build Coastguard Worker                                            inst->sig.small_imm_b);
335*61046927SAndroid Build Coastguard Worker                 }
336*61046927SAndroid Build Coastguard Worker         }
337*61046927SAndroid Build Coastguard Worker 
338*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) {
339*61046927SAndroid Build Coastguard Worker                 if (devinfo->ver < 71) {
340*61046927SAndroid Build Coastguard Worker                         process_mux_deps(state, n, inst->alu.mul.a.mux);
341*61046927SAndroid Build Coastguard Worker                 } else {
342*61046927SAndroid Build Coastguard Worker                         process_raddr_deps(state, n, inst->alu.mul.a.raddr,
343*61046927SAndroid Build Coastguard Worker                                            inst->sig.small_imm_c);
344*61046927SAndroid Build Coastguard Worker                 }
345*61046927SAndroid Build Coastguard Worker         }
346*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) {
347*61046927SAndroid Build Coastguard Worker                 if (devinfo->ver < 71) {
348*61046927SAndroid Build Coastguard Worker                         process_mux_deps(state, n, inst->alu.mul.b.mux);
349*61046927SAndroid Build Coastguard Worker                 } else {
350*61046927SAndroid Build Coastguard Worker                         process_raddr_deps(state, n, inst->alu.mul.b.raddr,
351*61046927SAndroid Build Coastguard Worker                                            inst->sig.small_imm_d);
352*61046927SAndroid Build Coastguard Worker                 }
353*61046927SAndroid Build Coastguard Worker         }
354*61046927SAndroid Build Coastguard Worker 
355*61046927SAndroid Build Coastguard Worker         switch (inst->alu.add.op) {
356*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_VPMSETUP:
357*61046927SAndroid Build Coastguard Worker                 /* Could distinguish read/write by unpacking the uniform. */
358*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_vpm, n);
359*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_vpm_read, n);
360*61046927SAndroid Build Coastguard Worker                 break;
361*61046927SAndroid Build Coastguard Worker 
362*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_STVPMV:
363*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_STVPMD:
364*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_STVPMP:
365*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_vpm, n);
366*61046927SAndroid Build Coastguard Worker                 break;
367*61046927SAndroid Build Coastguard Worker 
368*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_LDVPMV_IN:
369*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_LDVPMD_IN:
370*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_LDVPMG_IN:
371*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_LDVPMP:
372*61046927SAndroid Build Coastguard Worker                 if (!separate_vpm_segment)
373*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_vpm, n);
374*61046927SAndroid Build Coastguard Worker                 break;
375*61046927SAndroid Build Coastguard Worker 
376*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_VPMWT:
377*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_vpm, n);
378*61046927SAndroid Build Coastguard Worker                 break;
379*61046927SAndroid Build Coastguard Worker 
380*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_MSF:
381*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_tlb, n);
382*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_setmsf, n);
383*61046927SAndroid Build Coastguard Worker                 break;
384*61046927SAndroid Build Coastguard Worker 
385*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_SETMSF:
386*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_setmsf, n);
387*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_tmu_write, n);
388*61046927SAndroid Build Coastguard Worker                 FALLTHROUGH;
389*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_SETREVF:
390*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_tlb, n);
391*61046927SAndroid Build Coastguard Worker                 break;
392*61046927SAndroid Build Coastguard Worker 
393*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_BALLOT:
394*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_BCASTF:
395*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_ALLEQ:
396*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_ALLFEQ:
397*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_setmsf, n);
398*61046927SAndroid Build Coastguard Worker                 break;
399*61046927SAndroid Build Coastguard Worker 
400*61046927SAndroid Build Coastguard Worker         default:
401*61046927SAndroid Build Coastguard Worker                 break;
402*61046927SAndroid Build Coastguard Worker         }
403*61046927SAndroid Build Coastguard Worker 
404*61046927SAndroid Build Coastguard Worker         switch (inst->alu.mul.op) {
405*61046927SAndroid Build Coastguard Worker         case V3D_QPU_M_MULTOP:
406*61046927SAndroid Build Coastguard Worker         case V3D_QPU_M_UMUL24:
407*61046927SAndroid Build Coastguard Worker                 /* MULTOP sets rtop, and UMUL24 implicitly reads rtop and
408*61046927SAndroid Build Coastguard Worker                  * resets it to 0.  We could possibly reorder umul24s relative
409*61046927SAndroid Build Coastguard Worker                  * to each other, but for now just keep all the MUL parts in
410*61046927SAndroid Build Coastguard Worker                  * order.
411*61046927SAndroid Build Coastguard Worker                  */
412*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_rtop, n);
413*61046927SAndroid Build Coastguard Worker                 break;
414*61046927SAndroid Build Coastguard Worker         default:
415*61046927SAndroid Build Coastguard Worker                 break;
416*61046927SAndroid Build Coastguard Worker         }
417*61046927SAndroid Build Coastguard Worker 
418*61046927SAndroid Build Coastguard Worker         if (inst->alu.add.op != V3D_QPU_A_NOP) {
419*61046927SAndroid Build Coastguard Worker                 process_waddr_deps(state, n, inst->alu.add.waddr,
420*61046927SAndroid Build Coastguard Worker                                    inst->alu.add.magic_write);
421*61046927SAndroid Build Coastguard Worker         }
422*61046927SAndroid Build Coastguard Worker         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
423*61046927SAndroid Build Coastguard Worker                 process_waddr_deps(state, n, inst->alu.mul.waddr,
424*61046927SAndroid Build Coastguard Worker                                    inst->alu.mul.magic_write);
425*61046927SAndroid Build Coastguard Worker         }
426*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
427*61046927SAndroid Build Coastguard Worker                 process_waddr_deps(state, n, inst->sig_addr,
428*61046927SAndroid Build Coastguard Worker                                    inst->sig_magic);
429*61046927SAndroid Build Coastguard Worker         }
430*61046927SAndroid Build Coastguard Worker 
431*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_r3(devinfo, inst))
432*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_r[3], n);
433*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_r4(devinfo, inst))
434*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_r[4], n);
435*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_r5(devinfo, inst))
436*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_r[5], n);
437*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_rf0_implicitly(devinfo, inst))
438*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_rf[0], n);
439*61046927SAndroid Build Coastguard Worker 
440*61046927SAndroid Build Coastguard Worker         /* If we add any more dependencies here we should consider whether we
441*61046927SAndroid Build Coastguard Worker          * also need to update qpu_inst_after_thrsw_valid_in_delay_slot.
442*61046927SAndroid Build Coastguard Worker          */
443*61046927SAndroid Build Coastguard Worker         if (inst->sig.thrsw) {
444*61046927SAndroid Build Coastguard Worker                 /* All accumulator contents and flags are undefined after the
445*61046927SAndroid Build Coastguard Worker                  * switch.
446*61046927SAndroid Build Coastguard Worker                  */
447*61046927SAndroid Build Coastguard Worker                 for (int i = 0; i < ARRAY_SIZE(state->last_r); i++)
448*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_r[i], n);
449*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_sf, n);
450*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_rtop, n);
451*61046927SAndroid Build Coastguard Worker 
452*61046927SAndroid Build Coastguard Worker                 /* Scoreboard-locking operations have to stay after the last
453*61046927SAndroid Build Coastguard Worker                  * thread switch.
454*61046927SAndroid Build Coastguard Worker                  */
455*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_tlb, n);
456*61046927SAndroid Build Coastguard Worker 
457*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_tmu_write, n);
458*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_tmu_config, n);
459*61046927SAndroid Build Coastguard Worker         }
460*61046927SAndroid Build Coastguard Worker 
461*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_waits_on_tmu(inst)) {
462*61046927SAndroid Build Coastguard Worker                 /* TMU loads are coming from a FIFO, so ordering is important.
463*61046927SAndroid Build Coastguard Worker                  */
464*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_tmu_read, n);
465*61046927SAndroid Build Coastguard Worker                 /* Keep TMU loads after their TMU lookup terminator */
466*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_tmu_config, n);
467*61046927SAndroid Build Coastguard Worker         }
468*61046927SAndroid Build Coastguard Worker 
469*61046927SAndroid Build Coastguard Worker         /* Allow wrtmuc to be reordered with other instructions in the
470*61046927SAndroid Build Coastguard Worker          * same TMU sequence by using a read dependency on the last TMU
471*61046927SAndroid Build Coastguard Worker          * sequence terminator.
472*61046927SAndroid Build Coastguard Worker          */
473*61046927SAndroid Build Coastguard Worker         if (inst->sig.wrtmuc)
474*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_tmu_config, n);
475*61046927SAndroid Build Coastguard Worker 
476*61046927SAndroid Build Coastguard Worker         if (inst->sig.ldtlb | inst->sig.ldtlbu)
477*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_tlb, n);
478*61046927SAndroid Build Coastguard Worker 
479*61046927SAndroid Build Coastguard Worker         if (inst->sig.ldvpm) {
480*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_vpm_read, n);
481*61046927SAndroid Build Coastguard Worker 
482*61046927SAndroid Build Coastguard Worker                 /* At least for now, we're doing shared I/O segments, so queue
483*61046927SAndroid Build Coastguard Worker                  * all writes after all reads.
484*61046927SAndroid Build Coastguard Worker                  */
485*61046927SAndroid Build Coastguard Worker                 if (!separate_vpm_segment)
486*61046927SAndroid Build Coastguard Worker                         add_write_dep(state, &state->last_vpm, n);
487*61046927SAndroid Build Coastguard Worker         }
488*61046927SAndroid Build Coastguard Worker 
489*61046927SAndroid Build Coastguard Worker         /* inst->sig.ldunif or sideband uniform read */
490*61046927SAndroid Build Coastguard Worker         if (vir_has_uniform(qinst))
491*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_unif, n);
492*61046927SAndroid Build Coastguard Worker 
493*61046927SAndroid Build Coastguard Worker         /* Both unifa and ldunifa must preserve ordering */
494*61046927SAndroid Build Coastguard Worker         if (inst->sig.ldunifa || inst->sig.ldunifarf)
495*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_unifa, n);
496*61046927SAndroid Build Coastguard Worker 
497*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_reads_flags(inst))
498*61046927SAndroid Build Coastguard Worker                 add_read_dep(state, state->last_sf, n);
499*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_flags(inst))
500*61046927SAndroid Build Coastguard Worker                 add_write_dep(state, &state->last_sf, n);
501*61046927SAndroid Build Coastguard Worker }
502*61046927SAndroid Build Coastguard Worker 
503*61046927SAndroid Build Coastguard Worker static void
calculate_forward_deps(struct v3d_compile * c,struct dag * dag,struct list_head * schedule_list)504*61046927SAndroid Build Coastguard Worker calculate_forward_deps(struct v3d_compile *c, struct dag *dag,
505*61046927SAndroid Build Coastguard Worker                        struct list_head *schedule_list)
506*61046927SAndroid Build Coastguard Worker {
507*61046927SAndroid Build Coastguard Worker         struct schedule_state state;
508*61046927SAndroid Build Coastguard Worker 
509*61046927SAndroid Build Coastguard Worker         memset(&state, 0, sizeof(state));
510*61046927SAndroid Build Coastguard Worker         state.dag = dag;
511*61046927SAndroid Build Coastguard Worker         state.devinfo = c->devinfo;
512*61046927SAndroid Build Coastguard Worker         state.dir = F;
513*61046927SAndroid Build Coastguard Worker 
514*61046927SAndroid Build Coastguard Worker         list_for_each_entry(struct schedule_node, node, schedule_list, link)
515*61046927SAndroid Build Coastguard Worker                 calculate_deps(&state, node);
516*61046927SAndroid Build Coastguard Worker }
517*61046927SAndroid Build Coastguard Worker 
518*61046927SAndroid Build Coastguard Worker static void
calculate_reverse_deps(struct v3d_compile * c,struct dag * dag,struct list_head * schedule_list)519*61046927SAndroid Build Coastguard Worker calculate_reverse_deps(struct v3d_compile *c, struct dag *dag,
520*61046927SAndroid Build Coastguard Worker                        struct list_head *schedule_list)
521*61046927SAndroid Build Coastguard Worker {
522*61046927SAndroid Build Coastguard Worker         struct schedule_state state;
523*61046927SAndroid Build Coastguard Worker 
524*61046927SAndroid Build Coastguard Worker         memset(&state, 0, sizeof(state));
525*61046927SAndroid Build Coastguard Worker         state.dag = dag;
526*61046927SAndroid Build Coastguard Worker         state.devinfo = c->devinfo;
527*61046927SAndroid Build Coastguard Worker         state.dir = R;
528*61046927SAndroid Build Coastguard Worker 
529*61046927SAndroid Build Coastguard Worker         list_for_each_entry_rev(struct schedule_node, node, schedule_list,
530*61046927SAndroid Build Coastguard Worker                                 link) {
531*61046927SAndroid Build Coastguard Worker                 calculate_deps(&state, (struct schedule_node *)node);
532*61046927SAndroid Build Coastguard Worker         }
533*61046927SAndroid Build Coastguard Worker }
534*61046927SAndroid Build Coastguard Worker 
/* Bookkeeping consulted while choosing the next instruction to schedule.
 * The *_tick fields record the tick at which some event last happened and
 * are compared against 'tick' to enforce minimum distances.
 */
struct choose_scoreboard {
        struct dag *dag;

        /* Current tick; the last_*_tick fields are measured against it. */
        int tick;

        /* Checked before reading or writing r4. */
        int last_magic_sfu_write_tick;

        /* Register and tick checked by read_stalls(). */
        int last_stallable_sfu_reg;
        int last_stallable_sfu_tick;

        /* Checked before reading r5 / rf0 and before writing rf0. */
        int last_ldvary_tick;

        int last_unifa_write_tick;
        int last_uniforms_reset_tick;

        /* Together with the two *_thrsw_emitted flags below, decides
         * whether the scoreboard counts as locked.
         */
        int last_thrsw_tick;

        int last_branch_tick;
        int last_setmsf_tick;
        bool first_thrsw_emitted;
        bool last_thrsw_emitted;
        bool fixup_ldvary;
        int ldvary_count;
        int pending_ldtmu_count;
        bool first_ldtmu_after_thrsw;

        /* V3D 7.x */
        int last_implicit_rf0_write_tick;
        bool has_rf0_flops_conflict;
};
558*61046927SAndroid Build Coastguard Worker 
559*61046927SAndroid Build Coastguard Worker static bool
mux_reads_too_soon(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst,enum v3d_qpu_mux mux)560*61046927SAndroid Build Coastguard Worker mux_reads_too_soon(struct choose_scoreboard *scoreboard,
561*61046927SAndroid Build Coastguard Worker                    const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
562*61046927SAndroid Build Coastguard Worker {
563*61046927SAndroid Build Coastguard Worker         switch (mux) {
564*61046927SAndroid Build Coastguard Worker         case V3D_QPU_MUX_R4:
565*61046927SAndroid Build Coastguard Worker                 if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick <= 2)
566*61046927SAndroid Build Coastguard Worker                         return true;
567*61046927SAndroid Build Coastguard Worker                 break;
568*61046927SAndroid Build Coastguard Worker 
569*61046927SAndroid Build Coastguard Worker         case V3D_QPU_MUX_R5:
570*61046927SAndroid Build Coastguard Worker                 if (scoreboard->tick - scoreboard->last_ldvary_tick <= 1)
571*61046927SAndroid Build Coastguard Worker                         return true;
572*61046927SAndroid Build Coastguard Worker                 break;
573*61046927SAndroid Build Coastguard Worker         default:
574*61046927SAndroid Build Coastguard Worker                 break;
575*61046927SAndroid Build Coastguard Worker         }
576*61046927SAndroid Build Coastguard Worker 
577*61046927SAndroid Build Coastguard Worker         return false;
578*61046927SAndroid Build Coastguard Worker }
579*61046927SAndroid Build Coastguard Worker 
580*61046927SAndroid Build Coastguard Worker static bool
reads_too_soon(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst,uint8_t raddr)581*61046927SAndroid Build Coastguard Worker reads_too_soon(struct choose_scoreboard *scoreboard,
582*61046927SAndroid Build Coastguard Worker                const struct v3d_qpu_instr *inst, uint8_t raddr)
583*61046927SAndroid Build Coastguard Worker {
584*61046927SAndroid Build Coastguard Worker         switch (raddr) {
585*61046927SAndroid Build Coastguard Worker         case 0: /* ldvary delayed write of C coefficient to rf0 */
586*61046927SAndroid Build Coastguard Worker                 if (scoreboard->tick - scoreboard->last_ldvary_tick <= 1)
587*61046927SAndroid Build Coastguard Worker                         return true;
588*61046927SAndroid Build Coastguard Worker                 break;
589*61046927SAndroid Build Coastguard Worker         default:
590*61046927SAndroid Build Coastguard Worker                 break;
591*61046927SAndroid Build Coastguard Worker         }
592*61046927SAndroid Build Coastguard Worker 
593*61046927SAndroid Build Coastguard Worker         return false;
594*61046927SAndroid Build Coastguard Worker }
595*61046927SAndroid Build Coastguard Worker 
596*61046927SAndroid Build Coastguard Worker static bool
reads_too_soon_after_write(const struct v3d_device_info * devinfo,struct choose_scoreboard * scoreboard,struct qinst * qinst)597*61046927SAndroid Build Coastguard Worker reads_too_soon_after_write(const struct v3d_device_info *devinfo,
598*61046927SAndroid Build Coastguard Worker                            struct choose_scoreboard *scoreboard,
599*61046927SAndroid Build Coastguard Worker                            struct qinst *qinst)
600*61046927SAndroid Build Coastguard Worker {
601*61046927SAndroid Build Coastguard Worker         const struct v3d_qpu_instr *inst = &qinst->qpu;
602*61046927SAndroid Build Coastguard Worker 
603*61046927SAndroid Build Coastguard Worker         /* XXX: Branching off of raddr. */
604*61046927SAndroid Build Coastguard Worker         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
605*61046927SAndroid Build Coastguard Worker                 return false;
606*61046927SAndroid Build Coastguard Worker 
607*61046927SAndroid Build Coastguard Worker         assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
608*61046927SAndroid Build Coastguard Worker 
609*61046927SAndroid Build Coastguard Worker         if (inst->alu.add.op != V3D_QPU_A_NOP) {
610*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0) {
611*61046927SAndroid Build Coastguard Worker                         if (devinfo->ver < 71) {
612*61046927SAndroid Build Coastguard Worker                                 if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.a.mux))
613*61046927SAndroid Build Coastguard Worker                                         return true;
614*61046927SAndroid Build Coastguard Worker                         } else {
615*61046927SAndroid Build Coastguard Worker                                 if (reads_too_soon(scoreboard, inst, inst->alu.add.a.raddr))
616*61046927SAndroid Build Coastguard Worker                                         return true;
617*61046927SAndroid Build Coastguard Worker                         }
618*61046927SAndroid Build Coastguard Worker                 }
619*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1) {
620*61046927SAndroid Build Coastguard Worker                         if (devinfo->ver < 71) {
621*61046927SAndroid Build Coastguard Worker                                 if (mux_reads_too_soon(scoreboard, inst, inst->alu.add.b.mux))
622*61046927SAndroid Build Coastguard Worker                                         return true;
623*61046927SAndroid Build Coastguard Worker                         } else {
624*61046927SAndroid Build Coastguard Worker                                 if (reads_too_soon(scoreboard, inst, inst->alu.add.b.raddr))
625*61046927SAndroid Build Coastguard Worker                                         return true;
626*61046927SAndroid Build Coastguard Worker                         }
627*61046927SAndroid Build Coastguard Worker                 }
628*61046927SAndroid Build Coastguard Worker         }
629*61046927SAndroid Build Coastguard Worker 
630*61046927SAndroid Build Coastguard Worker         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
631*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0) {
632*61046927SAndroid Build Coastguard Worker                         if (devinfo->ver < 71) {
633*61046927SAndroid Build Coastguard Worker                                 if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a.mux))
634*61046927SAndroid Build Coastguard Worker                                         return true;
635*61046927SAndroid Build Coastguard Worker                         } else {
636*61046927SAndroid Build Coastguard Worker                                 if (reads_too_soon(scoreboard, inst, inst->alu.mul.a.raddr))
637*61046927SAndroid Build Coastguard Worker                                         return true;
638*61046927SAndroid Build Coastguard Worker                         }
639*61046927SAndroid Build Coastguard Worker                 }
640*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1) {
641*61046927SAndroid Build Coastguard Worker                         if (devinfo->ver < 71) {
642*61046927SAndroid Build Coastguard Worker                                 if (mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b.mux))
643*61046927SAndroid Build Coastguard Worker                                         return true;
644*61046927SAndroid Build Coastguard Worker                         } else {
645*61046927SAndroid Build Coastguard Worker                                 if (reads_too_soon(scoreboard, inst, inst->alu.mul.b.raddr))
646*61046927SAndroid Build Coastguard Worker                                         return true;
647*61046927SAndroid Build Coastguard Worker                         }
648*61046927SAndroid Build Coastguard Worker                 }
649*61046927SAndroid Build Coastguard Worker         }
650*61046927SAndroid Build Coastguard Worker 
651*61046927SAndroid Build Coastguard Worker         /* XXX: imm */
652*61046927SAndroid Build Coastguard Worker 
653*61046927SAndroid Build Coastguard Worker         return false;
654*61046927SAndroid Build Coastguard Worker }
655*61046927SAndroid Build Coastguard Worker 
656*61046927SAndroid Build Coastguard Worker static bool
writes_too_soon_after_write(const struct v3d_device_info * devinfo,struct choose_scoreboard * scoreboard,struct qinst * qinst)657*61046927SAndroid Build Coastguard Worker writes_too_soon_after_write(const struct v3d_device_info *devinfo,
658*61046927SAndroid Build Coastguard Worker                             struct choose_scoreboard *scoreboard,
659*61046927SAndroid Build Coastguard Worker                             struct qinst *qinst)
660*61046927SAndroid Build Coastguard Worker {
661*61046927SAndroid Build Coastguard Worker         const struct v3d_qpu_instr *inst = &qinst->qpu;
662*61046927SAndroid Build Coastguard Worker 
663*61046927SAndroid Build Coastguard Worker         /* Don't schedule any other r4 write too soon after an SFU write.
664*61046927SAndroid Build Coastguard Worker          * This would normally be prevented by dependency tracking, but might
665*61046927SAndroid Build Coastguard Worker          * occur if a dead SFU computation makes it to scheduling.
666*61046927SAndroid Build Coastguard Worker          */
667*61046927SAndroid Build Coastguard Worker         if (scoreboard->tick - scoreboard->last_magic_sfu_write_tick < 2 &&
668*61046927SAndroid Build Coastguard Worker             v3d_qpu_writes_r4(devinfo, inst))
669*61046927SAndroid Build Coastguard Worker                 return true;
670*61046927SAndroid Build Coastguard Worker 
671*61046927SAndroid Build Coastguard Worker         if (devinfo->ver == 42)
672*61046927SAndroid Build Coastguard Worker            return false;
673*61046927SAndroid Build Coastguard Worker 
674*61046927SAndroid Build Coastguard Worker         /* Don't schedule anything that writes rf0 right after ldvary, since
675*61046927SAndroid Build Coastguard Worker          * that would clash with the ldvary's delayed rf0 write (the exception
676*61046927SAndroid Build Coastguard Worker          * is another ldvary, since its implicit rf0 write would also have
677*61046927SAndroid Build Coastguard Worker          * one cycle of delay and would not clash).
678*61046927SAndroid Build Coastguard Worker          */
679*61046927SAndroid Build Coastguard Worker         if (scoreboard->last_ldvary_tick + 1 == scoreboard->tick &&
680*61046927SAndroid Build Coastguard Worker             (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) ||
681*61046927SAndroid Build Coastguard Worker              (v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
682*61046927SAndroid Build Coastguard Worker               !inst->sig.ldvary))) {
683*61046927SAndroid Build Coastguard Worker             return true;
684*61046927SAndroid Build Coastguard Worker        }
685*61046927SAndroid Build Coastguard Worker 
686*61046927SAndroid Build Coastguard Worker         return false;
687*61046927SAndroid Build Coastguard Worker }
688*61046927SAndroid Build Coastguard Worker 
689*61046927SAndroid Build Coastguard Worker static bool
scoreboard_is_locked(struct choose_scoreboard * scoreboard,bool lock_scoreboard_on_first_thrsw)690*61046927SAndroid Build Coastguard Worker scoreboard_is_locked(struct choose_scoreboard *scoreboard,
691*61046927SAndroid Build Coastguard Worker                      bool lock_scoreboard_on_first_thrsw)
692*61046927SAndroid Build Coastguard Worker {
693*61046927SAndroid Build Coastguard Worker         if (lock_scoreboard_on_first_thrsw) {
694*61046927SAndroid Build Coastguard Worker                 return scoreboard->first_thrsw_emitted &&
695*61046927SAndroid Build Coastguard Worker                        scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
696*61046927SAndroid Build Coastguard Worker         }
697*61046927SAndroid Build Coastguard Worker 
698*61046927SAndroid Build Coastguard Worker         return scoreboard->last_thrsw_emitted &&
699*61046927SAndroid Build Coastguard Worker                scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
700*61046927SAndroid Build Coastguard Worker }
701*61046927SAndroid Build Coastguard Worker 
702*61046927SAndroid Build Coastguard Worker static bool
pixel_scoreboard_too_soon(struct v3d_compile * c,struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst)703*61046927SAndroid Build Coastguard Worker pixel_scoreboard_too_soon(struct v3d_compile *c,
704*61046927SAndroid Build Coastguard Worker                           struct choose_scoreboard *scoreboard,
705*61046927SAndroid Build Coastguard Worker                           const struct v3d_qpu_instr *inst)
706*61046927SAndroid Build Coastguard Worker {
707*61046927SAndroid Build Coastguard Worker         return qpu_inst_is_tlb(inst) &&
708*61046927SAndroid Build Coastguard Worker                !scoreboard_is_locked(scoreboard,
709*61046927SAndroid Build Coastguard Worker                                      c->lock_scoreboard_on_first_thrsw);
710*61046927SAndroid Build Coastguard Worker }
711*61046927SAndroid Build Coastguard Worker 
712*61046927SAndroid Build Coastguard Worker static bool
qpu_instruction_uses_rf(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst,uint32_t waddr)713*61046927SAndroid Build Coastguard Worker qpu_instruction_uses_rf(const struct v3d_device_info *devinfo,
714*61046927SAndroid Build Coastguard Worker                         const struct v3d_qpu_instr *inst,
715*61046927SAndroid Build Coastguard Worker                         uint32_t waddr) {
716*61046927SAndroid Build Coastguard Worker 
717*61046927SAndroid Build Coastguard Worker         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
718*61046927SAndroid Build Coastguard Worker            return false;
719*61046927SAndroid Build Coastguard Worker 
720*61046927SAndroid Build Coastguard Worker         if (devinfo->ver < 71) {
721*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
722*61046927SAndroid Build Coastguard Worker                     inst->raddr_a == waddr)
723*61046927SAndroid Build Coastguard Worker                         return true;
724*61046927SAndroid Build Coastguard Worker 
725*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
726*61046927SAndroid Build Coastguard Worker                     !inst->sig.small_imm_b && (inst->raddr_b == waddr))
727*61046927SAndroid Build Coastguard Worker                         return true;
728*61046927SAndroid Build Coastguard Worker         } else {
729*61046927SAndroid Build Coastguard Worker                 if (v3d71_qpu_reads_raddr(inst, waddr))
730*61046927SAndroid Build Coastguard Worker                         return true;
731*61046927SAndroid Build Coastguard Worker         }
732*61046927SAndroid Build Coastguard Worker 
733*61046927SAndroid Build Coastguard Worker         return false;
734*61046927SAndroid Build Coastguard Worker }
735*61046927SAndroid Build Coastguard Worker 
736*61046927SAndroid Build Coastguard Worker static bool
read_stalls(const struct v3d_device_info * devinfo,struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst)737*61046927SAndroid Build Coastguard Worker read_stalls(const struct v3d_device_info *devinfo,
738*61046927SAndroid Build Coastguard Worker             struct choose_scoreboard *scoreboard,
739*61046927SAndroid Build Coastguard Worker             const struct v3d_qpu_instr *inst)
740*61046927SAndroid Build Coastguard Worker {
741*61046927SAndroid Build Coastguard Worker         return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 &&
742*61046927SAndroid Build Coastguard Worker                 qpu_instruction_uses_rf(devinfo, inst,
743*61046927SAndroid Build Coastguard Worker                                         scoreboard->last_stallable_sfu_reg);
744*61046927SAndroid Build Coastguard Worker }
745*61046927SAndroid Build Coastguard Worker 
/* Upper bound for schedule priorities. When an instruction stalls, this
 * maximum is subtracted from its priority, which yields a negative value, so
 * instructions that stall always rank below regular instructions.
 */
#define MAX_SCHEDULE_PRIORITY 16
750*61046927SAndroid Build Coastguard Worker 
751*61046927SAndroid Build Coastguard Worker static int
get_instruction_priority(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)752*61046927SAndroid Build Coastguard Worker get_instruction_priority(const struct v3d_device_info *devinfo,
753*61046927SAndroid Build Coastguard Worker                          const struct v3d_qpu_instr *inst)
754*61046927SAndroid Build Coastguard Worker {
755*61046927SAndroid Build Coastguard Worker         uint32_t baseline_score;
756*61046927SAndroid Build Coastguard Worker         uint32_t next_score = 0;
757*61046927SAndroid Build Coastguard Worker 
758*61046927SAndroid Build Coastguard Worker         /* Schedule TLB operations as late as possible, to get more
759*61046927SAndroid Build Coastguard Worker          * parallelism between shaders.
760*61046927SAndroid Build Coastguard Worker          */
761*61046927SAndroid Build Coastguard Worker         if (qpu_inst_is_tlb(inst))
762*61046927SAndroid Build Coastguard Worker                 return next_score;
763*61046927SAndroid Build Coastguard Worker         next_score++;
764*61046927SAndroid Build Coastguard Worker 
765*61046927SAndroid Build Coastguard Worker         /* Empirical testing shows that using priorities to hide latency of
766*61046927SAndroid Build Coastguard Worker          * TMU operations when scheduling QPU leads to slightly worse
767*61046927SAndroid Build Coastguard Worker          * performance, even at 2 threads. We think this is because the thread
768*61046927SAndroid Build Coastguard Worker          * switching is already quite effective at hiding latency and NIR
769*61046927SAndroid Build Coastguard Worker          * scheduling (and possibly TMU pipelining too) are sufficient to hide
770*61046927SAndroid Build Coastguard Worker          * TMU latency, so piling up on that here doesn't provide any benefits
771*61046927SAndroid Build Coastguard Worker          * and instead may cause us to postpone critical paths that depend on
772*61046927SAndroid Build Coastguard Worker          * the TMU results.
773*61046927SAndroid Build Coastguard Worker          */
774*61046927SAndroid Build Coastguard Worker #if 0
775*61046927SAndroid Build Coastguard Worker         /* Schedule texture read results collection late to hide latency. */
776*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_waits_on_tmu(inst))
777*61046927SAndroid Build Coastguard Worker                 return next_score;
778*61046927SAndroid Build Coastguard Worker         next_score++;
779*61046927SAndroid Build Coastguard Worker #endif
780*61046927SAndroid Build Coastguard Worker 
781*61046927SAndroid Build Coastguard Worker         /* Default score for things that aren't otherwise special. */
782*61046927SAndroid Build Coastguard Worker         baseline_score = next_score;
783*61046927SAndroid Build Coastguard Worker         next_score++;
784*61046927SAndroid Build Coastguard Worker 
785*61046927SAndroid Build Coastguard Worker #if 0
786*61046927SAndroid Build Coastguard Worker         /* Schedule texture read setup early to hide their latency better. */
787*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_tmu(devinfo, inst))
788*61046927SAndroid Build Coastguard Worker                 return next_score;
789*61046927SAndroid Build Coastguard Worker         next_score++;
790*61046927SAndroid Build Coastguard Worker #endif
791*61046927SAndroid Build Coastguard Worker 
792*61046927SAndroid Build Coastguard Worker         /* We should increase the maximum if we assert here */
793*61046927SAndroid Build Coastguard Worker         assert(next_score < MAX_SCHEDULE_PRIORITY);
794*61046927SAndroid Build Coastguard Worker 
795*61046927SAndroid Build Coastguard Worker         return baseline_score;
796*61046927SAndroid Build Coastguard Worker }
797*61046927SAndroid Build Coastguard Worker 
/* Bit flags, one per kind of peripheral access, that qpu_peripherals() ORs
 * together into a uint32_t mask describing a single instruction.
 */
enum {
        /* VPM */
        V3D_PERIPHERAL_VPM_READ       = 1 << 0,
        V3D_PERIPHERAL_VPM_WRITE      = 1 << 1,
        V3D_PERIPHERAL_VPM_WAIT       = 1 << 2,

        /* SFU */
        V3D_PERIPHERAL_SFU            = 1 << 3,

        /* TMU */
        V3D_PERIPHERAL_TMU_WRITE      = 1 << 4,
        V3D_PERIPHERAL_TMU_READ       = 1 << 5,
        V3D_PERIPHERAL_TMU_WAIT       = 1 << 6,
        V3D_PERIPHERAL_TMU_WRTMUC_SIG = 1 << 7,

        /* TSY */
        V3D_PERIPHERAL_TSY            = 1 << 8,

        /* TLB */
        V3D_PERIPHERAL_TLB_READ       = 1 << 9,
        V3D_PERIPHERAL_TLB_WRITE      = 1 << 10,
};
811*61046927SAndroid Build Coastguard Worker 
812*61046927SAndroid Build Coastguard Worker static uint32_t
qpu_peripherals(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * inst)813*61046927SAndroid Build Coastguard Worker qpu_peripherals(const struct v3d_device_info *devinfo,
814*61046927SAndroid Build Coastguard Worker                 const struct v3d_qpu_instr *inst)
815*61046927SAndroid Build Coastguard Worker {
816*61046927SAndroid Build Coastguard Worker         uint32_t result = 0;
817*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_reads_vpm(inst))
818*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_VPM_READ;
819*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_vpm(inst))
820*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_VPM_WRITE;
821*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_waits_vpm(inst))
822*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_VPM_WAIT;
823*61046927SAndroid Build Coastguard Worker 
824*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_tmu(devinfo, inst))
825*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_TMU_WRITE;
826*61046927SAndroid Build Coastguard Worker         if (inst->sig.ldtmu)
827*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_TMU_READ;
828*61046927SAndroid Build Coastguard Worker         if (inst->sig.wrtmuc)
829*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_TMU_WRTMUC_SIG;
830*61046927SAndroid Build Coastguard Worker 
831*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_uses_sfu(inst))
832*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_SFU;
833*61046927SAndroid Build Coastguard Worker 
834*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_reads_tlb(inst))
835*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_TLB_READ;
836*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_tlb(inst))
837*61046927SAndroid Build Coastguard Worker                 result |= V3D_PERIPHERAL_TLB_WRITE;
838*61046927SAndroid Build Coastguard Worker 
839*61046927SAndroid Build Coastguard Worker         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
840*61046927SAndroid Build Coastguard Worker                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
841*61046927SAndroid Build Coastguard Worker                     inst->alu.add.magic_write &&
842*61046927SAndroid Build Coastguard Worker                     v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) {
843*61046927SAndroid Build Coastguard Worker                         result |= V3D_PERIPHERAL_TSY;
844*61046927SAndroid Build Coastguard Worker                 }
845*61046927SAndroid Build Coastguard Worker 
846*61046927SAndroid Build Coastguard Worker                 if (inst->alu.add.op == V3D_QPU_A_TMUWT)
847*61046927SAndroid Build Coastguard Worker                         result |= V3D_PERIPHERAL_TMU_WAIT;
848*61046927SAndroid Build Coastguard Worker         }
849*61046927SAndroid Build Coastguard Worker 
850*61046927SAndroid Build Coastguard Worker         return result;
851*61046927SAndroid Build Coastguard Worker }
852*61046927SAndroid Build Coastguard Worker 
853*61046927SAndroid Build Coastguard Worker static bool
qpu_compatible_peripheral_access(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * a,const struct v3d_qpu_instr * b)854*61046927SAndroid Build Coastguard Worker qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
855*61046927SAndroid Build Coastguard Worker                                  const struct v3d_qpu_instr *a,
856*61046927SAndroid Build Coastguard Worker                                  const struct v3d_qpu_instr *b)
857*61046927SAndroid Build Coastguard Worker {
858*61046927SAndroid Build Coastguard Worker         const uint32_t a_peripherals = qpu_peripherals(devinfo, a);
859*61046927SAndroid Build Coastguard Worker         const uint32_t b_peripherals = qpu_peripherals(devinfo, b);
860*61046927SAndroid Build Coastguard Worker 
861*61046927SAndroid Build Coastguard Worker         /* We can always do one peripheral access per instruction. */
862*61046927SAndroid Build Coastguard Worker         if (util_bitcount(a_peripherals) + util_bitcount(b_peripherals) <= 1)
863*61046927SAndroid Build Coastguard Worker                 return true;
864*61046927SAndroid Build Coastguard Worker 
865*61046927SAndroid Build Coastguard Worker         /* V3D 4.x can't do more than one peripheral access except in a
866*61046927SAndroid Build Coastguard Worker          * few cases:
867*61046927SAndroid Build Coastguard Worker          */
868*61046927SAndroid Build Coastguard Worker         if (devinfo->ver == 42) {
869*61046927SAndroid Build Coastguard Worker                 /* WRTMUC signal with TMU register write (other than tmuc). */
870*61046927SAndroid Build Coastguard Worker                 if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
871*61046927SAndroid Build Coastguard Worker                     b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
872*61046927SAndroid Build Coastguard Worker                         return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
873*61046927SAndroid Build Coastguard Worker                 }
874*61046927SAndroid Build Coastguard Worker                 if (b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
875*61046927SAndroid Build Coastguard Worker                     a_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
876*61046927SAndroid Build Coastguard Worker                         return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
877*61046927SAndroid Build Coastguard Worker                 }
878*61046927SAndroid Build Coastguard Worker 
879*61046927SAndroid Build Coastguard Worker                 /* TMU read with VPM read/write. */
880*61046927SAndroid Build Coastguard Worker                 if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
881*61046927SAndroid Build Coastguard Worker                     (b_peripherals == V3D_PERIPHERAL_VPM_READ ||
882*61046927SAndroid Build Coastguard Worker                      b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
883*61046927SAndroid Build Coastguard Worker                         return true;
884*61046927SAndroid Build Coastguard Worker                 }
885*61046927SAndroid Build Coastguard Worker                 if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
886*61046927SAndroid Build Coastguard Worker                     (a_peripherals == V3D_PERIPHERAL_VPM_READ ||
887*61046927SAndroid Build Coastguard Worker                      a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
888*61046927SAndroid Build Coastguard Worker                         return true;
889*61046927SAndroid Build Coastguard Worker                 }
890*61046927SAndroid Build Coastguard Worker 
891*61046927SAndroid Build Coastguard Worker                 return false;
892*61046927SAndroid Build Coastguard Worker         }
893*61046927SAndroid Build Coastguard Worker 
894*61046927SAndroid Build Coastguard Worker         /* V3D 7.x can't have more than one of these restricted peripherals */
895*61046927SAndroid Build Coastguard Worker         const uint32_t restricted = V3D_PERIPHERAL_TMU_WRITE |
896*61046927SAndroid Build Coastguard Worker                                     V3D_PERIPHERAL_TMU_WRTMUC_SIG |
897*61046927SAndroid Build Coastguard Worker                                     V3D_PERIPHERAL_TSY |
898*61046927SAndroid Build Coastguard Worker                                     V3D_PERIPHERAL_TLB_READ |
899*61046927SAndroid Build Coastguard Worker                                     V3D_PERIPHERAL_SFU |
900*61046927SAndroid Build Coastguard Worker                                     V3D_PERIPHERAL_VPM_READ |
901*61046927SAndroid Build Coastguard Worker                                     V3D_PERIPHERAL_VPM_WRITE;
902*61046927SAndroid Build Coastguard Worker 
903*61046927SAndroid Build Coastguard Worker         const uint32_t a_restricted = a_peripherals & restricted;
904*61046927SAndroid Build Coastguard Worker         const uint32_t b_restricted = b_peripherals & restricted;
905*61046927SAndroid Build Coastguard Worker         if (a_restricted && b_restricted) {
906*61046927SAndroid Build Coastguard Worker                 /* WRTMUC signal with TMU register write (other than tmuc) is
907*61046927SAndroid Build Coastguard Worker                  * allowed though.
908*61046927SAndroid Build Coastguard Worker                  */
909*61046927SAndroid Build Coastguard Worker                 if (!((a_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
910*61046927SAndroid Build Coastguard Worker                        b_restricted == V3D_PERIPHERAL_TMU_WRITE &&
911*61046927SAndroid Build Coastguard Worker                        v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) ||
912*61046927SAndroid Build Coastguard Worker                       (b_restricted == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
913*61046927SAndroid Build Coastguard Worker                        a_restricted == V3D_PERIPHERAL_TMU_WRITE &&
914*61046927SAndroid Build Coastguard Worker                        v3d_qpu_writes_tmu_not_tmuc(devinfo, a)))) {
915*61046927SAndroid Build Coastguard Worker                         return false;
916*61046927SAndroid Build Coastguard Worker                 }
917*61046927SAndroid Build Coastguard Worker         }
918*61046927SAndroid Build Coastguard Worker 
919*61046927SAndroid Build Coastguard Worker         /* Only one TMU read per instruction */
920*61046927SAndroid Build Coastguard Worker         if ((a_peripherals & V3D_PERIPHERAL_TMU_READ) &&
921*61046927SAndroid Build Coastguard Worker             (b_peripherals & V3D_PERIPHERAL_TMU_READ)) {
922*61046927SAndroid Build Coastguard Worker                 return false;
923*61046927SAndroid Build Coastguard Worker         }
924*61046927SAndroid Build Coastguard Worker 
925*61046927SAndroid Build Coastguard Worker         /* Only one TLB access per instruction */
926*61046927SAndroid Build Coastguard Worker         if ((a_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
927*61046927SAndroid Build Coastguard Worker                               V3D_PERIPHERAL_TLB_READ)) &&
928*61046927SAndroid Build Coastguard Worker             (b_peripherals & (V3D_PERIPHERAL_TLB_WRITE |
929*61046927SAndroid Build Coastguard Worker                               V3D_PERIPHERAL_TLB_READ))) {
930*61046927SAndroid Build Coastguard Worker                 return false;
931*61046927SAndroid Build Coastguard Worker         }
932*61046927SAndroid Build Coastguard Worker 
933*61046927SAndroid Build Coastguard Worker         return true;
934*61046927SAndroid Build Coastguard Worker }
935*61046927SAndroid Build Coastguard Worker 
936*61046927SAndroid Build Coastguard Worker /* Compute a bitmask of which rf registers are used between
937*61046927SAndroid Build Coastguard Worker  * the two instructions.
938*61046927SAndroid Build Coastguard Worker  */
939*61046927SAndroid Build Coastguard Worker static uint64_t
qpu_raddrs_used(const struct v3d_qpu_instr * a,const struct v3d_qpu_instr * b)940*61046927SAndroid Build Coastguard Worker qpu_raddrs_used(const struct v3d_qpu_instr *a,
941*61046927SAndroid Build Coastguard Worker                 const struct v3d_qpu_instr *b)
942*61046927SAndroid Build Coastguard Worker {
943*61046927SAndroid Build Coastguard Worker         assert(a->type == V3D_QPU_INSTR_TYPE_ALU);
944*61046927SAndroid Build Coastguard Worker         assert(b->type == V3D_QPU_INSTR_TYPE_ALU);
945*61046927SAndroid Build Coastguard Worker 
946*61046927SAndroid Build Coastguard Worker         uint64_t raddrs_used = 0;
947*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_uses_mux(a, V3D_QPU_MUX_A))
948*61046927SAndroid Build Coastguard Worker                 raddrs_used |= (UINT64_C(1) << a->raddr_a);
949*61046927SAndroid Build Coastguard Worker         if (!a->sig.small_imm_b && v3d_qpu_uses_mux(a, V3D_QPU_MUX_B))
950*61046927SAndroid Build Coastguard Worker                 raddrs_used |= (UINT64_C(1) << a->raddr_b);
951*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_uses_mux(b, V3D_QPU_MUX_A))
952*61046927SAndroid Build Coastguard Worker                 raddrs_used |= (UINT64_C(1) << b->raddr_a);
953*61046927SAndroid Build Coastguard Worker         if (!b->sig.small_imm_b && v3d_qpu_uses_mux(b, V3D_QPU_MUX_B))
954*61046927SAndroid Build Coastguard Worker                 raddrs_used |= (UINT64_C(1) << b->raddr_b);
955*61046927SAndroid Build Coastguard Worker 
956*61046927SAndroid Build Coastguard Worker         return raddrs_used;
957*61046927SAndroid Build Coastguard Worker }
958*61046927SAndroid Build Coastguard Worker 
959*61046927SAndroid Build Coastguard Worker /* Takes two instructions and attempts to merge their raddr fields (including
960*61046927SAndroid Build Coastguard Worker  * small immediates) into one merged instruction. For V3D 4.x, returns false
961*61046927SAndroid Build Coastguard Worker  * if the two instructions access more than two different rf registers between
962*61046927SAndroid Build Coastguard Worker  * them, or more than one rf register and one small immediate. For 7.x returns
963*61046927SAndroid Build Coastguard Worker  * false if both instructions use small immediates.
964*61046927SAndroid Build Coastguard Worker  */
965*61046927SAndroid Build Coastguard Worker static bool
qpu_merge_raddrs(struct v3d_qpu_instr * result,const struct v3d_qpu_instr * add_instr,const struct v3d_qpu_instr * mul_instr,const struct v3d_device_info * devinfo)966*61046927SAndroid Build Coastguard Worker qpu_merge_raddrs(struct v3d_qpu_instr *result,
967*61046927SAndroid Build Coastguard Worker                  const struct v3d_qpu_instr *add_instr,
968*61046927SAndroid Build Coastguard Worker                  const struct v3d_qpu_instr *mul_instr,
969*61046927SAndroid Build Coastguard Worker                  const struct v3d_device_info *devinfo)
970*61046927SAndroid Build Coastguard Worker {
971*61046927SAndroid Build Coastguard Worker         if (devinfo->ver >= 71) {
972*61046927SAndroid Build Coastguard Worker                 assert(add_instr->sig.small_imm_a +
973*61046927SAndroid Build Coastguard Worker                        add_instr->sig.small_imm_b <= 1);
974*61046927SAndroid Build Coastguard Worker                 assert(add_instr->sig.small_imm_c +
975*61046927SAndroid Build Coastguard Worker                        add_instr->sig.small_imm_d == 0);
976*61046927SAndroid Build Coastguard Worker                 assert(mul_instr->sig.small_imm_a +
977*61046927SAndroid Build Coastguard Worker                        mul_instr->sig.small_imm_b == 0);
978*61046927SAndroid Build Coastguard Worker                 assert(mul_instr->sig.small_imm_c +
979*61046927SAndroid Build Coastguard Worker                        mul_instr->sig.small_imm_d <= 1);
980*61046927SAndroid Build Coastguard Worker 
981*61046927SAndroid Build Coastguard Worker                 result->sig.small_imm_a = add_instr->sig.small_imm_a;
982*61046927SAndroid Build Coastguard Worker                 result->sig.small_imm_b = add_instr->sig.small_imm_b;
983*61046927SAndroid Build Coastguard Worker                 result->sig.small_imm_c = mul_instr->sig.small_imm_c;
984*61046927SAndroid Build Coastguard Worker                 result->sig.small_imm_d = mul_instr->sig.small_imm_d;
985*61046927SAndroid Build Coastguard Worker 
986*61046927SAndroid Build Coastguard Worker                 return (result->sig.small_imm_a +
987*61046927SAndroid Build Coastguard Worker                         result->sig.small_imm_b +
988*61046927SAndroid Build Coastguard Worker                         result->sig.small_imm_c +
989*61046927SAndroid Build Coastguard Worker                         result->sig.small_imm_d) <= 1;
990*61046927SAndroid Build Coastguard Worker         }
991*61046927SAndroid Build Coastguard Worker 
992*61046927SAndroid Build Coastguard Worker         assert(devinfo->ver == 42);
993*61046927SAndroid Build Coastguard Worker 
994*61046927SAndroid Build Coastguard Worker         uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr);
995*61046927SAndroid Build Coastguard Worker         int naddrs = util_bitcount64(raddrs_used);
996*61046927SAndroid Build Coastguard Worker 
997*61046927SAndroid Build Coastguard Worker         if (naddrs > 2)
998*61046927SAndroid Build Coastguard Worker                 return false;
999*61046927SAndroid Build Coastguard Worker 
1000*61046927SAndroid Build Coastguard Worker         if ((add_instr->sig.small_imm_b || mul_instr->sig.small_imm_b)) {
1001*61046927SAndroid Build Coastguard Worker                 if (naddrs > 1)
1002*61046927SAndroid Build Coastguard Worker                         return false;
1003*61046927SAndroid Build Coastguard Worker 
1004*61046927SAndroid Build Coastguard Worker                 if (add_instr->sig.small_imm_b && mul_instr->sig.small_imm_b)
1005*61046927SAndroid Build Coastguard Worker                         if (add_instr->raddr_b != mul_instr->raddr_b)
1006*61046927SAndroid Build Coastguard Worker                                 return false;
1007*61046927SAndroid Build Coastguard Worker 
1008*61046927SAndroid Build Coastguard Worker                 result->sig.small_imm_b = true;
1009*61046927SAndroid Build Coastguard Worker                 result->raddr_b = add_instr->sig.small_imm_b ?
1010*61046927SAndroid Build Coastguard Worker                         add_instr->raddr_b : mul_instr->raddr_b;
1011*61046927SAndroid Build Coastguard Worker         }
1012*61046927SAndroid Build Coastguard Worker 
1013*61046927SAndroid Build Coastguard Worker         if (naddrs == 0)
1014*61046927SAndroid Build Coastguard Worker                 return true;
1015*61046927SAndroid Build Coastguard Worker 
1016*61046927SAndroid Build Coastguard Worker         int raddr_a = ffsll(raddrs_used) - 1;
1017*61046927SAndroid Build Coastguard Worker         raddrs_used &= ~(UINT64_C(1) << raddr_a);
1018*61046927SAndroid Build Coastguard Worker         result->raddr_a = raddr_a;
1019*61046927SAndroid Build Coastguard Worker 
1020*61046927SAndroid Build Coastguard Worker         if (!result->sig.small_imm_b) {
1021*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_B) &&
1022*61046927SAndroid Build Coastguard Worker                     raddr_a == add_instr->raddr_b) {
1023*61046927SAndroid Build Coastguard Worker                         if (add_instr->alu.add.a.mux == V3D_QPU_MUX_B)
1024*61046927SAndroid Build Coastguard Worker                                 result->alu.add.a.mux = V3D_QPU_MUX_A;
1025*61046927SAndroid Build Coastguard Worker                         if (add_instr->alu.add.b.mux == V3D_QPU_MUX_B &&
1026*61046927SAndroid Build Coastguard Worker                             v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) {
1027*61046927SAndroid Build Coastguard Worker                                 result->alu.add.b.mux = V3D_QPU_MUX_A;
1028*61046927SAndroid Build Coastguard Worker                         }
1029*61046927SAndroid Build Coastguard Worker                 }
1030*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_B) &&
1031*61046927SAndroid Build Coastguard Worker                     raddr_a == mul_instr->raddr_b) {
1032*61046927SAndroid Build Coastguard Worker                         if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_B)
1033*61046927SAndroid Build Coastguard Worker                                 result->alu.mul.a.mux = V3D_QPU_MUX_A;
1034*61046927SAndroid Build Coastguard Worker                         if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_B &&
1035*61046927SAndroid Build Coastguard Worker                             v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) {
1036*61046927SAndroid Build Coastguard Worker                                 result->alu.mul.b.mux = V3D_QPU_MUX_A;
1037*61046927SAndroid Build Coastguard Worker                         }
1038*61046927SAndroid Build Coastguard Worker                 }
1039*61046927SAndroid Build Coastguard Worker         }
1040*61046927SAndroid Build Coastguard Worker         if (!raddrs_used)
1041*61046927SAndroid Build Coastguard Worker                 return true;
1042*61046927SAndroid Build Coastguard Worker 
1043*61046927SAndroid Build Coastguard Worker         int raddr_b = ffsll(raddrs_used) - 1;
1044*61046927SAndroid Build Coastguard Worker         result->raddr_b = raddr_b;
1045*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_uses_mux(add_instr, V3D_QPU_MUX_A) &&
1046*61046927SAndroid Build Coastguard Worker             raddr_b == add_instr->raddr_a) {
1047*61046927SAndroid Build Coastguard Worker                 if (add_instr->alu.add.a.mux == V3D_QPU_MUX_A)
1048*61046927SAndroid Build Coastguard Worker                         result->alu.add.a.mux = V3D_QPU_MUX_B;
1049*61046927SAndroid Build Coastguard Worker                 if (add_instr->alu.add.b.mux == V3D_QPU_MUX_A &&
1050*61046927SAndroid Build Coastguard Worker                     v3d_qpu_add_op_num_src(add_instr->alu.add.op) > 1) {
1051*61046927SAndroid Build Coastguard Worker                         result->alu.add.b.mux = V3D_QPU_MUX_B;
1052*61046927SAndroid Build Coastguard Worker                 }
1053*61046927SAndroid Build Coastguard Worker         }
1054*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_uses_mux(mul_instr, V3D_QPU_MUX_A) &&
1055*61046927SAndroid Build Coastguard Worker             raddr_b == mul_instr->raddr_a) {
1056*61046927SAndroid Build Coastguard Worker                 if (mul_instr->alu.mul.a.mux == V3D_QPU_MUX_A)
1057*61046927SAndroid Build Coastguard Worker                         result->alu.mul.a.mux = V3D_QPU_MUX_B;
1058*61046927SAndroid Build Coastguard Worker                 if (mul_instr->alu.mul.b.mux == V3D_QPU_MUX_A &&
1059*61046927SAndroid Build Coastguard Worker                     v3d_qpu_mul_op_num_src(mul_instr->alu.mul.op) > 1) {
1060*61046927SAndroid Build Coastguard Worker                         result->alu.mul.b.mux = V3D_QPU_MUX_B;
1061*61046927SAndroid Build Coastguard Worker                 }
1062*61046927SAndroid Build Coastguard Worker         }
1063*61046927SAndroid Build Coastguard Worker 
1064*61046927SAndroid Build Coastguard Worker         return true;
1065*61046927SAndroid Build Coastguard Worker }
1066*61046927SAndroid Build Coastguard Worker 
1067*61046927SAndroid Build Coastguard Worker static bool
can_do_add_as_mul(enum v3d_qpu_add_op op)1068*61046927SAndroid Build Coastguard Worker can_do_add_as_mul(enum v3d_qpu_add_op op)
1069*61046927SAndroid Build Coastguard Worker {
1070*61046927SAndroid Build Coastguard Worker         switch (op) {
1071*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_ADD:
1072*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_SUB:
1073*61046927SAndroid Build Coastguard Worker                 return true;
1074*61046927SAndroid Build Coastguard Worker         default:
1075*61046927SAndroid Build Coastguard Worker                 return false;
1076*61046927SAndroid Build Coastguard Worker         }
1077*61046927SAndroid Build Coastguard Worker }
1078*61046927SAndroid Build Coastguard Worker 
1079*61046927SAndroid Build Coastguard Worker static enum v3d_qpu_mul_op
add_op_as_mul_op(enum v3d_qpu_add_op op)1080*61046927SAndroid Build Coastguard Worker add_op_as_mul_op(enum v3d_qpu_add_op op)
1081*61046927SAndroid Build Coastguard Worker {
1082*61046927SAndroid Build Coastguard Worker         switch (op) {
1083*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_ADD:
1084*61046927SAndroid Build Coastguard Worker                 return V3D_QPU_M_ADD;
1085*61046927SAndroid Build Coastguard Worker         case V3D_QPU_A_SUB:
1086*61046927SAndroid Build Coastguard Worker                 return V3D_QPU_M_SUB;
1087*61046927SAndroid Build Coastguard Worker         default:
1088*61046927SAndroid Build Coastguard Worker                 unreachable("unexpected add opcode");
1089*61046927SAndroid Build Coastguard Worker         }
1090*61046927SAndroid Build Coastguard Worker }
1091*61046927SAndroid Build Coastguard Worker 
1092*61046927SAndroid Build Coastguard Worker static void
qpu_convert_add_to_mul(const struct v3d_device_info * devinfo,struct v3d_qpu_instr * inst)1093*61046927SAndroid Build Coastguard Worker qpu_convert_add_to_mul(const struct v3d_device_info *devinfo,
1094*61046927SAndroid Build Coastguard Worker                        struct v3d_qpu_instr *inst)
1095*61046927SAndroid Build Coastguard Worker {
1096*61046927SAndroid Build Coastguard Worker         STATIC_ASSERT(sizeof(inst->alu.mul) == sizeof(inst->alu.add));
1097*61046927SAndroid Build Coastguard Worker         assert(inst->alu.add.op != V3D_QPU_A_NOP);
1098*61046927SAndroid Build Coastguard Worker         assert(inst->alu.mul.op == V3D_QPU_M_NOP);
1099*61046927SAndroid Build Coastguard Worker 
1100*61046927SAndroid Build Coastguard Worker         memcpy(&inst->alu.mul, &inst->alu.add, sizeof(inst->alu.mul));
1101*61046927SAndroid Build Coastguard Worker         inst->alu.mul.op = add_op_as_mul_op(inst->alu.add.op);
1102*61046927SAndroid Build Coastguard Worker         inst->alu.add.op = V3D_QPU_A_NOP;
1103*61046927SAndroid Build Coastguard Worker 
1104*61046927SAndroid Build Coastguard Worker         inst->flags.mc = inst->flags.ac;
1105*61046927SAndroid Build Coastguard Worker         inst->flags.mpf = inst->flags.apf;
1106*61046927SAndroid Build Coastguard Worker         inst->flags.muf = inst->flags.auf;
1107*61046927SAndroid Build Coastguard Worker         inst->flags.ac = V3D_QPU_COND_NONE;
1108*61046927SAndroid Build Coastguard Worker         inst->flags.apf = V3D_QPU_PF_NONE;
1109*61046927SAndroid Build Coastguard Worker         inst->flags.auf = V3D_QPU_UF_NONE;
1110*61046927SAndroid Build Coastguard Worker 
1111*61046927SAndroid Build Coastguard Worker         inst->alu.mul.output_pack = inst->alu.add.output_pack;
1112*61046927SAndroid Build Coastguard Worker 
1113*61046927SAndroid Build Coastguard Worker         inst->alu.mul.a.unpack = inst->alu.add.a.unpack;
1114*61046927SAndroid Build Coastguard Worker         inst->alu.mul.b.unpack = inst->alu.add.b.unpack;
1115*61046927SAndroid Build Coastguard Worker         inst->alu.add.output_pack = V3D_QPU_PACK_NONE;
1116*61046927SAndroid Build Coastguard Worker         inst->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
1117*61046927SAndroid Build Coastguard Worker         inst->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1118*61046927SAndroid Build Coastguard Worker 
1119*61046927SAndroid Build Coastguard Worker         if (devinfo->ver >= 71) {
1120*61046927SAndroid Build Coastguard Worker                 assert(!inst->sig.small_imm_c && !inst->sig.small_imm_d);
1121*61046927SAndroid Build Coastguard Worker                 assert(inst->sig.small_imm_a + inst->sig.small_imm_b <= 1);
1122*61046927SAndroid Build Coastguard Worker                 if (inst->sig.small_imm_a) {
1123*61046927SAndroid Build Coastguard Worker                         inst->sig.small_imm_c = true;
1124*61046927SAndroid Build Coastguard Worker                         inst->sig.small_imm_a = false;
1125*61046927SAndroid Build Coastguard Worker                 } else if (inst->sig.small_imm_b) {
1126*61046927SAndroid Build Coastguard Worker                         inst->sig.small_imm_d = true;
1127*61046927SAndroid Build Coastguard Worker                         inst->sig.small_imm_b = false;
1128*61046927SAndroid Build Coastguard Worker                 }
1129*61046927SAndroid Build Coastguard Worker         }
1130*61046927SAndroid Build Coastguard Worker }
1131*61046927SAndroid Build Coastguard Worker 
1132*61046927SAndroid Build Coastguard Worker static bool
can_do_mul_as_add(const struct v3d_device_info * devinfo,enum v3d_qpu_mul_op op)1133*61046927SAndroid Build Coastguard Worker can_do_mul_as_add(const struct v3d_device_info *devinfo, enum v3d_qpu_mul_op op)
1134*61046927SAndroid Build Coastguard Worker {
1135*61046927SAndroid Build Coastguard Worker         switch (op) {
1136*61046927SAndroid Build Coastguard Worker         case V3D_QPU_M_MOV:
1137*61046927SAndroid Build Coastguard Worker         case V3D_QPU_M_FMOV:
1138*61046927SAndroid Build Coastguard Worker                 return devinfo->ver >= 71;
1139*61046927SAndroid Build Coastguard Worker         default:
1140*61046927SAndroid Build Coastguard Worker                 return false;
1141*61046927SAndroid Build Coastguard Worker         }
1142*61046927SAndroid Build Coastguard Worker }
1143*61046927SAndroid Build Coastguard Worker 
1144*61046927SAndroid Build Coastguard Worker static enum v3d_qpu_mul_op
mul_op_as_add_op(enum v3d_qpu_mul_op op)1145*61046927SAndroid Build Coastguard Worker mul_op_as_add_op(enum v3d_qpu_mul_op op)
1146*61046927SAndroid Build Coastguard Worker {
1147*61046927SAndroid Build Coastguard Worker         switch (op) {
1148*61046927SAndroid Build Coastguard Worker         case V3D_QPU_M_MOV:
1149*61046927SAndroid Build Coastguard Worker                 return V3D_QPU_A_MOV;
1150*61046927SAndroid Build Coastguard Worker         case V3D_QPU_M_FMOV:
1151*61046927SAndroid Build Coastguard Worker                 return V3D_QPU_A_FMOV;
1152*61046927SAndroid Build Coastguard Worker         default:
1153*61046927SAndroid Build Coastguard Worker                 unreachable("unexpected mov opcode");
1154*61046927SAndroid Build Coastguard Worker         }
1155*61046927SAndroid Build Coastguard Worker }
1156*61046927SAndroid Build Coastguard Worker 
1157*61046927SAndroid Build Coastguard Worker static void
qpu_convert_mul_to_add(struct v3d_qpu_instr * inst)1158*61046927SAndroid Build Coastguard Worker qpu_convert_mul_to_add(struct v3d_qpu_instr *inst)
1159*61046927SAndroid Build Coastguard Worker {
1160*61046927SAndroid Build Coastguard Worker         STATIC_ASSERT(sizeof(inst->alu.add) == sizeof(inst->alu.mul));
1161*61046927SAndroid Build Coastguard Worker         assert(inst->alu.mul.op != V3D_QPU_M_NOP);
1162*61046927SAndroid Build Coastguard Worker         assert(inst->alu.add.op == V3D_QPU_A_NOP);
1163*61046927SAndroid Build Coastguard Worker 
1164*61046927SAndroid Build Coastguard Worker         memcpy(&inst->alu.add, &inst->alu.mul, sizeof(inst->alu.add));
1165*61046927SAndroid Build Coastguard Worker         inst->alu.add.op = mul_op_as_add_op(inst->alu.mul.op);
1166*61046927SAndroid Build Coastguard Worker         inst->alu.mul.op = V3D_QPU_M_NOP;
1167*61046927SAndroid Build Coastguard Worker 
1168*61046927SAndroid Build Coastguard Worker         inst->flags.ac = inst->flags.mc;
1169*61046927SAndroid Build Coastguard Worker         inst->flags.apf = inst->flags.mpf;
1170*61046927SAndroid Build Coastguard Worker         inst->flags.auf = inst->flags.muf;
1171*61046927SAndroid Build Coastguard Worker         inst->flags.mc = V3D_QPU_COND_NONE;
1172*61046927SAndroid Build Coastguard Worker         inst->flags.mpf = V3D_QPU_PF_NONE;
1173*61046927SAndroid Build Coastguard Worker         inst->flags.muf = V3D_QPU_UF_NONE;
1174*61046927SAndroid Build Coastguard Worker 
1175*61046927SAndroid Build Coastguard Worker         inst->alu.add.output_pack = inst->alu.mul.output_pack;
1176*61046927SAndroid Build Coastguard Worker         inst->alu.add.a.unpack = inst->alu.mul.a.unpack;
1177*61046927SAndroid Build Coastguard Worker         inst->alu.add.b.unpack = inst->alu.mul.b.unpack;
1178*61046927SAndroid Build Coastguard Worker         inst->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1179*61046927SAndroid Build Coastguard Worker         inst->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
1180*61046927SAndroid Build Coastguard Worker         inst->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1181*61046927SAndroid Build Coastguard Worker 
1182*61046927SAndroid Build Coastguard Worker         assert(!inst->sig.small_imm_a && !inst->sig.small_imm_b);
1183*61046927SAndroid Build Coastguard Worker         assert(inst->sig.small_imm_c + inst->sig.small_imm_d <= 1);
1184*61046927SAndroid Build Coastguard Worker         if (inst->sig.small_imm_c) {
1185*61046927SAndroid Build Coastguard Worker                 inst->sig.small_imm_a = true;
1186*61046927SAndroid Build Coastguard Worker                 inst->sig.small_imm_c = false;
1187*61046927SAndroid Build Coastguard Worker         } else if (inst->sig.small_imm_d) {
1188*61046927SAndroid Build Coastguard Worker                 inst->sig.small_imm_b = true;
1189*61046927SAndroid Build Coastguard Worker                 inst->sig.small_imm_d = false;
1190*61046927SAndroid Build Coastguard Worker         }
1191*61046927SAndroid Build Coastguard Worker }
1192*61046927SAndroid Build Coastguard Worker 
1193*61046927SAndroid Build Coastguard Worker static bool
qpu_merge_inst(const struct v3d_device_info * devinfo,struct v3d_qpu_instr * result,const struct v3d_qpu_instr * a,const struct v3d_qpu_instr * b)1194*61046927SAndroid Build Coastguard Worker qpu_merge_inst(const struct v3d_device_info *devinfo,
1195*61046927SAndroid Build Coastguard Worker                struct v3d_qpu_instr *result,
1196*61046927SAndroid Build Coastguard Worker                const struct v3d_qpu_instr *a,
1197*61046927SAndroid Build Coastguard Worker                const struct v3d_qpu_instr *b)
1198*61046927SAndroid Build Coastguard Worker {
1199*61046927SAndroid Build Coastguard Worker         if (a->type != V3D_QPU_INSTR_TYPE_ALU ||
1200*61046927SAndroid Build Coastguard Worker             b->type != V3D_QPU_INSTR_TYPE_ALU) {
1201*61046927SAndroid Build Coastguard Worker                 return false;
1202*61046927SAndroid Build Coastguard Worker         }
1203*61046927SAndroid Build Coastguard Worker 
1204*61046927SAndroid Build Coastguard Worker         if (!qpu_compatible_peripheral_access(devinfo, a, b))
1205*61046927SAndroid Build Coastguard Worker                 return false;
1206*61046927SAndroid Build Coastguard Worker 
1207*61046927SAndroid Build Coastguard Worker         struct v3d_qpu_instr merge = *a;
1208*61046927SAndroid Build Coastguard Worker         const struct v3d_qpu_instr *add_instr = NULL, *mul_instr = NULL;
1209*61046927SAndroid Build Coastguard Worker 
1210*61046927SAndroid Build Coastguard Worker         struct v3d_qpu_instr mul_inst;
1211*61046927SAndroid Build Coastguard Worker         if (b->alu.add.op != V3D_QPU_A_NOP) {
1212*61046927SAndroid Build Coastguard Worker                 if (a->alu.add.op == V3D_QPU_A_NOP) {
1213*61046927SAndroid Build Coastguard Worker                         merge.alu.add = b->alu.add;
1214*61046927SAndroid Build Coastguard Worker 
1215*61046927SAndroid Build Coastguard Worker                         merge.flags.ac = b->flags.ac;
1216*61046927SAndroid Build Coastguard Worker                         merge.flags.apf = b->flags.apf;
1217*61046927SAndroid Build Coastguard Worker                         merge.flags.auf = b->flags.auf;
1218*61046927SAndroid Build Coastguard Worker 
1219*61046927SAndroid Build Coastguard Worker                         add_instr = b;
1220*61046927SAndroid Build Coastguard Worker                         mul_instr = a;
1221*61046927SAndroid Build Coastguard Worker                 }
1222*61046927SAndroid Build Coastguard Worker                 /* If a's add op is used but its mul op is not, then see if we
1223*61046927SAndroid Build Coastguard Worker                  * can convert either a's add op or b's add op to a mul op
1224*61046927SAndroid Build Coastguard Worker                  * so we can merge.
1225*61046927SAndroid Build Coastguard Worker                  */
1226*61046927SAndroid Build Coastguard Worker                 else if (a->alu.mul.op == V3D_QPU_M_NOP &&
1227*61046927SAndroid Build Coastguard Worker                          can_do_add_as_mul(b->alu.add.op)) {
1228*61046927SAndroid Build Coastguard Worker                         mul_inst = *b;
1229*61046927SAndroid Build Coastguard Worker                         qpu_convert_add_to_mul(devinfo, &mul_inst);
1230*61046927SAndroid Build Coastguard Worker 
1231*61046927SAndroid Build Coastguard Worker                         merge.alu.mul = mul_inst.alu.mul;
1232*61046927SAndroid Build Coastguard Worker 
1233*61046927SAndroid Build Coastguard Worker                         merge.flags.mc = mul_inst.flags.mc;
1234*61046927SAndroid Build Coastguard Worker                         merge.flags.mpf = mul_inst.flags.mpf;
1235*61046927SAndroid Build Coastguard Worker                         merge.flags.muf = mul_inst.flags.muf;
1236*61046927SAndroid Build Coastguard Worker 
1237*61046927SAndroid Build Coastguard Worker                         add_instr = a;
1238*61046927SAndroid Build Coastguard Worker                         mul_instr = &mul_inst;
1239*61046927SAndroid Build Coastguard Worker                 } else if (a->alu.mul.op == V3D_QPU_M_NOP &&
1240*61046927SAndroid Build Coastguard Worker                            can_do_add_as_mul(a->alu.add.op)) {
1241*61046927SAndroid Build Coastguard Worker                         mul_inst = *a;
1242*61046927SAndroid Build Coastguard Worker                         qpu_convert_add_to_mul(devinfo, &mul_inst);
1243*61046927SAndroid Build Coastguard Worker 
1244*61046927SAndroid Build Coastguard Worker                         merge = mul_inst;
1245*61046927SAndroid Build Coastguard Worker                         merge.alu.add = b->alu.add;
1246*61046927SAndroid Build Coastguard Worker 
1247*61046927SAndroid Build Coastguard Worker                         merge.flags.ac = b->flags.ac;
1248*61046927SAndroid Build Coastguard Worker                         merge.flags.apf = b->flags.apf;
1249*61046927SAndroid Build Coastguard Worker                         merge.flags.auf = b->flags.auf;
1250*61046927SAndroid Build Coastguard Worker 
1251*61046927SAndroid Build Coastguard Worker                         add_instr = b;
1252*61046927SAndroid Build Coastguard Worker                         mul_instr = &mul_inst;
1253*61046927SAndroid Build Coastguard Worker                 } else {
1254*61046927SAndroid Build Coastguard Worker                         return false;
1255*61046927SAndroid Build Coastguard Worker                 }
1256*61046927SAndroid Build Coastguard Worker         }
1257*61046927SAndroid Build Coastguard Worker 
1258*61046927SAndroid Build Coastguard Worker         struct v3d_qpu_instr add_inst;
1259*61046927SAndroid Build Coastguard Worker         if (b->alu.mul.op != V3D_QPU_M_NOP) {
1260*61046927SAndroid Build Coastguard Worker                 if (a->alu.mul.op == V3D_QPU_M_NOP) {
1261*61046927SAndroid Build Coastguard Worker                         merge.alu.mul = b->alu.mul;
1262*61046927SAndroid Build Coastguard Worker 
1263*61046927SAndroid Build Coastguard Worker                         merge.flags.mc = b->flags.mc;
1264*61046927SAndroid Build Coastguard Worker                         merge.flags.mpf = b->flags.mpf;
1265*61046927SAndroid Build Coastguard Worker                         merge.flags.muf = b->flags.muf;
1266*61046927SAndroid Build Coastguard Worker 
1267*61046927SAndroid Build Coastguard Worker                         mul_instr = b;
1268*61046927SAndroid Build Coastguard Worker                         add_instr = a;
1269*61046927SAndroid Build Coastguard Worker                 }
1270*61046927SAndroid Build Coastguard Worker                 /* If a's mul op is used but its add op is not, then see if we
1271*61046927SAndroid Build Coastguard Worker                  * can convert either a's mul op or b's mul op to an add op
1272*61046927SAndroid Build Coastguard Worker                  * so we can merge.
1273*61046927SAndroid Build Coastguard Worker                  */
1274*61046927SAndroid Build Coastguard Worker                 else if (a->alu.add.op == V3D_QPU_A_NOP &&
1275*61046927SAndroid Build Coastguard Worker                          can_do_mul_as_add(devinfo, b->alu.mul.op)) {
1276*61046927SAndroid Build Coastguard Worker                         add_inst = *b;
1277*61046927SAndroid Build Coastguard Worker                         qpu_convert_mul_to_add(&add_inst);
1278*61046927SAndroid Build Coastguard Worker 
1279*61046927SAndroid Build Coastguard Worker                         merge.alu.add = add_inst.alu.add;
1280*61046927SAndroid Build Coastguard Worker 
1281*61046927SAndroid Build Coastguard Worker                         merge.flags.ac = add_inst.flags.ac;
1282*61046927SAndroid Build Coastguard Worker                         merge.flags.apf = add_inst.flags.apf;
1283*61046927SAndroid Build Coastguard Worker                         merge.flags.auf = add_inst.flags.auf;
1284*61046927SAndroid Build Coastguard Worker 
1285*61046927SAndroid Build Coastguard Worker                         mul_instr = a;
1286*61046927SAndroid Build Coastguard Worker                         add_instr = &add_inst;
1287*61046927SAndroid Build Coastguard Worker                 } else if (a->alu.add.op == V3D_QPU_A_NOP &&
1288*61046927SAndroid Build Coastguard Worker                            can_do_mul_as_add(devinfo, a->alu.mul.op)) {
1289*61046927SAndroid Build Coastguard Worker                         add_inst = *a;
1290*61046927SAndroid Build Coastguard Worker                         qpu_convert_mul_to_add(&add_inst);
1291*61046927SAndroid Build Coastguard Worker 
1292*61046927SAndroid Build Coastguard Worker                         merge = add_inst;
1293*61046927SAndroid Build Coastguard Worker                         merge.alu.mul = b->alu.mul;
1294*61046927SAndroid Build Coastguard Worker 
1295*61046927SAndroid Build Coastguard Worker                         merge.flags.mc = b->flags.mc;
1296*61046927SAndroid Build Coastguard Worker                         merge.flags.mpf = b->flags.mpf;
1297*61046927SAndroid Build Coastguard Worker                         merge.flags.muf = b->flags.muf;
1298*61046927SAndroid Build Coastguard Worker 
1299*61046927SAndroid Build Coastguard Worker                         mul_instr = b;
1300*61046927SAndroid Build Coastguard Worker                         add_instr = &add_inst;
1301*61046927SAndroid Build Coastguard Worker                 } else {
1302*61046927SAndroid Build Coastguard Worker                         return false;
1303*61046927SAndroid Build Coastguard Worker                 }
1304*61046927SAndroid Build Coastguard Worker         }
1305*61046927SAndroid Build Coastguard Worker 
1306*61046927SAndroid Build Coastguard Worker         /* V3D 4.x and earlier use muxes to select the inputs for the ALUs and
1307*61046927SAndroid Build Coastguard Worker          * they have restrictions on the number of raddrs that can be adressed
1308*61046927SAndroid Build Coastguard Worker          * in a single instruction. In V3D 7.x, we don't have that restriction,
1309*61046927SAndroid Build Coastguard Worker          * but we are still limited to a single small immediate per instruction.
1310*61046927SAndroid Build Coastguard Worker          */
1311*61046927SAndroid Build Coastguard Worker         if (add_instr && mul_instr &&
1312*61046927SAndroid Build Coastguard Worker             !qpu_merge_raddrs(&merge, add_instr, mul_instr, devinfo)) {
1313*61046927SAndroid Build Coastguard Worker                 return false;
1314*61046927SAndroid Build Coastguard Worker         }
1315*61046927SAndroid Build Coastguard Worker 
1316*61046927SAndroid Build Coastguard Worker         merge.sig.thrsw |= b->sig.thrsw;
1317*61046927SAndroid Build Coastguard Worker         merge.sig.ldunif |= b->sig.ldunif;
1318*61046927SAndroid Build Coastguard Worker         merge.sig.ldunifrf |= b->sig.ldunifrf;
1319*61046927SAndroid Build Coastguard Worker         merge.sig.ldunifa |= b->sig.ldunifa;
1320*61046927SAndroid Build Coastguard Worker         merge.sig.ldunifarf |= b->sig.ldunifarf;
1321*61046927SAndroid Build Coastguard Worker         merge.sig.ldtmu |= b->sig.ldtmu;
1322*61046927SAndroid Build Coastguard Worker         merge.sig.ldvary |= b->sig.ldvary;
1323*61046927SAndroid Build Coastguard Worker         merge.sig.ldvpm |= b->sig.ldvpm;
1324*61046927SAndroid Build Coastguard Worker         merge.sig.ldtlb |= b->sig.ldtlb;
1325*61046927SAndroid Build Coastguard Worker         merge.sig.ldtlbu |= b->sig.ldtlbu;
1326*61046927SAndroid Build Coastguard Worker         merge.sig.ucb |= b->sig.ucb;
1327*61046927SAndroid Build Coastguard Worker         merge.sig.rotate |= b->sig.rotate;
1328*61046927SAndroid Build Coastguard Worker         merge.sig.wrtmuc |= b->sig.wrtmuc;
1329*61046927SAndroid Build Coastguard Worker 
1330*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_sig_writes_address(devinfo, &a->sig) &&
1331*61046927SAndroid Build Coastguard Worker             v3d_qpu_sig_writes_address(devinfo, &b->sig))
1332*61046927SAndroid Build Coastguard Worker                 return false;
1333*61046927SAndroid Build Coastguard Worker         merge.sig_addr |= b->sig_addr;
1334*61046927SAndroid Build Coastguard Worker         merge.sig_magic |= b->sig_magic;
1335*61046927SAndroid Build Coastguard Worker 
1336*61046927SAndroid Build Coastguard Worker         uint64_t packed;
1337*61046927SAndroid Build Coastguard Worker         bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
1338*61046927SAndroid Build Coastguard Worker 
1339*61046927SAndroid Build Coastguard Worker         *result = merge;
1340*61046927SAndroid Build Coastguard Worker         /* No modifying the real instructions on failure. */
1341*61046927SAndroid Build Coastguard Worker         assert(ok || (a != result && b != result));
1342*61046927SAndroid Build Coastguard Worker 
1343*61046927SAndroid Build Coastguard Worker         return ok;
1344*61046927SAndroid Build Coastguard Worker }
1345*61046927SAndroid Build Coastguard Worker 
1346*61046927SAndroid Build Coastguard Worker static inline bool
try_skip_for_ldvary_pipelining(const struct v3d_qpu_instr * inst)1347*61046927SAndroid Build Coastguard Worker try_skip_for_ldvary_pipelining(const struct v3d_qpu_instr *inst)
1348*61046927SAndroid Build Coastguard Worker {
1349*61046927SAndroid Build Coastguard Worker         return inst->sig.ldunif || inst->sig.ldunifrf;
1350*61046927SAndroid Build Coastguard Worker }
1351*61046927SAndroid Build Coastguard Worker 
1352*61046927SAndroid Build Coastguard Worker static bool
1353*61046927SAndroid Build Coastguard Worker qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile *c,
1354*61046927SAndroid Build Coastguard Worker                                          struct choose_scoreboard *scoreboard,
1355*61046927SAndroid Build Coastguard Worker                                          const struct qinst *qinst);
1356*61046927SAndroid Build Coastguard Worker 
1357*61046927SAndroid Build Coastguard Worker static struct schedule_node *
choose_instruction_to_schedule(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct schedule_node * prev_inst)1358*61046927SAndroid Build Coastguard Worker choose_instruction_to_schedule(struct v3d_compile *c,
1359*61046927SAndroid Build Coastguard Worker                                struct choose_scoreboard *scoreboard,
1360*61046927SAndroid Build Coastguard Worker                                struct schedule_node *prev_inst)
1361*61046927SAndroid Build Coastguard Worker {
1362*61046927SAndroid Build Coastguard Worker         struct schedule_node *chosen = NULL;
1363*61046927SAndroid Build Coastguard Worker         int chosen_prio = 0;
1364*61046927SAndroid Build Coastguard Worker 
1365*61046927SAndroid Build Coastguard Worker         /* Don't pair up anything with a thread switch signal -- emit_thrsw()
1366*61046927SAndroid Build Coastguard Worker          * will handle pairing it along with filling the delay slots.
1367*61046927SAndroid Build Coastguard Worker          */
1368*61046927SAndroid Build Coastguard Worker         if (prev_inst) {
1369*61046927SAndroid Build Coastguard Worker                 if (prev_inst->inst->qpu.sig.thrsw)
1370*61046927SAndroid Build Coastguard Worker                         return NULL;
1371*61046927SAndroid Build Coastguard Worker         }
1372*61046927SAndroid Build Coastguard Worker 
1373*61046927SAndroid Build Coastguard Worker         bool ldvary_pipelining = c->s->info.stage == MESA_SHADER_FRAGMENT &&
1374*61046927SAndroid Build Coastguard Worker                                  scoreboard->ldvary_count < c->num_inputs;
1375*61046927SAndroid Build Coastguard Worker         bool skipped_insts_for_ldvary_pipelining = false;
1376*61046927SAndroid Build Coastguard Worker retry:
1377*61046927SAndroid Build Coastguard Worker         list_for_each_entry(struct schedule_node, n, &scoreboard->dag->heads,
1378*61046927SAndroid Build Coastguard Worker                             dag.link) {
1379*61046927SAndroid Build Coastguard Worker                 const struct v3d_qpu_instr *inst = &n->inst->qpu;
1380*61046927SAndroid Build Coastguard Worker 
1381*61046927SAndroid Build Coastguard Worker                 if (ldvary_pipelining && try_skip_for_ldvary_pipelining(inst)) {
1382*61046927SAndroid Build Coastguard Worker                         skipped_insts_for_ldvary_pipelining = true;
1383*61046927SAndroid Build Coastguard Worker                         continue;
1384*61046927SAndroid Build Coastguard Worker                 }
1385*61046927SAndroid Build Coastguard Worker 
1386*61046927SAndroid Build Coastguard Worker                 /* Don't choose the branch instruction until it's the last one
1387*61046927SAndroid Build Coastguard Worker                  * left.  We'll move it up to fit its delay slots after we
1388*61046927SAndroid Build Coastguard Worker                  * choose it.
1389*61046927SAndroid Build Coastguard Worker                  */
1390*61046927SAndroid Build Coastguard Worker                 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH &&
1391*61046927SAndroid Build Coastguard Worker                     !list_is_singular(&scoreboard->dag->heads)) {
1392*61046927SAndroid Build Coastguard Worker                         continue;
1393*61046927SAndroid Build Coastguard Worker                 }
1394*61046927SAndroid Build Coastguard Worker 
1395*61046927SAndroid Build Coastguard Worker                 /* We need to have 3 delay slots between a write to unifa and
1396*61046927SAndroid Build Coastguard Worker                  * a follow-up ldunifa.
1397*61046927SAndroid Build Coastguard Worker                  */
1398*61046927SAndroid Build Coastguard Worker                 if ((inst->sig.ldunifa || inst->sig.ldunifarf) &&
1399*61046927SAndroid Build Coastguard Worker                     scoreboard->tick - scoreboard->last_unifa_write_tick <= 3)
1400*61046927SAndroid Build Coastguard Worker                         continue;
1401*61046927SAndroid Build Coastguard Worker 
1402*61046927SAndroid Build Coastguard Worker                 /* "An instruction must not read from a location in physical
1403*61046927SAndroid Build Coastguard Worker                  *  regfile A or B that was written to by the previous
1404*61046927SAndroid Build Coastguard Worker                  *  instruction."
1405*61046927SAndroid Build Coastguard Worker                  */
1406*61046927SAndroid Build Coastguard Worker                 if (reads_too_soon_after_write(c->devinfo, scoreboard, n->inst))
1407*61046927SAndroid Build Coastguard Worker                         continue;
1408*61046927SAndroid Build Coastguard Worker 
1409*61046927SAndroid Build Coastguard Worker                 if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst))
1410*61046927SAndroid Build Coastguard Worker                         continue;
1411*61046927SAndroid Build Coastguard Worker 
1412*61046927SAndroid Build Coastguard Worker                 /* "Before doing a TLB access a scoreboard wait must have been
1413*61046927SAndroid Build Coastguard Worker                  *  done. This happens either on the first or last thread
1414*61046927SAndroid Build Coastguard Worker                  *  switch, depending on a setting (scb_wait_on_first_thrsw) in
1415*61046927SAndroid Build Coastguard Worker                  *  the shader state."
1416*61046927SAndroid Build Coastguard Worker                  */
1417*61046927SAndroid Build Coastguard Worker                 if (pixel_scoreboard_too_soon(c, scoreboard, inst))
1418*61046927SAndroid Build Coastguard Worker                         continue;
1419*61046927SAndroid Build Coastguard Worker 
1420*61046927SAndroid Build Coastguard Worker                 /* ldunif and ldvary both write the same register (r5 for v42
1421*61046927SAndroid Build Coastguard Worker                  * and below, rf0 for v71), but ldunif does so a tick sooner.
1422*61046927SAndroid Build Coastguard Worker                  * If the ldvary's register wasn't used, then ldunif might
1423*61046927SAndroid Build Coastguard Worker                  * otherwise get scheduled so ldunif and ldvary try to update
1424*61046927SAndroid Build Coastguard Worker                  * the register in the same tick.
1425*61046927SAndroid Build Coastguard Worker                  */
1426*61046927SAndroid Build Coastguard Worker                 if ((inst->sig.ldunif || inst->sig.ldunifa) &&
1427*61046927SAndroid Build Coastguard Worker                     scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
1428*61046927SAndroid Build Coastguard Worker                         continue;
1429*61046927SAndroid Build Coastguard Worker                 }
1430*61046927SAndroid Build Coastguard Worker 
1431*61046927SAndroid Build Coastguard Worker                 /* If we are in a thrsw delay slot check that this instruction
1432*61046927SAndroid Build Coastguard Worker                  * is valid for that.
1433*61046927SAndroid Build Coastguard Worker                  */
1434*61046927SAndroid Build Coastguard Worker                 if (scoreboard->last_thrsw_tick + 2 >= scoreboard->tick &&
1435*61046927SAndroid Build Coastguard Worker                     !qpu_inst_after_thrsw_valid_in_delay_slot(c, scoreboard,
1436*61046927SAndroid Build Coastguard Worker                                                               n->inst)) {
1437*61046927SAndroid Build Coastguard Worker                         continue;
1438*61046927SAndroid Build Coastguard Worker                 }
1439*61046927SAndroid Build Coastguard Worker 
1440*61046927SAndroid Build Coastguard Worker                 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
1441*61046927SAndroid Build Coastguard Worker                         /* Don't try to put a branch in the delay slots of another
1442*61046927SAndroid Build Coastguard Worker                          * branch or a unifa write.
1443*61046927SAndroid Build Coastguard Worker                          */
1444*61046927SAndroid Build Coastguard Worker                         if (scoreboard->last_branch_tick + 3 >= scoreboard->tick)
1445*61046927SAndroid Build Coastguard Worker                                 continue;
1446*61046927SAndroid Build Coastguard Worker                         if (scoreboard->last_unifa_write_tick + 3 >= scoreboard->tick)
1447*61046927SAndroid Build Coastguard Worker                                 continue;
1448*61046927SAndroid Build Coastguard Worker 
1449*61046927SAndroid Build Coastguard Worker                         /* No branch with cond != 0,2,3 and msfign != 0 after
1450*61046927SAndroid Build Coastguard Worker                          * setmsf.
1451*61046927SAndroid Build Coastguard Worker                          */
1452*61046927SAndroid Build Coastguard Worker                         if (scoreboard->last_setmsf_tick == scoreboard->tick - 1 &&
1453*61046927SAndroid Build Coastguard Worker                             inst->branch.msfign != V3D_QPU_MSFIGN_NONE &&
1454*61046927SAndroid Build Coastguard Worker                             inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS &&
1455*61046927SAndroid Build Coastguard Worker                             inst->branch.cond != V3D_QPU_BRANCH_COND_A0 &&
1456*61046927SAndroid Build Coastguard Worker                             inst->branch.cond != V3D_QPU_BRANCH_COND_NA0) {
1457*61046927SAndroid Build Coastguard Worker                                 continue;
1458*61046927SAndroid Build Coastguard Worker                         }
1459*61046927SAndroid Build Coastguard Worker                 }
1460*61046927SAndroid Build Coastguard Worker 
1461*61046927SAndroid Build Coastguard Worker                 /* If we're trying to pair with another instruction, check
1462*61046927SAndroid Build Coastguard Worker                  * that they're compatible.
1463*61046927SAndroid Build Coastguard Worker                  */
1464*61046927SAndroid Build Coastguard Worker                 if (prev_inst) {
1465*61046927SAndroid Build Coastguard Worker                         /* Don't pair up a thread switch signal -- we'll
1466*61046927SAndroid Build Coastguard Worker                          * handle pairing it when we pick it on its own.
1467*61046927SAndroid Build Coastguard Worker                          */
1468*61046927SAndroid Build Coastguard Worker                         if (inst->sig.thrsw)
1469*61046927SAndroid Build Coastguard Worker                                 continue;
1470*61046927SAndroid Build Coastguard Worker 
1471*61046927SAndroid Build Coastguard Worker                         if (prev_inst->inst->uniform != -1 &&
1472*61046927SAndroid Build Coastguard Worker                             n->inst->uniform != -1)
1473*61046927SAndroid Build Coastguard Worker                                 continue;
1474*61046927SAndroid Build Coastguard Worker 
1475*61046927SAndroid Build Coastguard Worker                        /* Simulator complains if we have two uniforms loaded in
1476*61046927SAndroid Build Coastguard Worker                         * the the same instruction, which could happen if we
1477*61046927SAndroid Build Coastguard Worker                         * have a ldunif or sideband uniform and we pair that
1478*61046927SAndroid Build Coastguard Worker                         * with ldunifa.
1479*61046927SAndroid Build Coastguard Worker                         */
1480*61046927SAndroid Build Coastguard Worker                         if (vir_has_uniform(prev_inst->inst) &&
1481*61046927SAndroid Build Coastguard Worker                             (inst->sig.ldunifa || inst->sig.ldunifarf)) {
1482*61046927SAndroid Build Coastguard Worker                                 continue;
1483*61046927SAndroid Build Coastguard Worker                         }
1484*61046927SAndroid Build Coastguard Worker 
1485*61046927SAndroid Build Coastguard Worker                         if ((prev_inst->inst->qpu.sig.ldunifa ||
1486*61046927SAndroid Build Coastguard Worker                              prev_inst->inst->qpu.sig.ldunifarf) &&
1487*61046927SAndroid Build Coastguard Worker                             vir_has_uniform(n->inst)) {
1488*61046927SAndroid Build Coastguard Worker                                 continue;
1489*61046927SAndroid Build Coastguard Worker                         }
1490*61046927SAndroid Build Coastguard Worker 
1491*61046927SAndroid Build Coastguard Worker                         /* Don't merge TLB instructions before we have acquired
1492*61046927SAndroid Build Coastguard Worker                          * the scoreboard lock.
1493*61046927SAndroid Build Coastguard Worker                          */
1494*61046927SAndroid Build Coastguard Worker                         if (pixel_scoreboard_too_soon(c, scoreboard, inst))
1495*61046927SAndroid Build Coastguard Worker                                 continue;
1496*61046927SAndroid Build Coastguard Worker 
1497*61046927SAndroid Build Coastguard Worker                         /* When we successfully pair up an ldvary we then try
1498*61046927SAndroid Build Coastguard Worker                          * to merge it into the previous instruction if
1499*61046927SAndroid Build Coastguard Worker                          * possible to improve pipelining. Don't pick up the
1500*61046927SAndroid Build Coastguard Worker                          * ldvary now if the follow-up fixup would place
1501*61046927SAndroid Build Coastguard Worker                          * it in the delay slots of a thrsw, which is not
1502*61046927SAndroid Build Coastguard Worker                          * allowed and would prevent the fixup from being
1503*61046927SAndroid Build Coastguard Worker                          * successful. In V3D 7.x we can allow this to happen
1504*61046927SAndroid Build Coastguard Worker                          * as long as it is not the last delay slot.
1505*61046927SAndroid Build Coastguard Worker                          */
1506*61046927SAndroid Build Coastguard Worker                         if (inst->sig.ldvary) {
1507*61046927SAndroid Build Coastguard Worker                                 if (c->devinfo->ver == 42 &&
1508*61046927SAndroid Build Coastguard Worker                                     scoreboard->last_thrsw_tick + 2 >=
1509*61046927SAndroid Build Coastguard Worker                                     scoreboard->tick - 1) {
1510*61046927SAndroid Build Coastguard Worker                                         continue;
1511*61046927SAndroid Build Coastguard Worker                                 }
1512*61046927SAndroid Build Coastguard Worker                                 if (c->devinfo->ver >= 71 &&
1513*61046927SAndroid Build Coastguard Worker                                     scoreboard->last_thrsw_tick + 2 ==
1514*61046927SAndroid Build Coastguard Worker                                     scoreboard->tick - 1) {
1515*61046927SAndroid Build Coastguard Worker                                         continue;
1516*61046927SAndroid Build Coastguard Worker                                 }
1517*61046927SAndroid Build Coastguard Worker                         }
1518*61046927SAndroid Build Coastguard Worker 
1519*61046927SAndroid Build Coastguard Worker                         /* We can emit a new tmu lookup with a previous ldtmu
1520*61046927SAndroid Build Coastguard Worker                          * if doing this would free just enough space in the
1521*61046927SAndroid Build Coastguard Worker                          * TMU output fifo so we don't overflow, however, this
1522*61046927SAndroid Build Coastguard Worker                          * is only safe if the ldtmu cannot stall.
1523*61046927SAndroid Build Coastguard Worker                          *
1524*61046927SAndroid Build Coastguard Worker                          * A ldtmu can stall if it is not the first following a
1525*61046927SAndroid Build Coastguard Worker                          * thread switch and corresponds to the first word of a
1526*61046927SAndroid Build Coastguard Worker                          * read request.
1527*61046927SAndroid Build Coastguard Worker                          *
1528*61046927SAndroid Build Coastguard Worker                          * FIXME: For now we forbid pairing up a new lookup
1529*61046927SAndroid Build Coastguard Worker                          * with a previous ldtmu that is not the first after a
1530*61046927SAndroid Build Coastguard Worker                          * thrsw if that could overflow the TMU output fifo
1531*61046927SAndroid Build Coastguard Worker                          * regardless of whether the ldtmu is reading the first
1532*61046927SAndroid Build Coastguard Worker                          * word of a TMU result or not, since we don't track
1533*61046927SAndroid Build Coastguard Worker                          * this aspect in the compiler yet.
1534*61046927SAndroid Build Coastguard Worker                          */
1535*61046927SAndroid Build Coastguard Worker                         if (prev_inst->inst->qpu.sig.ldtmu &&
1536*61046927SAndroid Build Coastguard Worker                             !scoreboard->first_ldtmu_after_thrsw &&
1537*61046927SAndroid Build Coastguard Worker                             (scoreboard->pending_ldtmu_count +
1538*61046927SAndroid Build Coastguard Worker                              n->inst->ldtmu_count > 16 / c->threads)) {
1539*61046927SAndroid Build Coastguard Worker                                 continue;
1540*61046927SAndroid Build Coastguard Worker                         }
1541*61046927SAndroid Build Coastguard Worker 
1542*61046927SAndroid Build Coastguard Worker                         struct v3d_qpu_instr merged_inst;
1543*61046927SAndroid Build Coastguard Worker                         if (!qpu_merge_inst(c->devinfo, &merged_inst,
1544*61046927SAndroid Build Coastguard Worker                                             &prev_inst->inst->qpu, inst)) {
1545*61046927SAndroid Build Coastguard Worker                                 continue;
1546*61046927SAndroid Build Coastguard Worker                         }
1547*61046927SAndroid Build Coastguard Worker                 }
1548*61046927SAndroid Build Coastguard Worker 
1549*61046927SAndroid Build Coastguard Worker                 int prio = get_instruction_priority(c->devinfo, inst);
1550*61046927SAndroid Build Coastguard Worker 
1551*61046927SAndroid Build Coastguard Worker                 if (read_stalls(c->devinfo, scoreboard, inst)) {
1552*61046927SAndroid Build Coastguard Worker                         /* Don't merge an instruction that stalls */
1553*61046927SAndroid Build Coastguard Worker                         if (prev_inst)
1554*61046927SAndroid Build Coastguard Worker                                 continue;
1555*61046927SAndroid Build Coastguard Worker                         else {
1556*61046927SAndroid Build Coastguard Worker                                 /* Any instruction that don't stall will have
1557*61046927SAndroid Build Coastguard Worker                                  * higher scheduling priority */
1558*61046927SAndroid Build Coastguard Worker                                 prio -= MAX_SCHEDULE_PRIORITY;
1559*61046927SAndroid Build Coastguard Worker                                 assert(prio < 0);
1560*61046927SAndroid Build Coastguard Worker                         }
1561*61046927SAndroid Build Coastguard Worker                 }
1562*61046927SAndroid Build Coastguard Worker 
1563*61046927SAndroid Build Coastguard Worker                 /* Found a valid instruction.  If nothing better comes along,
1564*61046927SAndroid Build Coastguard Worker                  * this one works.
1565*61046927SAndroid Build Coastguard Worker                  */
1566*61046927SAndroid Build Coastguard Worker                 if (!chosen) {
1567*61046927SAndroid Build Coastguard Worker                         chosen = n;
1568*61046927SAndroid Build Coastguard Worker                         chosen_prio = prio;
1569*61046927SAndroid Build Coastguard Worker                         continue;
1570*61046927SAndroid Build Coastguard Worker                 }
1571*61046927SAndroid Build Coastguard Worker 
1572*61046927SAndroid Build Coastguard Worker                 if (prio > chosen_prio) {
1573*61046927SAndroid Build Coastguard Worker                         chosen = n;
1574*61046927SAndroid Build Coastguard Worker                         chosen_prio = prio;
1575*61046927SAndroid Build Coastguard Worker                 } else if (prio < chosen_prio) {
1576*61046927SAndroid Build Coastguard Worker                         continue;
1577*61046927SAndroid Build Coastguard Worker                 }
1578*61046927SAndroid Build Coastguard Worker 
1579*61046927SAndroid Build Coastguard Worker                 if (n->delay > chosen->delay) {
1580*61046927SAndroid Build Coastguard Worker                         chosen = n;
1581*61046927SAndroid Build Coastguard Worker                         chosen_prio = prio;
1582*61046927SAndroid Build Coastguard Worker                 } else if (n->delay < chosen->delay) {
1583*61046927SAndroid Build Coastguard Worker                         continue;
1584*61046927SAndroid Build Coastguard Worker                 }
1585*61046927SAndroid Build Coastguard Worker         }
1586*61046927SAndroid Build Coastguard Worker 
1587*61046927SAndroid Build Coastguard Worker         /* If we did not find any instruction to schedule but we discarded
1588*61046927SAndroid Build Coastguard Worker          * some of them to prioritize ldvary pipelining, try again.
1589*61046927SAndroid Build Coastguard Worker          */
1590*61046927SAndroid Build Coastguard Worker         if (!chosen && !prev_inst && skipped_insts_for_ldvary_pipelining) {
1591*61046927SAndroid Build Coastguard Worker                 skipped_insts_for_ldvary_pipelining = false;
1592*61046927SAndroid Build Coastguard Worker                 ldvary_pipelining = false;
1593*61046927SAndroid Build Coastguard Worker                 goto retry;
1594*61046927SAndroid Build Coastguard Worker         }
1595*61046927SAndroid Build Coastguard Worker 
1596*61046927SAndroid Build Coastguard Worker         if (chosen && chosen->inst->qpu.sig.ldvary) {
1597*61046927SAndroid Build Coastguard Worker                 scoreboard->ldvary_count++;
1598*61046927SAndroid Build Coastguard Worker                 /* If we are pairing an ldvary, flag it so we can fix it up for
1599*61046927SAndroid Build Coastguard Worker                  * optimal pipelining of ldvary sequences.
1600*61046927SAndroid Build Coastguard Worker                  */
1601*61046927SAndroid Build Coastguard Worker                 if (prev_inst)
1602*61046927SAndroid Build Coastguard Worker                         scoreboard->fixup_ldvary = true;
1603*61046927SAndroid Build Coastguard Worker         }
1604*61046927SAndroid Build Coastguard Worker 
1605*61046927SAndroid Build Coastguard Worker         return chosen;
1606*61046927SAndroid Build Coastguard Worker }
1607*61046927SAndroid Build Coastguard Worker 
1608*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_for_magic_waddr(struct choose_scoreboard * scoreboard,enum v3d_qpu_waddr waddr,const struct v3d_device_info * devinfo)1609*61046927SAndroid Build Coastguard Worker update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
1610*61046927SAndroid Build Coastguard Worker                                   enum v3d_qpu_waddr waddr,
1611*61046927SAndroid Build Coastguard Worker                                   const struct v3d_device_info *devinfo)
1612*61046927SAndroid Build Coastguard Worker {
1613*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_magic_waddr_is_sfu(waddr))
1614*61046927SAndroid Build Coastguard Worker                 scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
1615*61046927SAndroid Build Coastguard Worker         else if (waddr == V3D_QPU_WADDR_UNIFA)
1616*61046927SAndroid Build Coastguard Worker                 scoreboard->last_unifa_write_tick = scoreboard->tick;
1617*61046927SAndroid Build Coastguard Worker }
1618*61046927SAndroid Build Coastguard Worker 
1619*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst)1620*61046927SAndroid Build Coastguard Worker update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard *scoreboard,
1621*61046927SAndroid Build Coastguard Worker                                       const struct v3d_qpu_instr *inst)
1622*61046927SAndroid Build Coastguard Worker {
1623*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_instr_is_sfu(inst)) {
1624*61046927SAndroid Build Coastguard Worker                 scoreboard->last_stallable_sfu_reg = inst->alu.add.waddr;
1625*61046927SAndroid Build Coastguard Worker                 scoreboard->last_stallable_sfu_tick = scoreboard->tick;
1626*61046927SAndroid Build Coastguard Worker         }
1627*61046927SAndroid Build Coastguard Worker }
1628*61046927SAndroid Build Coastguard Worker 
1629*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_tmu_tracking(struct choose_scoreboard * scoreboard,const struct qinst * inst)1630*61046927SAndroid Build Coastguard Worker update_scoreboard_tmu_tracking(struct choose_scoreboard *scoreboard,
1631*61046927SAndroid Build Coastguard Worker                                const struct qinst *inst)
1632*61046927SAndroid Build Coastguard Worker {
1633*61046927SAndroid Build Coastguard Worker         /* Track if the have seen any ldtmu after the last thread switch */
1634*61046927SAndroid Build Coastguard Worker         if (scoreboard->tick == scoreboard->last_thrsw_tick + 2)
1635*61046927SAndroid Build Coastguard Worker                 scoreboard->first_ldtmu_after_thrsw = true;
1636*61046927SAndroid Build Coastguard Worker 
1637*61046927SAndroid Build Coastguard Worker         /* Track the number of pending ldtmu instructions for outstanding
1638*61046927SAndroid Build Coastguard Worker          * TMU lookups.
1639*61046927SAndroid Build Coastguard Worker          */
1640*61046927SAndroid Build Coastguard Worker         scoreboard->pending_ldtmu_count += inst->ldtmu_count;
1641*61046927SAndroid Build Coastguard Worker         if (inst->qpu.sig.ldtmu) {
1642*61046927SAndroid Build Coastguard Worker                 assert(scoreboard->pending_ldtmu_count > 0);
1643*61046927SAndroid Build Coastguard Worker                 scoreboard->pending_ldtmu_count--;
1644*61046927SAndroid Build Coastguard Worker                 scoreboard->first_ldtmu_after_thrsw = false;
1645*61046927SAndroid Build Coastguard Worker         }
1646*61046927SAndroid Build Coastguard Worker }
1647*61046927SAndroid Build Coastguard Worker 
1648*61046927SAndroid Build Coastguard Worker static void
set_has_rf0_flops_conflict(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst,const struct v3d_device_info * devinfo)1649*61046927SAndroid Build Coastguard Worker set_has_rf0_flops_conflict(struct choose_scoreboard *scoreboard,
1650*61046927SAndroid Build Coastguard Worker                            const struct v3d_qpu_instr *inst,
1651*61046927SAndroid Build Coastguard Worker                            const struct v3d_device_info *devinfo)
1652*61046927SAndroid Build Coastguard Worker {
1653*61046927SAndroid Build Coastguard Worker         if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick &&
1654*61046927SAndroid Build Coastguard Worker             v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
1655*61046927SAndroid Build Coastguard Worker             !inst->sig_magic) {
1656*61046927SAndroid Build Coastguard Worker                 scoreboard->has_rf0_flops_conflict = true;
1657*61046927SAndroid Build Coastguard Worker         }
1658*61046927SAndroid Build Coastguard Worker }
1659*61046927SAndroid Build Coastguard Worker 
1660*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_for_rf0_flops(struct choose_scoreboard * scoreboard,const struct v3d_qpu_instr * inst,const struct v3d_device_info * devinfo)1661*61046927SAndroid Build Coastguard Worker update_scoreboard_for_rf0_flops(struct choose_scoreboard *scoreboard,
1662*61046927SAndroid Build Coastguard Worker                                 const struct v3d_qpu_instr *inst,
1663*61046927SAndroid Build Coastguard Worker                                 const struct v3d_device_info *devinfo)
1664*61046927SAndroid Build Coastguard Worker {
1665*61046927SAndroid Build Coastguard Worker         if (devinfo->ver < 71)
1666*61046927SAndroid Build Coastguard Worker                 return;
1667*61046927SAndroid Build Coastguard Worker 
1668*61046927SAndroid Build Coastguard Worker         /* Thread switch restrictions:
1669*61046927SAndroid Build Coastguard Worker          *
1670*61046927SAndroid Build Coastguard Worker          * At the point of a thread switch or thread end (when the actual
1671*61046927SAndroid Build Coastguard Worker          * thread switch or thread end happens, not when the signalling
1672*61046927SAndroid Build Coastguard Worker          * instruction is processed):
1673*61046927SAndroid Build Coastguard Worker          *
1674*61046927SAndroid Build Coastguard Worker          *    - If the most recent write to rf0 was from a ldunif, ldunifa, or
1675*61046927SAndroid Build Coastguard Worker          *      ldvary instruction in which another signal also wrote to the
1676*61046927SAndroid Build Coastguard Worker          *      register file, and the final instruction of the thread section
1677*61046927SAndroid Build Coastguard Worker          *      contained a signal which wrote to the register file, then the
1678*61046927SAndroid Build Coastguard Worker          *      value of rf0 is undefined at the start of the new section
1679*61046927SAndroid Build Coastguard Worker          *
1680*61046927SAndroid Build Coastguard Worker          * Here we use the scoreboard to track if our last rf0 implicit write
1681*61046927SAndroid Build Coastguard Worker          * happens at the same time that another signal writes the register
1682*61046927SAndroid Build Coastguard Worker          * file (has_rf0_flops_conflict). We will use that information when
1683*61046927SAndroid Build Coastguard Worker          * scheduling thrsw instructions to avoid putting anything in their
1684*61046927SAndroid Build Coastguard Worker          * last delay slot which has a signal that writes to the register file.
1685*61046927SAndroid Build Coastguard Worker          */
1686*61046927SAndroid Build Coastguard Worker 
1687*61046927SAndroid Build Coastguard Worker         /* Reset tracking if we have an explicit rf0 write or we are starting
1688*61046927SAndroid Build Coastguard Worker          * a new thread section.
1689*61046927SAndroid Build Coastguard Worker          */
1690*61046927SAndroid Build Coastguard Worker         if (v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0) ||
1691*61046927SAndroid Build Coastguard Worker             scoreboard->tick - scoreboard->last_thrsw_tick == 3) {
1692*61046927SAndroid Build Coastguard Worker                 scoreboard->last_implicit_rf0_write_tick = -10;
1693*61046927SAndroid Build Coastguard Worker                 scoreboard->has_rf0_flops_conflict = false;
1694*61046927SAndroid Build Coastguard Worker         }
1695*61046927SAndroid Build Coastguard Worker 
1696*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_rf0_implicitly(devinfo, inst)) {
1697*61046927SAndroid Build Coastguard Worker                 scoreboard->last_implicit_rf0_write_tick = inst->sig.ldvary ?
1698*61046927SAndroid Build Coastguard Worker                         scoreboard->tick + 1 : scoreboard->tick;
1699*61046927SAndroid Build Coastguard Worker         }
1700*61046927SAndroid Build Coastguard Worker 
1701*61046927SAndroid Build Coastguard Worker         set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
1702*61046927SAndroid Build Coastguard Worker }
1703*61046927SAndroid Build Coastguard Worker 
1704*61046927SAndroid Build Coastguard Worker static void
update_scoreboard_for_chosen(struct choose_scoreboard * scoreboard,const struct qinst * qinst,const struct v3d_device_info * devinfo)1705*61046927SAndroid Build Coastguard Worker update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
1706*61046927SAndroid Build Coastguard Worker                              const struct qinst *qinst,
1707*61046927SAndroid Build Coastguard Worker                              const struct v3d_device_info *devinfo)
1708*61046927SAndroid Build Coastguard Worker {
1709*61046927SAndroid Build Coastguard Worker         const struct v3d_qpu_instr *inst = &qinst->qpu;
1710*61046927SAndroid Build Coastguard Worker 
1711*61046927SAndroid Build Coastguard Worker         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
1712*61046927SAndroid Build Coastguard Worker                 return;
1713*61046927SAndroid Build Coastguard Worker 
1714*61046927SAndroid Build Coastguard Worker         assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
1715*61046927SAndroid Build Coastguard Worker 
1716*61046927SAndroid Build Coastguard Worker         if (inst->alu.add.op != V3D_QPU_A_NOP)  {
1717*61046927SAndroid Build Coastguard Worker                 if (inst->alu.add.magic_write) {
1718*61046927SAndroid Build Coastguard Worker                         update_scoreboard_for_magic_waddr(scoreboard,
1719*61046927SAndroid Build Coastguard Worker                                                           inst->alu.add.waddr,
1720*61046927SAndroid Build Coastguard Worker                                                           devinfo);
1721*61046927SAndroid Build Coastguard Worker                 } else {
1722*61046927SAndroid Build Coastguard Worker                         update_scoreboard_for_sfu_stall_waddr(scoreboard,
1723*61046927SAndroid Build Coastguard Worker                                                               inst);
1724*61046927SAndroid Build Coastguard Worker                 }
1725*61046927SAndroid Build Coastguard Worker 
1726*61046927SAndroid Build Coastguard Worker                 if (inst->alu.add.op == V3D_QPU_A_SETMSF)
1727*61046927SAndroid Build Coastguard Worker                         scoreboard->last_setmsf_tick = scoreboard->tick;
1728*61046927SAndroid Build Coastguard Worker         }
1729*61046927SAndroid Build Coastguard Worker 
1730*61046927SAndroid Build Coastguard Worker         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
1731*61046927SAndroid Build Coastguard Worker                 if (inst->alu.mul.magic_write) {
1732*61046927SAndroid Build Coastguard Worker                         update_scoreboard_for_magic_waddr(scoreboard,
1733*61046927SAndroid Build Coastguard Worker                                                           inst->alu.mul.waddr,
1734*61046927SAndroid Build Coastguard Worker                                                           devinfo);
1735*61046927SAndroid Build Coastguard Worker                 }
1736*61046927SAndroid Build Coastguard Worker         }
1737*61046927SAndroid Build Coastguard Worker 
1738*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && inst->sig_magic) {
1739*61046927SAndroid Build Coastguard Worker                 update_scoreboard_for_magic_waddr(scoreboard,
1740*61046927SAndroid Build Coastguard Worker                                                   inst->sig_addr,
1741*61046927SAndroid Build Coastguard Worker                                                   devinfo);
1742*61046927SAndroid Build Coastguard Worker         }
1743*61046927SAndroid Build Coastguard Worker 
1744*61046927SAndroid Build Coastguard Worker         if (inst->sig.ldvary)
1745*61046927SAndroid Build Coastguard Worker                 scoreboard->last_ldvary_tick = scoreboard->tick;
1746*61046927SAndroid Build Coastguard Worker 
1747*61046927SAndroid Build Coastguard Worker         update_scoreboard_for_rf0_flops(scoreboard, inst, devinfo);
1748*61046927SAndroid Build Coastguard Worker 
1749*61046927SAndroid Build Coastguard Worker         update_scoreboard_tmu_tracking(scoreboard, qinst);
1750*61046927SAndroid Build Coastguard Worker }
1751*61046927SAndroid Build Coastguard Worker 
1752*61046927SAndroid Build Coastguard Worker static void
dump_state(const struct v3d_device_info * devinfo,struct dag * dag)1753*61046927SAndroid Build Coastguard Worker dump_state(const struct v3d_device_info *devinfo, struct dag *dag)
1754*61046927SAndroid Build Coastguard Worker {
1755*61046927SAndroid Build Coastguard Worker         list_for_each_entry(struct schedule_node, n, &dag->heads, dag.link) {
1756*61046927SAndroid Build Coastguard Worker                 fprintf(stderr, "         t=%4d: ", n->unblocked_time);
1757*61046927SAndroid Build Coastguard Worker                 v3d_qpu_dump(devinfo, &n->inst->qpu);
1758*61046927SAndroid Build Coastguard Worker                 fprintf(stderr, "\n");
1759*61046927SAndroid Build Coastguard Worker 
1760*61046927SAndroid Build Coastguard Worker                 util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
1761*61046927SAndroid Build Coastguard Worker                         struct schedule_node *child =
1762*61046927SAndroid Build Coastguard Worker                                 (struct schedule_node *)edge->child;
1763*61046927SAndroid Build Coastguard Worker                         if (!child)
1764*61046927SAndroid Build Coastguard Worker                                 continue;
1765*61046927SAndroid Build Coastguard Worker 
1766*61046927SAndroid Build Coastguard Worker                         fprintf(stderr, "                 - ");
1767*61046927SAndroid Build Coastguard Worker                         v3d_qpu_dump(devinfo, &child->inst->qpu);
1768*61046927SAndroid Build Coastguard Worker                         fprintf(stderr, " (%d parents, %c)\n",
1769*61046927SAndroid Build Coastguard Worker                                 child->dag.parent_count,
1770*61046927SAndroid Build Coastguard Worker                                 edge->data ? 'w' : 'r');
1771*61046927SAndroid Build Coastguard Worker                 }
1772*61046927SAndroid Build Coastguard Worker         }
1773*61046927SAndroid Build Coastguard Worker }
1774*61046927SAndroid Build Coastguard Worker 
/**
 * Estimates the latency between a magic register write and a dependent
 * instruction.
 *
 * @param devinfo  Device description, forwarded to the TMU address query.
 * @param waddr    Magic write address written by the earlier instruction.
 * @param after    The dependent (later) instruction.
 * @return 100 for a TMU write followed by an instruction that waits on the
 *         TMU, 3 for an SFU write, 1 otherwise.
 */
static uint32_t magic_waddr_latency(const struct v3d_device_info *devinfo,
                                    enum v3d_qpu_waddr waddr,
                                    const struct v3d_qpu_instr *after)
{
        /* Apply some huge latency between texture fetch requests and getting
         * their results back.
         *
         * FIXME: This is actually pretty bogus.  If we do:
         *
         * mov tmu0_s, a
         * <a bit of math>
         * mov tmu0_s, b
         * load_tmu0
         * <more math>
         * load_tmu0
         *
         * we count that as worse than
         *
         * mov tmu0_s, a
         * mov tmu0_s, b
         * <lots of math>
         * load_tmu0
         * <more math>
         * load_tmu0
         *
         * because we associate the first load_tmu0 with the *second* tmu0_s.
         */
        const bool tmu_request_then_wait =
                v3d_qpu_magic_waddr_is_tmu(devinfo, waddr) &&
                v3d_qpu_waits_on_tmu(after);

        if (tmu_request_then_wait)
                return 100;

        /* Assume that anything depending on us is consuming the SFU result. */
        return v3d_qpu_magic_waddr_is_sfu(waddr) ? 3 : 1;
}
1813*61046927SAndroid Build Coastguard Worker 
1814*61046927SAndroid Build Coastguard Worker static uint32_t
instruction_latency(const struct v3d_device_info * devinfo,struct schedule_node * before,struct schedule_node * after)1815*61046927SAndroid Build Coastguard Worker instruction_latency(const struct v3d_device_info *devinfo,
1816*61046927SAndroid Build Coastguard Worker                     struct schedule_node *before, struct schedule_node *after)
1817*61046927SAndroid Build Coastguard Worker {
1818*61046927SAndroid Build Coastguard Worker         const struct v3d_qpu_instr *before_inst = &before->inst->qpu;
1819*61046927SAndroid Build Coastguard Worker         const struct v3d_qpu_instr *after_inst = &after->inst->qpu;
1820*61046927SAndroid Build Coastguard Worker         uint32_t latency = 1;
1821*61046927SAndroid Build Coastguard Worker 
1822*61046927SAndroid Build Coastguard Worker         if (before_inst->type != V3D_QPU_INSTR_TYPE_ALU ||
1823*61046927SAndroid Build Coastguard Worker             after_inst->type != V3D_QPU_INSTR_TYPE_ALU)
1824*61046927SAndroid Build Coastguard Worker                 return latency;
1825*61046927SAndroid Build Coastguard Worker 
1826*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_instr_is_sfu(before_inst))
1827*61046927SAndroid Build Coastguard Worker                 return 2;
1828*61046927SAndroid Build Coastguard Worker 
1829*61046927SAndroid Build Coastguard Worker         if (before_inst->alu.add.op != V3D_QPU_A_NOP &&
1830*61046927SAndroid Build Coastguard Worker             before_inst->alu.add.magic_write) {
1831*61046927SAndroid Build Coastguard Worker                 latency = MAX2(latency,
1832*61046927SAndroid Build Coastguard Worker                                magic_waddr_latency(devinfo,
1833*61046927SAndroid Build Coastguard Worker                                                    before_inst->alu.add.waddr,
1834*61046927SAndroid Build Coastguard Worker                                                    after_inst));
1835*61046927SAndroid Build Coastguard Worker         }
1836*61046927SAndroid Build Coastguard Worker 
1837*61046927SAndroid Build Coastguard Worker         if (before_inst->alu.mul.op != V3D_QPU_M_NOP &&
1838*61046927SAndroid Build Coastguard Worker             before_inst->alu.mul.magic_write) {
1839*61046927SAndroid Build Coastguard Worker                 latency = MAX2(latency,
1840*61046927SAndroid Build Coastguard Worker                                magic_waddr_latency(devinfo,
1841*61046927SAndroid Build Coastguard Worker                                                    before_inst->alu.mul.waddr,
1842*61046927SAndroid Build Coastguard Worker                                                    after_inst));
1843*61046927SAndroid Build Coastguard Worker         }
1844*61046927SAndroid Build Coastguard Worker 
1845*61046927SAndroid Build Coastguard Worker         return latency;
1846*61046927SAndroid Build Coastguard Worker }
1847*61046927SAndroid Build Coastguard Worker 
1848*61046927SAndroid Build Coastguard Worker /** Recursive computation of the delay member of a node. */
1849*61046927SAndroid Build Coastguard Worker static void
compute_delay(struct dag_node * node,void * state)1850*61046927SAndroid Build Coastguard Worker compute_delay(struct dag_node *node, void *state)
1851*61046927SAndroid Build Coastguard Worker {
1852*61046927SAndroid Build Coastguard Worker         struct schedule_node *n = (struct schedule_node *)node;
1853*61046927SAndroid Build Coastguard Worker         struct v3d_compile *c = (struct v3d_compile *) state;
1854*61046927SAndroid Build Coastguard Worker 
1855*61046927SAndroid Build Coastguard Worker         n->delay = 1;
1856*61046927SAndroid Build Coastguard Worker 
1857*61046927SAndroid Build Coastguard Worker         util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
1858*61046927SAndroid Build Coastguard Worker                 struct schedule_node *child =
1859*61046927SAndroid Build Coastguard Worker                         (struct schedule_node *)edge->child;
1860*61046927SAndroid Build Coastguard Worker 
1861*61046927SAndroid Build Coastguard Worker                 n->delay = MAX2(n->delay, (child->delay +
1862*61046927SAndroid Build Coastguard Worker                                            instruction_latency(c->devinfo, n,
1863*61046927SAndroid Build Coastguard Worker                                                                child)));
1864*61046927SAndroid Build Coastguard Worker         }
1865*61046927SAndroid Build Coastguard Worker }
1866*61046927SAndroid Build Coastguard Worker 
1867*61046927SAndroid Build Coastguard Worker /* Removes a DAG head, but removing only the WAR edges. (dag_prune_head()
1868*61046927SAndroid Build Coastguard Worker  * should be called on it later to finish pruning the other edges).
1869*61046927SAndroid Build Coastguard Worker  */
1870*61046927SAndroid Build Coastguard Worker static void
pre_remove_head(struct dag * dag,struct schedule_node * n)1871*61046927SAndroid Build Coastguard Worker pre_remove_head(struct dag *dag, struct schedule_node *n)
1872*61046927SAndroid Build Coastguard Worker {
1873*61046927SAndroid Build Coastguard Worker         list_delinit(&n->dag.link);
1874*61046927SAndroid Build Coastguard Worker 
1875*61046927SAndroid Build Coastguard Worker         util_dynarray_foreach(&n->dag.edges, struct dag_edge, edge) {
1876*61046927SAndroid Build Coastguard Worker                 if (edge->data)
1877*61046927SAndroid Build Coastguard Worker                         dag_remove_edge(dag, edge);
1878*61046927SAndroid Build Coastguard Worker         }
1879*61046927SAndroid Build Coastguard Worker }
1880*61046927SAndroid Build Coastguard Worker 
1881*61046927SAndroid Build Coastguard Worker static void
mark_instruction_scheduled(const struct v3d_device_info * devinfo,struct dag * dag,uint32_t time,struct schedule_node * node)1882*61046927SAndroid Build Coastguard Worker mark_instruction_scheduled(const struct v3d_device_info *devinfo,
1883*61046927SAndroid Build Coastguard Worker                            struct dag *dag,
1884*61046927SAndroid Build Coastguard Worker                            uint32_t time,
1885*61046927SAndroid Build Coastguard Worker                            struct schedule_node *node)
1886*61046927SAndroid Build Coastguard Worker {
1887*61046927SAndroid Build Coastguard Worker         if (!node)
1888*61046927SAndroid Build Coastguard Worker                 return;
1889*61046927SAndroid Build Coastguard Worker 
1890*61046927SAndroid Build Coastguard Worker         util_dynarray_foreach(&node->dag.edges, struct dag_edge, edge) {
1891*61046927SAndroid Build Coastguard Worker                 struct schedule_node *child =
1892*61046927SAndroid Build Coastguard Worker                         (struct schedule_node *)edge->child;
1893*61046927SAndroid Build Coastguard Worker 
1894*61046927SAndroid Build Coastguard Worker                 if (!child)
1895*61046927SAndroid Build Coastguard Worker                         continue;
1896*61046927SAndroid Build Coastguard Worker 
1897*61046927SAndroid Build Coastguard Worker                 uint32_t latency = instruction_latency(devinfo, node, child);
1898*61046927SAndroid Build Coastguard Worker 
1899*61046927SAndroid Build Coastguard Worker                 child->unblocked_time = MAX2(child->unblocked_time,
1900*61046927SAndroid Build Coastguard Worker                                              time + latency);
1901*61046927SAndroid Build Coastguard Worker         }
1902*61046927SAndroid Build Coastguard Worker         dag_prune_head(dag, &node->dag);
1903*61046927SAndroid Build Coastguard Worker }
1904*61046927SAndroid Build Coastguard Worker 
1905*61046927SAndroid Build Coastguard Worker static void
insert_scheduled_instruction(struct v3d_compile * c,struct qblock * block,struct choose_scoreboard * scoreboard,struct qinst * inst)1906*61046927SAndroid Build Coastguard Worker insert_scheduled_instruction(struct v3d_compile *c,
1907*61046927SAndroid Build Coastguard Worker                              struct qblock *block,
1908*61046927SAndroid Build Coastguard Worker                              struct choose_scoreboard *scoreboard,
1909*61046927SAndroid Build Coastguard Worker                              struct qinst *inst)
1910*61046927SAndroid Build Coastguard Worker {
1911*61046927SAndroid Build Coastguard Worker         list_addtail(&inst->link, &block->instructions);
1912*61046927SAndroid Build Coastguard Worker 
1913*61046927SAndroid Build Coastguard Worker         update_scoreboard_for_chosen(scoreboard, inst, c->devinfo);
1914*61046927SAndroid Build Coastguard Worker         c->qpu_inst_count++;
1915*61046927SAndroid Build Coastguard Worker         scoreboard->tick++;
1916*61046927SAndroid Build Coastguard Worker }
1917*61046927SAndroid Build Coastguard Worker 
1918*61046927SAndroid Build Coastguard Worker static struct qinst *
vir_nop()1919*61046927SAndroid Build Coastguard Worker vir_nop()
1920*61046927SAndroid Build Coastguard Worker {
1921*61046927SAndroid Build Coastguard Worker         struct qreg undef = vir_nop_reg();
1922*61046927SAndroid Build Coastguard Worker         struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
1923*61046927SAndroid Build Coastguard Worker 
1924*61046927SAndroid Build Coastguard Worker         return qinst;
1925*61046927SAndroid Build Coastguard Worker }
1926*61046927SAndroid Build Coastguard Worker 
/* Schedules a freshly created NOP at the end of the block, with the same
 * bookkeeping as any other scheduled instruction.
 */
static void
emit_nop(struct v3d_compile *c, struct qblock *block,
         struct choose_scoreboard *scoreboard)
{
        struct qinst *nop = vir_nop();

        insert_scheduled_instruction(c, block, scoreboard, nop);
}
1933*61046927SAndroid Build Coastguard Worker 
1934*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_valid_in_thrend_slot(struct v3d_compile * c,const struct qinst * qinst,int slot)1935*61046927SAndroid Build Coastguard Worker qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
1936*61046927SAndroid Build Coastguard Worker                               const struct qinst *qinst, int slot)
1937*61046927SAndroid Build Coastguard Worker {
1938*61046927SAndroid Build Coastguard Worker         const struct v3d_qpu_instr *inst = &qinst->qpu;
1939*61046927SAndroid Build Coastguard Worker 
1940*61046927SAndroid Build Coastguard Worker         if (slot == 2 && qinst->is_tlb_z_write)
1941*61046927SAndroid Build Coastguard Worker                 return false;
1942*61046927SAndroid Build Coastguard Worker 
1943*61046927SAndroid Build Coastguard Worker         if (slot > 0 && qinst->uniform != ~0)
1944*61046927SAndroid Build Coastguard Worker                 return false;
1945*61046927SAndroid Build Coastguard Worker 
1946*61046927SAndroid Build Coastguard Worker         if (c->devinfo->ver == 42 && v3d_qpu_waits_vpm(inst))
1947*61046927SAndroid Build Coastguard Worker                 return false;
1948*61046927SAndroid Build Coastguard Worker 
1949*61046927SAndroid Build Coastguard Worker         if (inst->sig.ldvary)
1950*61046927SAndroid Build Coastguard Worker                 return false;
1951*61046927SAndroid Build Coastguard Worker 
1952*61046927SAndroid Build Coastguard Worker         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
1953*61046927SAndroid Build Coastguard Worker                 /* GFXH-1625: TMUWT not allowed in the final instruction. */
1954*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver == 42 && slot == 2 &&
1955*61046927SAndroid Build Coastguard Worker                     inst->alu.add.op == V3D_QPU_A_TMUWT) {
1956*61046927SAndroid Build Coastguard Worker                         return false;
1957*61046927SAndroid Build Coastguard Worker                 }
1958*61046927SAndroid Build Coastguard Worker 
1959*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver == 42) {
1960*61046927SAndroid Build Coastguard Worker                         /* No writing physical registers at the end. */
1961*61046927SAndroid Build Coastguard Worker                         bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
1962*61046927SAndroid Build Coastguard Worker                         bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP;
1963*61046927SAndroid Build Coastguard Worker                         if ((!add_is_nop && !inst->alu.add.magic_write) ||
1964*61046927SAndroid Build Coastguard Worker                             (!mul_is_nop && !inst->alu.mul.magic_write)) {
1965*61046927SAndroid Build Coastguard Worker                                 return false;
1966*61046927SAndroid Build Coastguard Worker                         }
1967*61046927SAndroid Build Coastguard Worker 
1968*61046927SAndroid Build Coastguard Worker                         if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
1969*61046927SAndroid Build Coastguard Worker                             !inst->sig_magic) {
1970*61046927SAndroid Build Coastguard Worker                                 return false;
1971*61046927SAndroid Build Coastguard Worker                         }
1972*61046927SAndroid Build Coastguard Worker                 }
1973*61046927SAndroid Build Coastguard Worker 
1974*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver >= 71) {
1975*61046927SAndroid Build Coastguard Worker                         /* The thread end instruction must not write to the
1976*61046927SAndroid Build Coastguard Worker                          * register file via the add/mul ALUs.
1977*61046927SAndroid Build Coastguard Worker                          */
1978*61046927SAndroid Build Coastguard Worker                         if (slot == 0 &&
1979*61046927SAndroid Build Coastguard Worker                             (!inst->alu.add.magic_write ||
1980*61046927SAndroid Build Coastguard Worker                              !inst->alu.mul.magic_write)) {
1981*61046927SAndroid Build Coastguard Worker                                 return false;
1982*61046927SAndroid Build Coastguard Worker                         }
1983*61046927SAndroid Build Coastguard Worker                 }
1984*61046927SAndroid Build Coastguard Worker 
1985*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver == 42) {
1986*61046927SAndroid Build Coastguard Worker                         /* RF0-2 might be overwritten during the delay slots by
1987*61046927SAndroid Build Coastguard Worker                          * fragment shader setup.
1988*61046927SAndroid Build Coastguard Worker                          */
1989*61046927SAndroid Build Coastguard Worker                         if (inst->raddr_a < 3 && v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A))
1990*61046927SAndroid Build Coastguard Worker                                 return false;
1991*61046927SAndroid Build Coastguard Worker 
1992*61046927SAndroid Build Coastguard Worker                         if (inst->raddr_b < 3 &&
1993*61046927SAndroid Build Coastguard Worker                             !inst->sig.small_imm_b &&
1994*61046927SAndroid Build Coastguard Worker                             v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B)) {
1995*61046927SAndroid Build Coastguard Worker                                 return false;
1996*61046927SAndroid Build Coastguard Worker                         }
1997*61046927SAndroid Build Coastguard Worker                 }
1998*61046927SAndroid Build Coastguard Worker 
1999*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver >= 71) {
2000*61046927SAndroid Build Coastguard Worker                         /* RF2-3 might be overwritten during the delay slots by
2001*61046927SAndroid Build Coastguard Worker                          * fragment shader setup.
2002*61046927SAndroid Build Coastguard Worker                          */
2003*61046927SAndroid Build Coastguard Worker                         if (v3d71_qpu_reads_raddr(inst, 2) ||
2004*61046927SAndroid Build Coastguard Worker                             v3d71_qpu_reads_raddr(inst, 3)) {
2005*61046927SAndroid Build Coastguard Worker                                 return false;
2006*61046927SAndroid Build Coastguard Worker                         }
2007*61046927SAndroid Build Coastguard Worker 
2008*61046927SAndroid Build Coastguard Worker                         if (v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 2) ||
2009*61046927SAndroid Build Coastguard Worker                             v3d71_qpu_writes_waddr_explicitly(c->devinfo, inst, 3)) {
2010*61046927SAndroid Build Coastguard Worker                                 return false;
2011*61046927SAndroid Build Coastguard Worker                         }
2012*61046927SAndroid Build Coastguard Worker                 }
2013*61046927SAndroid Build Coastguard Worker         }
2014*61046927SAndroid Build Coastguard Worker 
2015*61046927SAndroid Build Coastguard Worker         return true;
2016*61046927SAndroid Build Coastguard Worker }
2017*61046927SAndroid Build Coastguard Worker 
2018*61046927SAndroid Build Coastguard Worker /**
2019*61046927SAndroid Build Coastguard Worker  * This is called when trying to merge a thrsw back into the instruction stream
2020*61046927SAndroid Build Coastguard Worker  * of instructions that were scheduled *before* the thrsw signal to fill its
2021*61046927SAndroid Build Coastguard Worker  * delay slots. Because the actual execution of the thrsw happens after the
2022*61046927SAndroid Build Coastguard Worker  * delay slots, it is usually safe to do this, but there are some cases that
2023*61046927SAndroid Build Coastguard Worker  * need special care.
2024*61046927SAndroid Build Coastguard Worker  */
2025*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile * c,struct choose_scoreboard * scoreboard,const struct qinst * qinst,uint32_t slot)2026*61046927SAndroid Build Coastguard Worker qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
2027*61046927SAndroid Build Coastguard Worker                                           struct choose_scoreboard *scoreboard,
2028*61046927SAndroid Build Coastguard Worker                                           const struct qinst *qinst,
2029*61046927SAndroid Build Coastguard Worker                                           uint32_t slot)
2030*61046927SAndroid Build Coastguard Worker {
2031*61046927SAndroid Build Coastguard Worker         /* No scheduling SFU when the result would land in the other
2032*61046927SAndroid Build Coastguard Worker          * thread.  The simulator complains for safety, though it
2033*61046927SAndroid Build Coastguard Worker          * would only occur for dead code in our case.
2034*61046927SAndroid Build Coastguard Worker          */
2035*61046927SAndroid Build Coastguard Worker         if (slot > 0) {
2036*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver == 42 && v3d_qpu_instr_is_legacy_sfu(&qinst->qpu))
2037*61046927SAndroid Build Coastguard Worker                         return false;
2038*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver >= 71 && v3d_qpu_instr_is_sfu(&qinst->qpu))
2039*61046927SAndroid Build Coastguard Worker                         return false;
2040*61046927SAndroid Build Coastguard Worker         }
2041*61046927SAndroid Build Coastguard Worker 
2042*61046927SAndroid Build Coastguard Worker         if (qinst->qpu.sig.ldvary) {
2043*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver == 42 && slot > 0)
2044*61046927SAndroid Build Coastguard Worker                         return false;
2045*61046927SAndroid Build Coastguard Worker                 if (c->devinfo->ver >= 71 && slot == 2)
2046*61046927SAndroid Build Coastguard Worker                         return false;
2047*61046927SAndroid Build Coastguard Worker         }
2048*61046927SAndroid Build Coastguard Worker 
2049*61046927SAndroid Build Coastguard Worker         /* unifa and the following 3 instructions can't overlap a
2050*61046927SAndroid Build Coastguard Worker          * thread switch/end. The docs further clarify that this means
2051*61046927SAndroid Build Coastguard Worker          * the cycle at which the actual thread switch/end happens
2052*61046927SAndroid Build Coastguard Worker          * and not when the thrsw instruction is processed, which would
2053*61046927SAndroid Build Coastguard Worker          * be after the 2 delay slots following the thrsw instruction.
2054*61046927SAndroid Build Coastguard Worker          * This means that we can move up a thrsw up to the instruction
2055*61046927SAndroid Build Coastguard Worker          * right after unifa:
2056*61046927SAndroid Build Coastguard Worker          *
2057*61046927SAndroid Build Coastguard Worker          * unifa, r5
2058*61046927SAndroid Build Coastguard Worker          * thrsw
2059*61046927SAndroid Build Coastguard Worker          * delay slot 1
2060*61046927SAndroid Build Coastguard Worker          * delay slot 2
2061*61046927SAndroid Build Coastguard Worker          * Thread switch happens here, 4 instructions away from unifa
2062*61046927SAndroid Build Coastguard Worker          */
2063*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_unifa(c->devinfo, &qinst->qpu))
2064*61046927SAndroid Build Coastguard Worker                 return false;
2065*61046927SAndroid Build Coastguard Worker 
2066*61046927SAndroid Build Coastguard Worker         /* See comment when we set has_rf0_flops_conflict for details */
2067*61046927SAndroid Build Coastguard Worker         if (c->devinfo->ver >= 71 &&
2068*61046927SAndroid Build Coastguard Worker             slot == 2 &&
2069*61046927SAndroid Build Coastguard Worker             v3d_qpu_sig_writes_address(c->devinfo, &qinst->qpu.sig) &&
2070*61046927SAndroid Build Coastguard Worker             !qinst->qpu.sig_magic) {
2071*61046927SAndroid Build Coastguard Worker                 if (scoreboard->has_rf0_flops_conflict)
2072*61046927SAndroid Build Coastguard Worker                         return false;
2073*61046927SAndroid Build Coastguard Worker                 if (scoreboard->last_implicit_rf0_write_tick == scoreboard->tick)
2074*61046927SAndroid Build Coastguard Worker                         return false;
2075*61046927SAndroid Build Coastguard Worker         }
2076*61046927SAndroid Build Coastguard Worker 
2077*61046927SAndroid Build Coastguard Worker         return true;
2078*61046927SAndroid Build Coastguard Worker }
2079*61046927SAndroid Build Coastguard Worker 
2080*61046927SAndroid Build Coastguard Worker /**
2081*61046927SAndroid Build Coastguard Worker  * This is called for instructions scheduled *after* a thrsw signal that may
2082*61046927SAndroid Build Coastguard Worker  * land in the delay slots of the thrsw. Because these instructions were
2083*61046927SAndroid Build Coastguard Worker  * scheduled after the thrsw, we need to be careful when placing them into
2084*61046927SAndroid Build Coastguard Worker  * the delay slots, since that means that we are moving them ahead of the
2085*61046927SAndroid Build Coastguard Worker  * thread switch and we need to ensure that is not a problem.
2086*61046927SAndroid Build Coastguard Worker  */
2087*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile * c,struct choose_scoreboard * scoreboard,const struct qinst * qinst)2088*61046927SAndroid Build Coastguard Worker qpu_inst_after_thrsw_valid_in_delay_slot(struct v3d_compile *c,
2089*61046927SAndroid Build Coastguard Worker                                          struct choose_scoreboard *scoreboard,
2090*61046927SAndroid Build Coastguard Worker                                          const struct qinst *qinst)
2091*61046927SAndroid Build Coastguard Worker {
2092*61046927SAndroid Build Coastguard Worker         const uint32_t slot = scoreboard->tick - scoreboard->last_thrsw_tick;
2093*61046927SAndroid Build Coastguard Worker         assert(slot <= 2);
2094*61046927SAndroid Build Coastguard Worker 
2095*61046927SAndroid Build Coastguard Worker         /* We merge thrsw instructions back into the instruction stream
2096*61046927SAndroid Build Coastguard Worker          * manually, so any instructions scheduled after a thrsw should be
2097*61046927SAndroid Build Coastguard Worker          * in the actual delay slots and not in the same slot as the thrsw.
2098*61046927SAndroid Build Coastguard Worker          */
2099*61046927SAndroid Build Coastguard Worker         assert(slot >= 1);
2100*61046927SAndroid Build Coastguard Worker 
2101*61046927SAndroid Build Coastguard Worker         /* No emitting a thrsw while the previous thrsw hasn't happened yet. */
2102*61046927SAndroid Build Coastguard Worker         if (qinst->qpu.sig.thrsw)
2103*61046927SAndroid Build Coastguard Worker                 return false;
2104*61046927SAndroid Build Coastguard Worker 
2105*61046927SAndroid Build Coastguard Worker         /* The restrictions for instructions scheduled before the the thrsw
2106*61046927SAndroid Build Coastguard Worker          * also apply to instructions scheduled after the thrsw that we want
2107*61046927SAndroid Build Coastguard Worker          * to place in its delay slots.
2108*61046927SAndroid Build Coastguard Worker          */
2109*61046927SAndroid Build Coastguard Worker         if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard, qinst, slot))
2110*61046927SAndroid Build Coastguard Worker                 return false;
2111*61046927SAndroid Build Coastguard Worker 
2112*61046927SAndroid Build Coastguard Worker         /* TLB access is disallowed until scoreboard wait is executed, which
2113*61046927SAndroid Build Coastguard Worker          * we do on the last thread switch.
2114*61046927SAndroid Build Coastguard Worker          */
2115*61046927SAndroid Build Coastguard Worker         if (qpu_inst_is_tlb(&qinst->qpu))
2116*61046927SAndroid Build Coastguard Worker                 return false;
2117*61046927SAndroid Build Coastguard Worker 
2118*61046927SAndroid Build Coastguard Worker         /* Instruction sequence restrictions: Branch is not allowed in delay
2119*61046927SAndroid Build Coastguard Worker          * slots of a thrsw.
2120*61046927SAndroid Build Coastguard Worker          */
2121*61046927SAndroid Build Coastguard Worker         if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
2122*61046927SAndroid Build Coastguard Worker                 return false;
2123*61046927SAndroid Build Coastguard Worker 
2124*61046927SAndroid Build Coastguard Worker         /* Miscellaneous restrictions: At the point of a thrsw we need to have
2125*61046927SAndroid Build Coastguard Worker          * at least one outstanding lookup or TSY wait.
2126*61046927SAndroid Build Coastguard Worker          *
2127*61046927SAndroid Build Coastguard Worker          * So avoid placing TMU instructions scheduled after the thrsw into
2128*61046927SAndroid Build Coastguard Worker          * its delay slots or we may be compromising the integrity of our TMU
2129*61046927SAndroid Build Coastguard Worker          * sequences. Also, notice that if we moved these instructions into
2130*61046927SAndroid Build Coastguard Worker          * the delay slots of a previous thrsw we could overflow our TMU output
2131*61046927SAndroid Build Coastguard Worker          * fifo, since we could be effectively pipelining a lookup scheduled
2132*61046927SAndroid Build Coastguard Worker          * after the thrsw into the sequence before the thrsw.
2133*61046927SAndroid Build Coastguard Worker          */
2134*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_tmu(c->devinfo, &qinst->qpu) ||
2135*61046927SAndroid Build Coastguard Worker             qinst->qpu.sig.wrtmuc) {
2136*61046927SAndroid Build Coastguard Worker                 return false;
2137*61046927SAndroid Build Coastguard Worker         }
2138*61046927SAndroid Build Coastguard Worker 
2139*61046927SAndroid Build Coastguard Worker         /* Don't move instructions that wait on the TMU before the thread switch
2140*61046927SAndroid Build Coastguard Worker          * happens since that would make the current thread stall before the
2141*61046927SAndroid Build Coastguard Worker          * switch, which is exactly what we want to avoid with the thrsw
2142*61046927SAndroid Build Coastguard Worker          * instruction.
2143*61046927SAndroid Build Coastguard Worker          */
2144*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_waits_on_tmu(&qinst->qpu))
2145*61046927SAndroid Build Coastguard Worker                 return false;
2146*61046927SAndroid Build Coastguard Worker 
2147*61046927SAndroid Build Coastguard Worker         /* A thread switch invalidates all accumulators, so don't place any
2148*61046927SAndroid Build Coastguard Worker          * instructions that write accumulators into the delay slots.
2149*61046927SAndroid Build Coastguard Worker          */
2150*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_accum(c->devinfo, &qinst->qpu))
2151*61046927SAndroid Build Coastguard Worker                 return false;
2152*61046927SAndroid Build Coastguard Worker 
2153*61046927SAndroid Build Coastguard Worker         /* Multop has an implicit write to the rtop register which is an
2154*61046927SAndroid Build Coastguard Worker          * specialized accumulator that is only used with this instruction.
2155*61046927SAndroid Build Coastguard Worker          */
2156*61046927SAndroid Build Coastguard Worker         if (qinst->qpu.alu.mul.op == V3D_QPU_M_MULTOP)
2157*61046927SAndroid Build Coastguard Worker                 return false;
2158*61046927SAndroid Build Coastguard Worker 
2159*61046927SAndroid Build Coastguard Worker         /* Flags are invalidated across a thread switch, so dont' place
2160*61046927SAndroid Build Coastguard Worker          * instructions that write flags into delay slots.
2161*61046927SAndroid Build Coastguard Worker          */
2162*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_flags(&qinst->qpu))
2163*61046927SAndroid Build Coastguard Worker                 return false;
2164*61046927SAndroid Build Coastguard Worker 
2165*61046927SAndroid Build Coastguard Worker         /* TSY sync ops materialize at the point of the next thread switch,
2166*61046927SAndroid Build Coastguard Worker          * therefore, if we have a TSY sync right after a thread switch, we
2167*61046927SAndroid Build Coastguard Worker          * cannot place it in its delay slots, or we would be moving the sync
2168*61046927SAndroid Build Coastguard Worker          * to the thrsw before it instead.
2169*61046927SAndroid Build Coastguard Worker          */
2170*61046927SAndroid Build Coastguard Worker         if (qinst->qpu.alu.add.op == V3D_QPU_A_BARRIERID)
2171*61046927SAndroid Build Coastguard Worker                 return false;
2172*61046927SAndroid Build Coastguard Worker 
2173*61046927SAndroid Build Coastguard Worker         return true;
2174*61046927SAndroid Build Coastguard Worker }
2175*61046927SAndroid Build Coastguard Worker 
2176*61046927SAndroid Build Coastguard Worker static bool
valid_thrsw_sequence(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct qinst * qinst,int instructions_in_sequence,bool is_thrend)2177*61046927SAndroid Build Coastguard Worker valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard,
2178*61046927SAndroid Build Coastguard Worker                      struct qinst *qinst, int instructions_in_sequence,
2179*61046927SAndroid Build Coastguard Worker                      bool is_thrend)
2180*61046927SAndroid Build Coastguard Worker {
2181*61046927SAndroid Build Coastguard Worker         for (int slot = 0; slot < instructions_in_sequence; slot++) {
2182*61046927SAndroid Build Coastguard Worker                 if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, scoreboard,
2183*61046927SAndroid Build Coastguard Worker                                                                qinst, slot)) {
2184*61046927SAndroid Build Coastguard Worker                         return false;
2185*61046927SAndroid Build Coastguard Worker                 }
2186*61046927SAndroid Build Coastguard Worker 
2187*61046927SAndroid Build Coastguard Worker                 if (is_thrend &&
2188*61046927SAndroid Build Coastguard Worker                     !qpu_inst_valid_in_thrend_slot(c, qinst, slot)) {
2189*61046927SAndroid Build Coastguard Worker                         return false;
2190*61046927SAndroid Build Coastguard Worker                 }
2191*61046927SAndroid Build Coastguard Worker 
2192*61046927SAndroid Build Coastguard Worker                 /* Note that the list is circular, so we can only do this up
2193*61046927SAndroid Build Coastguard Worker                  * to instructions_in_sequence.
2194*61046927SAndroid Build Coastguard Worker                  */
2195*61046927SAndroid Build Coastguard Worker                 qinst = (struct qinst *)qinst->link.next;
2196*61046927SAndroid Build Coastguard Worker         }
2197*61046927SAndroid Build Coastguard Worker 
2198*61046927SAndroid Build Coastguard Worker         return true;
2199*61046927SAndroid Build Coastguard Worker }
2200*61046927SAndroid Build Coastguard Worker 
2201*61046927SAndroid Build Coastguard Worker /**
2202*61046927SAndroid Build Coastguard Worker  * Emits a THRSW signal in the stream, trying to move it up to pair with
2203*61046927SAndroid Build Coastguard Worker  * another instruction.
2204*61046927SAndroid Build Coastguard Worker  */
2205*61046927SAndroid Build Coastguard Worker static int
emit_thrsw(struct v3d_compile * c,struct qblock * block,struct choose_scoreboard * scoreboard,struct qinst * inst,bool is_thrend)2206*61046927SAndroid Build Coastguard Worker emit_thrsw(struct v3d_compile *c,
2207*61046927SAndroid Build Coastguard Worker            struct qblock *block,
2208*61046927SAndroid Build Coastguard Worker            struct choose_scoreboard *scoreboard,
2209*61046927SAndroid Build Coastguard Worker            struct qinst *inst,
2210*61046927SAndroid Build Coastguard Worker            bool is_thrend)
2211*61046927SAndroid Build Coastguard Worker {
2212*61046927SAndroid Build Coastguard Worker         int time = 0;
2213*61046927SAndroid Build Coastguard Worker 
2214*61046927SAndroid Build Coastguard Worker         /* There should be nothing in a thrsw inst being scheduled other than
2215*61046927SAndroid Build Coastguard Worker          * the signal bits.
2216*61046927SAndroid Build Coastguard Worker          */
2217*61046927SAndroid Build Coastguard Worker         assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
2218*61046927SAndroid Build Coastguard Worker         assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP);
2219*61046927SAndroid Build Coastguard Worker         assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP);
2220*61046927SAndroid Build Coastguard Worker 
2221*61046927SAndroid Build Coastguard Worker         /* Don't try to emit a thrsw in the delay slots of a previous thrsw
2222*61046927SAndroid Build Coastguard Worker          * or branch.
2223*61046927SAndroid Build Coastguard Worker          */
2224*61046927SAndroid Build Coastguard Worker         while (scoreboard->last_thrsw_tick + 2 >= scoreboard->tick) {
2225*61046927SAndroid Build Coastguard Worker                 emit_nop(c, block, scoreboard);
2226*61046927SAndroid Build Coastguard Worker                 time++;
2227*61046927SAndroid Build Coastguard Worker         }
2228*61046927SAndroid Build Coastguard Worker         while (scoreboard->last_branch_tick + 3 >= scoreboard->tick) {
2229*61046927SAndroid Build Coastguard Worker                 emit_nop(c, block, scoreboard);
2230*61046927SAndroid Build Coastguard Worker                 time++;
2231*61046927SAndroid Build Coastguard Worker         }
2232*61046927SAndroid Build Coastguard Worker 
2233*61046927SAndroid Build Coastguard Worker         /* Find how far back into previous instructions we can put the THRSW. */
2234*61046927SAndroid Build Coastguard Worker         int slots_filled = 0;
2235*61046927SAndroid Build Coastguard Worker         int invalid_sig_count = 0;
2236*61046927SAndroid Build Coastguard Worker         int invalid_seq_count = 0;
2237*61046927SAndroid Build Coastguard Worker         bool last_thrsw_after_invalid_ok = false;
2238*61046927SAndroid Build Coastguard Worker         struct qinst *merge_inst = NULL;
2239*61046927SAndroid Build Coastguard Worker         vir_for_each_inst_rev(prev_inst, block) {
2240*61046927SAndroid Build Coastguard Worker                 /* No emitting our thrsw while the previous thrsw hasn't
2241*61046927SAndroid Build Coastguard Worker                  * happened yet.
2242*61046927SAndroid Build Coastguard Worker                  */
2243*61046927SAndroid Build Coastguard Worker                 if (scoreboard->last_thrsw_tick + 3 >
2244*61046927SAndroid Build Coastguard Worker                     scoreboard->tick - (slots_filled + 1)) {
2245*61046927SAndroid Build Coastguard Worker                         break;
2246*61046927SAndroid Build Coastguard Worker                 }
2247*61046927SAndroid Build Coastguard Worker 
2248*61046927SAndroid Build Coastguard Worker 
2249*61046927SAndroid Build Coastguard Worker                 if (!valid_thrsw_sequence(c, scoreboard,
2250*61046927SAndroid Build Coastguard Worker                                           prev_inst, slots_filled + 1,
2251*61046927SAndroid Build Coastguard Worker                                           is_thrend)) {
2252*61046927SAndroid Build Coastguard Worker                         /* Even if the current sequence isn't valid, we may
2253*61046927SAndroid Build Coastguard Worker                          * be able to get a valid sequence by trying to move the
2254*61046927SAndroid Build Coastguard Worker                          * thrsw earlier, so keep going.
2255*61046927SAndroid Build Coastguard Worker                          */
2256*61046927SAndroid Build Coastguard Worker                         invalid_seq_count++;
2257*61046927SAndroid Build Coastguard Worker                         goto cont_block;
2258*61046927SAndroid Build Coastguard Worker                 }
2259*61046927SAndroid Build Coastguard Worker 
2260*61046927SAndroid Build Coastguard Worker                 struct v3d_qpu_sig sig = prev_inst->qpu.sig;
2261*61046927SAndroid Build Coastguard Worker                 sig.thrsw = true;
2262*61046927SAndroid Build Coastguard Worker                 uint32_t packed_sig;
2263*61046927SAndroid Build Coastguard Worker                 if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig)) {
2264*61046927SAndroid Build Coastguard Worker                         /* If we can't merge the thrsw here because of signal
2265*61046927SAndroid Build Coastguard Worker                          * incompatibility, keep going, we might be able to
2266*61046927SAndroid Build Coastguard Worker                          * merge it in an earlier instruction.
2267*61046927SAndroid Build Coastguard Worker                          */
2268*61046927SAndroid Build Coastguard Worker                         invalid_sig_count++;
2269*61046927SAndroid Build Coastguard Worker                         goto cont_block;
2270*61046927SAndroid Build Coastguard Worker                 }
2271*61046927SAndroid Build Coastguard Worker 
2272*61046927SAndroid Build Coastguard Worker                 /* For last thrsw we need 2 consecutive slots that are
2273*61046927SAndroid Build Coastguard Worker                  * thrsw compatible, so if we have previously jumped over
2274*61046927SAndroid Build Coastguard Worker                  * an incompatible signal, flag that we have found the first
2275*61046927SAndroid Build Coastguard Worker                  * valid slot here and keep going.
2276*61046927SAndroid Build Coastguard Worker                  */
2277*61046927SAndroid Build Coastguard Worker                 if (inst->is_last_thrsw && invalid_sig_count > 0 &&
2278*61046927SAndroid Build Coastguard Worker                     !last_thrsw_after_invalid_ok) {
2279*61046927SAndroid Build Coastguard Worker                         last_thrsw_after_invalid_ok = true;
2280*61046927SAndroid Build Coastguard Worker                         invalid_sig_count++;
2281*61046927SAndroid Build Coastguard Worker                         goto cont_block;
2282*61046927SAndroid Build Coastguard Worker                 }
2283*61046927SAndroid Build Coastguard Worker 
2284*61046927SAndroid Build Coastguard Worker                 /* We can merge the thrsw in this instruction */
2285*61046927SAndroid Build Coastguard Worker                 last_thrsw_after_invalid_ok = false;
2286*61046927SAndroid Build Coastguard Worker                 invalid_sig_count = 0;
2287*61046927SAndroid Build Coastguard Worker                 invalid_seq_count = 0;
2288*61046927SAndroid Build Coastguard Worker                 merge_inst = prev_inst;
2289*61046927SAndroid Build Coastguard Worker 
2290*61046927SAndroid Build Coastguard Worker cont_block:
2291*61046927SAndroid Build Coastguard Worker                 if (++slots_filled == 3)
2292*61046927SAndroid Build Coastguard Worker                         break;
2293*61046927SAndroid Build Coastguard Worker         }
2294*61046927SAndroid Build Coastguard Worker 
2295*61046927SAndroid Build Coastguard Worker         /* If we jumped over a signal incompatibility and did not manage to
2296*61046927SAndroid Build Coastguard Worker          * merge the thrsw in the end, we need to adjust slots filled to match
2297*61046927SAndroid Build Coastguard Worker          * the last valid merge point.
2298*61046927SAndroid Build Coastguard Worker          */
2299*61046927SAndroid Build Coastguard Worker         assert((invalid_sig_count == 0 && invalid_seq_count == 0) ||
2300*61046927SAndroid Build Coastguard Worker                 slots_filled >= invalid_sig_count + invalid_seq_count);
2301*61046927SAndroid Build Coastguard Worker         if (invalid_sig_count > 0)
2302*61046927SAndroid Build Coastguard Worker                 slots_filled -= invalid_sig_count;
2303*61046927SAndroid Build Coastguard Worker         if (invalid_seq_count > 0)
2304*61046927SAndroid Build Coastguard Worker                 slots_filled -= invalid_seq_count;
2305*61046927SAndroid Build Coastguard Worker 
2306*61046927SAndroid Build Coastguard Worker         bool needs_free = false;
2307*61046927SAndroid Build Coastguard Worker         if (merge_inst) {
2308*61046927SAndroid Build Coastguard Worker                 merge_inst->qpu.sig.thrsw = true;
2309*61046927SAndroid Build Coastguard Worker                 needs_free = true;
2310*61046927SAndroid Build Coastguard Worker                 scoreboard->last_thrsw_tick = scoreboard->tick - slots_filled;
2311*61046927SAndroid Build Coastguard Worker         } else {
2312*61046927SAndroid Build Coastguard Worker                 scoreboard->last_thrsw_tick = scoreboard->tick;
2313*61046927SAndroid Build Coastguard Worker                 insert_scheduled_instruction(c, block, scoreboard, inst);
2314*61046927SAndroid Build Coastguard Worker                 time++;
2315*61046927SAndroid Build Coastguard Worker                 slots_filled++;
2316*61046927SAndroid Build Coastguard Worker                 merge_inst = inst;
2317*61046927SAndroid Build Coastguard Worker         }
2318*61046927SAndroid Build Coastguard Worker 
2319*61046927SAndroid Build Coastguard Worker         scoreboard->first_thrsw_emitted = true;
2320*61046927SAndroid Build Coastguard Worker 
2321*61046927SAndroid Build Coastguard Worker         /* If we're emitting the last THRSW (other than program end), then
2322*61046927SAndroid Build Coastguard Worker          * signal that to the HW by emitting two THRSWs in a row.
2323*61046927SAndroid Build Coastguard Worker          */
2324*61046927SAndroid Build Coastguard Worker         if (inst->is_last_thrsw) {
2325*61046927SAndroid Build Coastguard Worker                 if (slots_filled <= 1) {
2326*61046927SAndroid Build Coastguard Worker                         emit_nop(c, block, scoreboard);
2327*61046927SAndroid Build Coastguard Worker                         time++;
2328*61046927SAndroid Build Coastguard Worker                 }
2329*61046927SAndroid Build Coastguard Worker                 struct qinst *second_inst =
2330*61046927SAndroid Build Coastguard Worker                         (struct qinst *)merge_inst->link.next;
2331*61046927SAndroid Build Coastguard Worker                 second_inst->qpu.sig.thrsw = true;
2332*61046927SAndroid Build Coastguard Worker                 scoreboard->last_thrsw_emitted = true;
2333*61046927SAndroid Build Coastguard Worker         }
2334*61046927SAndroid Build Coastguard Worker 
2335*61046927SAndroid Build Coastguard Worker         /* Make sure the thread end executes within the program lifespan */
2336*61046927SAndroid Build Coastguard Worker         if (is_thrend) {
2337*61046927SAndroid Build Coastguard Worker                 for (int i = 0; i < 3 - slots_filled; i++) {
2338*61046927SAndroid Build Coastguard Worker                         emit_nop(c, block, scoreboard);
2339*61046927SAndroid Build Coastguard Worker                         time++;
2340*61046927SAndroid Build Coastguard Worker                 }
2341*61046927SAndroid Build Coastguard Worker         }
2342*61046927SAndroid Build Coastguard Worker 
2343*61046927SAndroid Build Coastguard Worker         /* If we put our THRSW into another instruction, free up the
2344*61046927SAndroid Build Coastguard Worker          * instruction that didn't end up scheduled into the list.
2345*61046927SAndroid Build Coastguard Worker          */
2346*61046927SAndroid Build Coastguard Worker         if (needs_free)
2347*61046927SAndroid Build Coastguard Worker                 free(inst);
2348*61046927SAndroid Build Coastguard Worker 
2349*61046927SAndroid Build Coastguard Worker         return time;
2350*61046927SAndroid Build Coastguard Worker }
2351*61046927SAndroid Build Coastguard Worker 
2352*61046927SAndroid Build Coastguard Worker static bool
qpu_inst_valid_in_branch_delay_slot(struct v3d_compile * c,struct qinst * inst)2353*61046927SAndroid Build Coastguard Worker qpu_inst_valid_in_branch_delay_slot(struct v3d_compile *c, struct qinst *inst)
2354*61046927SAndroid Build Coastguard Worker {
2355*61046927SAndroid Build Coastguard Worker         if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
2356*61046927SAndroid Build Coastguard Worker                 return false;
2357*61046927SAndroid Build Coastguard Worker 
2358*61046927SAndroid Build Coastguard Worker         if (inst->qpu.sig.thrsw)
2359*61046927SAndroid Build Coastguard Worker                 return false;
2360*61046927SAndroid Build Coastguard Worker 
2361*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_unifa(c->devinfo, &inst->qpu))
2362*61046927SAndroid Build Coastguard Worker                 return false;
2363*61046927SAndroid Build Coastguard Worker 
2364*61046927SAndroid Build Coastguard Worker         if (vir_has_uniform(inst))
2365*61046927SAndroid Build Coastguard Worker                 return false;
2366*61046927SAndroid Build Coastguard Worker 
2367*61046927SAndroid Build Coastguard Worker         return true;
2368*61046927SAndroid Build Coastguard Worker }
2369*61046927SAndroid Build Coastguard Worker 
2370*61046927SAndroid Build Coastguard Worker static void
emit_branch(struct v3d_compile * c,struct qblock * block,struct choose_scoreboard * scoreboard,struct qinst * inst)2371*61046927SAndroid Build Coastguard Worker emit_branch(struct v3d_compile *c,
2372*61046927SAndroid Build Coastguard Worker            struct qblock *block,
2373*61046927SAndroid Build Coastguard Worker            struct choose_scoreboard *scoreboard,
2374*61046927SAndroid Build Coastguard Worker            struct qinst *inst)
2375*61046927SAndroid Build Coastguard Worker {
2376*61046927SAndroid Build Coastguard Worker         assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
2377*61046927SAndroid Build Coastguard Worker 
2378*61046927SAndroid Build Coastguard Worker         /* We should've not picked up a branch for the delay slots of a previous
2379*61046927SAndroid Build Coastguard Worker          * thrsw, branch or unifa write instruction.
2380*61046927SAndroid Build Coastguard Worker          */
2381*61046927SAndroid Build Coastguard Worker         int branch_tick = scoreboard->tick;
2382*61046927SAndroid Build Coastguard Worker         assert(scoreboard->last_thrsw_tick + 2 < branch_tick);
2383*61046927SAndroid Build Coastguard Worker         assert(scoreboard->last_branch_tick + 3 < branch_tick);
2384*61046927SAndroid Build Coastguard Worker         assert(scoreboard->last_unifa_write_tick + 3 < branch_tick);
2385*61046927SAndroid Build Coastguard Worker 
2386*61046927SAndroid Build Coastguard Worker         /* V3D 4.x can't place a branch with msfign != 0 and cond != 0,2,3 after
2387*61046927SAndroid Build Coastguard Worker          * setmsf.
2388*61046927SAndroid Build Coastguard Worker          */
2389*61046927SAndroid Build Coastguard Worker         bool is_safe_msf_branch =
2390*61046927SAndroid Build Coastguard Worker                 c->devinfo->ver >= 71 ||
2391*61046927SAndroid Build Coastguard Worker                 inst->qpu.branch.msfign == V3D_QPU_MSFIGN_NONE ||
2392*61046927SAndroid Build Coastguard Worker                 inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_ALWAYS ||
2393*61046927SAndroid Build Coastguard Worker                 inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_A0 ||
2394*61046927SAndroid Build Coastguard Worker                 inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_NA0;
2395*61046927SAndroid Build Coastguard Worker         assert(scoreboard->last_setmsf_tick != branch_tick - 1 ||
2396*61046927SAndroid Build Coastguard Worker                is_safe_msf_branch);
2397*61046927SAndroid Build Coastguard Worker 
2398*61046927SAndroid Build Coastguard Worker         /* Insert the branch instruction */
2399*61046927SAndroid Build Coastguard Worker         insert_scheduled_instruction(c, block, scoreboard, inst);
2400*61046927SAndroid Build Coastguard Worker 
2401*61046927SAndroid Build Coastguard Worker         /* Now see if we can move the branch instruction back into the
2402*61046927SAndroid Build Coastguard Worker          * instruction stream to fill its delay slots
2403*61046927SAndroid Build Coastguard Worker          */
2404*61046927SAndroid Build Coastguard Worker         int slots_filled = 0;
2405*61046927SAndroid Build Coastguard Worker         while (slots_filled < 3 && block->instructions.next != &inst->link) {
2406*61046927SAndroid Build Coastguard Worker                 struct qinst *prev_inst = (struct qinst *) inst->link.prev;
2407*61046927SAndroid Build Coastguard Worker                 assert(prev_inst->qpu.type != V3D_QPU_INSTR_TYPE_BRANCH);
2408*61046927SAndroid Build Coastguard Worker 
2409*61046927SAndroid Build Coastguard Worker                 /* Can't move the branch instruction if that would place it
2410*61046927SAndroid Build Coastguard Worker                  * in the delay slots of other instructions.
2411*61046927SAndroid Build Coastguard Worker                  */
2412*61046927SAndroid Build Coastguard Worker                 if (scoreboard->last_branch_tick + 3 >=
2413*61046927SAndroid Build Coastguard Worker                     branch_tick - slots_filled - 1) {
2414*61046927SAndroid Build Coastguard Worker                         break;
2415*61046927SAndroid Build Coastguard Worker                 }
2416*61046927SAndroid Build Coastguard Worker 
2417*61046927SAndroid Build Coastguard Worker                 if (scoreboard->last_thrsw_tick + 2 >=
2418*61046927SAndroid Build Coastguard Worker                     branch_tick - slots_filled - 1) {
2419*61046927SAndroid Build Coastguard Worker                         break;
2420*61046927SAndroid Build Coastguard Worker                 }
2421*61046927SAndroid Build Coastguard Worker 
2422*61046927SAndroid Build Coastguard Worker                 if (scoreboard->last_unifa_write_tick + 3 >=
2423*61046927SAndroid Build Coastguard Worker                     branch_tick - slots_filled - 1) {
2424*61046927SAndroid Build Coastguard Worker                         break;
2425*61046927SAndroid Build Coastguard Worker                 }
2426*61046927SAndroid Build Coastguard Worker 
2427*61046927SAndroid Build Coastguard Worker                 /* Do not move up a branch if it can disrupt an ldvary sequence
2428*61046927SAndroid Build Coastguard Worker                  * as that can cause stomping of the r5 register.
2429*61046927SAndroid Build Coastguard Worker                  */
2430*61046927SAndroid Build Coastguard Worker                 if (scoreboard->last_ldvary_tick + 2 >=
2431*61046927SAndroid Build Coastguard Worker                     branch_tick - slots_filled) {
2432*61046927SAndroid Build Coastguard Worker                        break;
2433*61046927SAndroid Build Coastguard Worker                 }
2434*61046927SAndroid Build Coastguard Worker 
2435*61046927SAndroid Build Coastguard Worker                 /* Can't move a conditional branch before the instruction
2436*61046927SAndroid Build Coastguard Worker                  * that writes the flags for its condition.
2437*61046927SAndroid Build Coastguard Worker                  */
2438*61046927SAndroid Build Coastguard Worker                 if (v3d_qpu_writes_flags(&prev_inst->qpu) &&
2439*61046927SAndroid Build Coastguard Worker                     inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
2440*61046927SAndroid Build Coastguard Worker                         break;
2441*61046927SAndroid Build Coastguard Worker                 }
2442*61046927SAndroid Build Coastguard Worker 
2443*61046927SAndroid Build Coastguard Worker                 if (!qpu_inst_valid_in_branch_delay_slot(c, prev_inst))
2444*61046927SAndroid Build Coastguard Worker                         break;
2445*61046927SAndroid Build Coastguard Worker 
2446*61046927SAndroid Build Coastguard Worker                 if (!is_safe_msf_branch) {
2447*61046927SAndroid Build Coastguard Worker                         struct qinst *prev_prev_inst =
2448*61046927SAndroid Build Coastguard Worker                                 (struct qinst *) prev_inst->link.prev;
2449*61046927SAndroid Build Coastguard Worker                         if (prev_prev_inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
2450*61046927SAndroid Build Coastguard Worker                             prev_prev_inst->qpu.alu.add.op == V3D_QPU_A_SETMSF) {
2451*61046927SAndroid Build Coastguard Worker                                 break;
2452*61046927SAndroid Build Coastguard Worker                         }
2453*61046927SAndroid Build Coastguard Worker                 }
2454*61046927SAndroid Build Coastguard Worker 
2455*61046927SAndroid Build Coastguard Worker                 list_del(&prev_inst->link);
2456*61046927SAndroid Build Coastguard Worker                 list_add(&prev_inst->link, &inst->link);
2457*61046927SAndroid Build Coastguard Worker                 slots_filled++;
2458*61046927SAndroid Build Coastguard Worker         }
2459*61046927SAndroid Build Coastguard Worker 
2460*61046927SAndroid Build Coastguard Worker         block->branch_qpu_ip = c->qpu_inst_count - 1 - slots_filled;
2461*61046927SAndroid Build Coastguard Worker         scoreboard->last_branch_tick = branch_tick - slots_filled;
2462*61046927SAndroid Build Coastguard Worker 
2463*61046927SAndroid Build Coastguard Worker         /* Fill any remaining delay slots.
2464*61046927SAndroid Build Coastguard Worker          *
2465*61046927SAndroid Build Coastguard Worker          * For unconditional branches we'll try to fill these with the
2466*61046927SAndroid Build Coastguard Worker          * first instructions in the successor block after scheduling
2467*61046927SAndroid Build Coastguard Worker          * all blocks when setting up branch targets.
2468*61046927SAndroid Build Coastguard Worker          */
2469*61046927SAndroid Build Coastguard Worker         for (int i = 0; i < 3 - slots_filled; i++)
2470*61046927SAndroid Build Coastguard Worker                 emit_nop(c, block, scoreboard);
2471*61046927SAndroid Build Coastguard Worker }
2472*61046927SAndroid Build Coastguard Worker 
2473*61046927SAndroid Build Coastguard Worker static bool
alu_reads_register(const struct v3d_device_info * devinfo,struct v3d_qpu_instr * inst,bool add,bool magic,uint32_t index)2474*61046927SAndroid Build Coastguard Worker alu_reads_register(const struct v3d_device_info *devinfo,
2475*61046927SAndroid Build Coastguard Worker                    struct v3d_qpu_instr *inst,
2476*61046927SAndroid Build Coastguard Worker                    bool add, bool magic, uint32_t index)
2477*61046927SAndroid Build Coastguard Worker {
2478*61046927SAndroid Build Coastguard Worker         uint32_t num_src;
2479*61046927SAndroid Build Coastguard Worker         if (add)
2480*61046927SAndroid Build Coastguard Worker                 num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
2481*61046927SAndroid Build Coastguard Worker         else
2482*61046927SAndroid Build Coastguard Worker                 num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
2483*61046927SAndroid Build Coastguard Worker 
2484*61046927SAndroid Build Coastguard Worker         if (devinfo->ver == 42) {
2485*61046927SAndroid Build Coastguard Worker                 enum v3d_qpu_mux mux_a, mux_b;
2486*61046927SAndroid Build Coastguard Worker                 if (add) {
2487*61046927SAndroid Build Coastguard Worker                         mux_a = inst->alu.add.a.mux;
2488*61046927SAndroid Build Coastguard Worker                         mux_b = inst->alu.add.b.mux;
2489*61046927SAndroid Build Coastguard Worker                 } else {
2490*61046927SAndroid Build Coastguard Worker                         mux_a = inst->alu.mul.a.mux;
2491*61046927SAndroid Build Coastguard Worker                         mux_b = inst->alu.mul.b.mux;
2492*61046927SAndroid Build Coastguard Worker                 }
2493*61046927SAndroid Build Coastguard Worker 
2494*61046927SAndroid Build Coastguard Worker                 for (int i = 0; i < num_src; i++) {
2495*61046927SAndroid Build Coastguard Worker                         if (magic) {
2496*61046927SAndroid Build Coastguard Worker                                 if (i == 0 && mux_a == index)
2497*61046927SAndroid Build Coastguard Worker                                         return true;
2498*61046927SAndroid Build Coastguard Worker                                 if (i == 1 && mux_b == index)
2499*61046927SAndroid Build Coastguard Worker                                         return true;
2500*61046927SAndroid Build Coastguard Worker                         } else {
2501*61046927SAndroid Build Coastguard Worker                                 if (i == 0 && mux_a == V3D_QPU_MUX_A &&
2502*61046927SAndroid Build Coastguard Worker                                     inst->raddr_a == index) {
2503*61046927SAndroid Build Coastguard Worker                                         return true;
2504*61046927SAndroid Build Coastguard Worker                                 }
2505*61046927SAndroid Build Coastguard Worker                                 if (i == 0 && mux_a == V3D_QPU_MUX_B &&
2506*61046927SAndroid Build Coastguard Worker                                     inst->raddr_b == index) {
2507*61046927SAndroid Build Coastguard Worker                                         return true;
2508*61046927SAndroid Build Coastguard Worker                                 }
2509*61046927SAndroid Build Coastguard Worker                                 if (i == 1 && mux_b == V3D_QPU_MUX_A &&
2510*61046927SAndroid Build Coastguard Worker                                     inst->raddr_a == index) {
2511*61046927SAndroid Build Coastguard Worker                                         return true;
2512*61046927SAndroid Build Coastguard Worker                                 }
2513*61046927SAndroid Build Coastguard Worker                                 if (i == 1 && mux_b == V3D_QPU_MUX_B &&
2514*61046927SAndroid Build Coastguard Worker                                     inst->raddr_b == index) {
2515*61046927SAndroid Build Coastguard Worker                                         return true;
2516*61046927SAndroid Build Coastguard Worker                                 }
2517*61046927SAndroid Build Coastguard Worker                         }
2518*61046927SAndroid Build Coastguard Worker                 }
2519*61046927SAndroid Build Coastguard Worker 
2520*61046927SAndroid Build Coastguard Worker                 return false;
2521*61046927SAndroid Build Coastguard Worker         }
2522*61046927SAndroid Build Coastguard Worker 
2523*61046927SAndroid Build Coastguard Worker         assert(devinfo->ver >= 71);
2524*61046927SAndroid Build Coastguard Worker         assert(!magic);
2525*61046927SAndroid Build Coastguard Worker 
2526*61046927SAndroid Build Coastguard Worker         uint32_t raddr_a, raddr_b;
2527*61046927SAndroid Build Coastguard Worker         if (add) {
2528*61046927SAndroid Build Coastguard Worker                 raddr_a = inst->alu.add.a.raddr;
2529*61046927SAndroid Build Coastguard Worker                 raddr_b = inst->alu.add.b.raddr;
2530*61046927SAndroid Build Coastguard Worker         } else {
2531*61046927SAndroid Build Coastguard Worker                 raddr_a = inst->alu.mul.a.raddr;
2532*61046927SAndroid Build Coastguard Worker                 raddr_b = inst->alu.mul.b.raddr;
2533*61046927SAndroid Build Coastguard Worker         }
2534*61046927SAndroid Build Coastguard Worker 
2535*61046927SAndroid Build Coastguard Worker         for (int i = 0; i < num_src; i++) {
2536*61046927SAndroid Build Coastguard Worker                 if (i == 0 && raddr_a == index)
2537*61046927SAndroid Build Coastguard Worker                         return true;
2538*61046927SAndroid Build Coastguard Worker                 if (i == 1 && raddr_b == index)
2539*61046927SAndroid Build Coastguard Worker                         return true;
2540*61046927SAndroid Build Coastguard Worker         }
2541*61046927SAndroid Build Coastguard Worker 
2542*61046927SAndroid Build Coastguard Worker         return false;
2543*61046927SAndroid Build Coastguard Worker }
2544*61046927SAndroid Build Coastguard Worker 
/**
 * This takes an ldvary signal merged into 'inst' and tries to move it up to
 * the previous instruction to get good pipelining of ldvary sequences,
 * transforming this:
 *
 * nop                  ; nop               ; ldvary.r4
 * nop                  ; fmul  r0, r4, rf0 ;
 * fadd  rf13, r0, r5   ; nop;              ; ldvary.r1  <-- inst
 *
 * into:
 *
 * nop                  ; nop               ; ldvary.r4
 * nop                  ; fmul  r0, r4, rf0 ; ldvary.r1
 * fadd  rf13, r0, r5   ; nop;              ;            <-- inst
 *
 * If we manage to do this successfully (we return true here), then flagging
 * the ldvary as "scheduled" may promote the follow-up fmul to a DAG head that
 * we will be able to pick up to merge into 'inst', leading to code like this:
 *
 * nop                  ; nop               ; ldvary.r4
 * nop                  ; fmul  r0, r4, rf0 ; ldvary.r1
 * fadd  rf13, r0, r5   ; fmul  r2, r1, rf0 ;            <-- inst
 */
2568*61046927SAndroid Build Coastguard Worker static bool
fixup_pipelined_ldvary(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct qblock * block,struct v3d_qpu_instr * inst)2569*61046927SAndroid Build Coastguard Worker fixup_pipelined_ldvary(struct v3d_compile *c,
2570*61046927SAndroid Build Coastguard Worker                        struct choose_scoreboard *scoreboard,
2571*61046927SAndroid Build Coastguard Worker                        struct qblock *block,
2572*61046927SAndroid Build Coastguard Worker                        struct v3d_qpu_instr *inst)
2573*61046927SAndroid Build Coastguard Worker {
2574*61046927SAndroid Build Coastguard Worker         const struct v3d_device_info *devinfo = c->devinfo;
2575*61046927SAndroid Build Coastguard Worker 
2576*61046927SAndroid Build Coastguard Worker         /* We only call this if we have successfully merged an ldvary into a
2577*61046927SAndroid Build Coastguard Worker          * previous instruction.
2578*61046927SAndroid Build Coastguard Worker          */
2579*61046927SAndroid Build Coastguard Worker         assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
2580*61046927SAndroid Build Coastguard Worker         assert(inst->sig.ldvary);
2581*61046927SAndroid Build Coastguard Worker         uint32_t ldvary_magic = inst->sig_magic;
2582*61046927SAndroid Build Coastguard Worker         uint32_t ldvary_index = inst->sig_addr;
2583*61046927SAndroid Build Coastguard Worker 
2584*61046927SAndroid Build Coastguard Worker         /* The instruction in which we merged the ldvary cannot read
2585*61046927SAndroid Build Coastguard Worker          * the ldvary destination, if it does, then moving the ldvary before
2586*61046927SAndroid Build Coastguard Worker          * it would overwrite it.
2587*61046927SAndroid Build Coastguard Worker          */
2588*61046927SAndroid Build Coastguard Worker         if (alu_reads_register(devinfo, inst, true, ldvary_magic, ldvary_index))
2589*61046927SAndroid Build Coastguard Worker                 return false;
2590*61046927SAndroid Build Coastguard Worker         if (alu_reads_register(devinfo, inst, false, ldvary_magic, ldvary_index))
2591*61046927SAndroid Build Coastguard Worker                 return false;
2592*61046927SAndroid Build Coastguard Worker 
2593*61046927SAndroid Build Coastguard Worker         /* The implicit ldvary destination may not be written to by a signal
2594*61046927SAndroid Build Coastguard Worker          * in the instruction following ldvary. Since we are planning to move
2595*61046927SAndroid Build Coastguard Worker          * ldvary to the previous instruction, this means we need to check if
2596*61046927SAndroid Build Coastguard Worker          * the current instruction has any other signal that could create this
2597*61046927SAndroid Build Coastguard Worker          * conflict. The only other signal that can write to the implicit
2598*61046927SAndroid Build Coastguard Worker          * ldvary destination that is compatible with ldvary in the same
2599*61046927SAndroid Build Coastguard Worker          * instruction is ldunif.
2600*61046927SAndroid Build Coastguard Worker          */
2601*61046927SAndroid Build Coastguard Worker         if (inst->sig.ldunif)
2602*61046927SAndroid Build Coastguard Worker                 return false;
2603*61046927SAndroid Build Coastguard Worker 
2604*61046927SAndroid Build Coastguard Worker         /* The previous instruction can't write to the same destination as the
2605*61046927SAndroid Build Coastguard Worker          * ldvary.
2606*61046927SAndroid Build Coastguard Worker          */
2607*61046927SAndroid Build Coastguard Worker         struct qinst *prev = (struct qinst *) block->instructions.prev;
2608*61046927SAndroid Build Coastguard Worker         if (!prev || prev->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
2609*61046927SAndroid Build Coastguard Worker                 return false;
2610*61046927SAndroid Build Coastguard Worker 
2611*61046927SAndroid Build Coastguard Worker         if (prev->qpu.alu.add.op != V3D_QPU_A_NOP) {
2612*61046927SAndroid Build Coastguard Worker                 if (prev->qpu.alu.add.magic_write == ldvary_magic &&
2613*61046927SAndroid Build Coastguard Worker                     prev->qpu.alu.add.waddr == ldvary_index) {
2614*61046927SAndroid Build Coastguard Worker                         return false;
2615*61046927SAndroid Build Coastguard Worker                 }
2616*61046927SAndroid Build Coastguard Worker         }
2617*61046927SAndroid Build Coastguard Worker 
2618*61046927SAndroid Build Coastguard Worker         if (prev->qpu.alu.mul.op != V3D_QPU_M_NOP) {
2619*61046927SAndroid Build Coastguard Worker                 if (prev->qpu.alu.mul.magic_write == ldvary_magic &&
2620*61046927SAndroid Build Coastguard Worker                     prev->qpu.alu.mul.waddr == ldvary_index) {
2621*61046927SAndroid Build Coastguard Worker                         return false;
2622*61046927SAndroid Build Coastguard Worker                 }
2623*61046927SAndroid Build Coastguard Worker         }
2624*61046927SAndroid Build Coastguard Worker 
2625*61046927SAndroid Build Coastguard Worker         /* The previous instruction cannot have a conflicting signal */
2626*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_sig_writes_address(devinfo, &prev->qpu.sig))
2627*61046927SAndroid Build Coastguard Worker                 return false;
2628*61046927SAndroid Build Coastguard Worker 
2629*61046927SAndroid Build Coastguard Worker         uint32_t sig;
2630*61046927SAndroid Build Coastguard Worker         struct v3d_qpu_sig new_sig = prev->qpu.sig;
2631*61046927SAndroid Build Coastguard Worker         new_sig.ldvary = true;
2632*61046927SAndroid Build Coastguard Worker         if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
2633*61046927SAndroid Build Coastguard Worker                 return false;
2634*61046927SAndroid Build Coastguard Worker 
2635*61046927SAndroid Build Coastguard Worker         /* The previous instruction cannot use flags since ldvary uses the
2636*61046927SAndroid Build Coastguard Worker          * 'cond' instruction field to store the destination.
2637*61046927SAndroid Build Coastguard Worker          */
2638*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_writes_flags(&prev->qpu))
2639*61046927SAndroid Build Coastguard Worker                 return false;
2640*61046927SAndroid Build Coastguard Worker         if (v3d_qpu_reads_flags(&prev->qpu))
2641*61046927SAndroid Build Coastguard Worker                 return false;
2642*61046927SAndroid Build Coastguard Worker 
2643*61046927SAndroid Build Coastguard Worker         /* We can't put an ldvary in the delay slots of a thrsw. We should've
2644*61046927SAndroid Build Coastguard Worker          * prevented this when pairing up the ldvary with another instruction
2645*61046927SAndroid Build Coastguard Worker          * and flagging it for a fixup. In V3D 7.x this is limited only to the
2646*61046927SAndroid Build Coastguard Worker          * second delay slot.
2647*61046927SAndroid Build Coastguard Worker          */
2648*61046927SAndroid Build Coastguard Worker         assert((devinfo->ver == 42 &&
2649*61046927SAndroid Build Coastguard Worker                 scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) ||
2650*61046927SAndroid Build Coastguard Worker                (devinfo->ver >= 71 &&
2651*61046927SAndroid Build Coastguard Worker                 scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1));
2652*61046927SAndroid Build Coastguard Worker 
2653*61046927SAndroid Build Coastguard Worker         /* Move the ldvary to the previous instruction and remove it from the
2654*61046927SAndroid Build Coastguard Worker          * current one.
2655*61046927SAndroid Build Coastguard Worker          */
2656*61046927SAndroid Build Coastguard Worker         prev->qpu.sig.ldvary = true;
2657*61046927SAndroid Build Coastguard Worker         prev->qpu.sig_magic = ldvary_magic;
2658*61046927SAndroid Build Coastguard Worker         prev->qpu.sig_addr = ldvary_index;
2659*61046927SAndroid Build Coastguard Worker         scoreboard->last_ldvary_tick = scoreboard->tick - 1;
2660*61046927SAndroid Build Coastguard Worker 
2661*61046927SAndroid Build Coastguard Worker         inst->sig.ldvary = false;
2662*61046927SAndroid Build Coastguard Worker         inst->sig_magic = false;
2663*61046927SAndroid Build Coastguard Worker         inst->sig_addr = 0;
2664*61046927SAndroid Build Coastguard Worker 
2665*61046927SAndroid Build Coastguard Worker         /* Update rf0 flops tracking for new ldvary delayed rf0 write tick */
2666*61046927SAndroid Build Coastguard Worker         if (devinfo->ver >= 71) {
2667*61046927SAndroid Build Coastguard Worker                 scoreboard->last_implicit_rf0_write_tick = scoreboard->tick;
2668*61046927SAndroid Build Coastguard Worker                 set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
2669*61046927SAndroid Build Coastguard Worker         }
2670*61046927SAndroid Build Coastguard Worker 
2671*61046927SAndroid Build Coastguard Worker         /* By moving ldvary to the previous instruction we make it update r5
2672*61046927SAndroid Build Coastguard Worker          * (rf0 for ver >= 71) in the current one, so nothing else in it
2673*61046927SAndroid Build Coastguard Worker          * should write this register.
2674*61046927SAndroid Build Coastguard Worker          *
2675*61046927SAndroid Build Coastguard Worker          * This should've been prevented by our dependency tracking, which
2676*61046927SAndroid Build Coastguard Worker          * would not allow ldvary to be paired up with an instruction that
2677*61046927SAndroid Build Coastguard Worker          * writes r5/rf0 (since our dependency tracking doesn't know that the
2678*61046927SAndroid Build Coastguard Worker          * ldvary write to r5/rf0 happens in the next instruction).
2679*61046927SAndroid Build Coastguard Worker          */
2680*61046927SAndroid Build Coastguard Worker         assert(!v3d_qpu_writes_r5(devinfo, inst));
2681*61046927SAndroid Build Coastguard Worker         assert(devinfo->ver == 42 ||
2682*61046927SAndroid Build Coastguard Worker                (!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
2683*61046927SAndroid Build Coastguard Worker                 !v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));
2684*61046927SAndroid Build Coastguard Worker 
2685*61046927SAndroid Build Coastguard Worker         return true;
2686*61046927SAndroid Build Coastguard Worker }
2687*61046927SAndroid Build Coastguard Worker 
2688*61046927SAndroid Build Coastguard Worker static uint32_t
schedule_instructions(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct qblock * block,enum quniform_contents * orig_uniform_contents,uint32_t * orig_uniform_data,uint32_t * next_uniform)2689*61046927SAndroid Build Coastguard Worker schedule_instructions(struct v3d_compile *c,
2690*61046927SAndroid Build Coastguard Worker                       struct choose_scoreboard *scoreboard,
2691*61046927SAndroid Build Coastguard Worker                       struct qblock *block,
2692*61046927SAndroid Build Coastguard Worker                       enum quniform_contents *orig_uniform_contents,
2693*61046927SAndroid Build Coastguard Worker                       uint32_t *orig_uniform_data,
2694*61046927SAndroid Build Coastguard Worker                       uint32_t *next_uniform)
2695*61046927SAndroid Build Coastguard Worker {
2696*61046927SAndroid Build Coastguard Worker         const struct v3d_device_info *devinfo = c->devinfo;
2697*61046927SAndroid Build Coastguard Worker         uint32_t time = 0;
2698*61046927SAndroid Build Coastguard Worker 
2699*61046927SAndroid Build Coastguard Worker         while (!list_is_empty(&scoreboard->dag->heads)) {
2700*61046927SAndroid Build Coastguard Worker                 struct schedule_node *chosen =
2701*61046927SAndroid Build Coastguard Worker                         choose_instruction_to_schedule(c, scoreboard, NULL);
2702*61046927SAndroid Build Coastguard Worker                 struct schedule_node *merge = NULL;
2703*61046927SAndroid Build Coastguard Worker 
2704*61046927SAndroid Build Coastguard Worker                 /* If there are no valid instructions to schedule, drop a NOP
2705*61046927SAndroid Build Coastguard Worker                  * in.
2706*61046927SAndroid Build Coastguard Worker                  */
2707*61046927SAndroid Build Coastguard Worker                 struct qinst *qinst = chosen ? chosen->inst : vir_nop();
2708*61046927SAndroid Build Coastguard Worker                 struct v3d_qpu_instr *inst = &qinst->qpu;
2709*61046927SAndroid Build Coastguard Worker 
2710*61046927SAndroid Build Coastguard Worker                 if (debug) {
2711*61046927SAndroid Build Coastguard Worker                         fprintf(stderr, "t=%4d: current list:\n",
2712*61046927SAndroid Build Coastguard Worker                                 time);
2713*61046927SAndroid Build Coastguard Worker                         dump_state(devinfo, scoreboard->dag);
2714*61046927SAndroid Build Coastguard Worker                         fprintf(stderr, "t=%4d: chose:   ", time);
2715*61046927SAndroid Build Coastguard Worker                         v3d_qpu_dump(devinfo, inst);
2716*61046927SAndroid Build Coastguard Worker                         fprintf(stderr, "\n");
2717*61046927SAndroid Build Coastguard Worker                 }
2718*61046927SAndroid Build Coastguard Worker 
2719*61046927SAndroid Build Coastguard Worker                 /* We can't mark_instruction_scheduled() the chosen inst until
2720*61046927SAndroid Build Coastguard Worker                  * we're done identifying instructions to merge, so put the
2721*61046927SAndroid Build Coastguard Worker                  * merged instructions on a list for a moment.
2722*61046927SAndroid Build Coastguard Worker                  */
2723*61046927SAndroid Build Coastguard Worker                 struct list_head merged_list;
2724*61046927SAndroid Build Coastguard Worker                 list_inithead(&merged_list);
2725*61046927SAndroid Build Coastguard Worker 
2726*61046927SAndroid Build Coastguard Worker                 /* Schedule this instruction onto the QPU list. Also try to
2727*61046927SAndroid Build Coastguard Worker                  * find an instruction to pair with it.
2728*61046927SAndroid Build Coastguard Worker                  */
2729*61046927SAndroid Build Coastguard Worker                 if (chosen) {
2730*61046927SAndroid Build Coastguard Worker                         time = MAX2(chosen->unblocked_time, time);
2731*61046927SAndroid Build Coastguard Worker                         pre_remove_head(scoreboard->dag, chosen);
2732*61046927SAndroid Build Coastguard Worker 
2733*61046927SAndroid Build Coastguard Worker                         while ((merge =
2734*61046927SAndroid Build Coastguard Worker                                 choose_instruction_to_schedule(c, scoreboard,
2735*61046927SAndroid Build Coastguard Worker                                                                chosen))) {
2736*61046927SAndroid Build Coastguard Worker                                 time = MAX2(merge->unblocked_time, time);
2737*61046927SAndroid Build Coastguard Worker                                 pre_remove_head(scoreboard->dag, merge);
2738*61046927SAndroid Build Coastguard Worker                                 list_addtail(&merge->link, &merged_list);
2739*61046927SAndroid Build Coastguard Worker                                 (void)qpu_merge_inst(devinfo, inst,
2740*61046927SAndroid Build Coastguard Worker                                                      inst, &merge->inst->qpu);
2741*61046927SAndroid Build Coastguard Worker                                 if (merge->inst->uniform != -1) {
2742*61046927SAndroid Build Coastguard Worker                                         chosen->inst->uniform =
2743*61046927SAndroid Build Coastguard Worker                                                 merge->inst->uniform;
2744*61046927SAndroid Build Coastguard Worker                                 }
2745*61046927SAndroid Build Coastguard Worker 
2746*61046927SAndroid Build Coastguard Worker                                 chosen->inst->ldtmu_count +=
2747*61046927SAndroid Build Coastguard Worker                                         merge->inst->ldtmu_count;
2748*61046927SAndroid Build Coastguard Worker 
2749*61046927SAndroid Build Coastguard Worker                                 if (debug) {
2750*61046927SAndroid Build Coastguard Worker                                         fprintf(stderr, "t=%4d: merging: ",
2751*61046927SAndroid Build Coastguard Worker                                                 time);
2752*61046927SAndroid Build Coastguard Worker                                         v3d_qpu_dump(devinfo, &merge->inst->qpu);
2753*61046927SAndroid Build Coastguard Worker                                         fprintf(stderr, "\n");
2754*61046927SAndroid Build Coastguard Worker                                         fprintf(stderr, "         result: ");
2755*61046927SAndroid Build Coastguard Worker                                         v3d_qpu_dump(devinfo, inst);
2756*61046927SAndroid Build Coastguard Worker                                         fprintf(stderr, "\n");
2757*61046927SAndroid Build Coastguard Worker                                 }
2758*61046927SAndroid Build Coastguard Worker 
2759*61046927SAndroid Build Coastguard Worker                                 if (scoreboard->fixup_ldvary) {
2760*61046927SAndroid Build Coastguard Worker                                         scoreboard->fixup_ldvary = false;
2761*61046927SAndroid Build Coastguard Worker                                         if (fixup_pipelined_ldvary(c, scoreboard, block, inst)) {
2762*61046927SAndroid Build Coastguard Worker                                                 /* Flag the ldvary as scheduled
2763*61046927SAndroid Build Coastguard Worker                                                  * now so we can try to merge the
2764*61046927SAndroid Build Coastguard Worker                                                  * follow-up instruction in the
2765*61046927SAndroid Build Coastguard Worker                                                  * the ldvary sequence into the
2766*61046927SAndroid Build Coastguard Worker                                                  * current instruction.
2767*61046927SAndroid Build Coastguard Worker                                                  */
2768*61046927SAndroid Build Coastguard Worker                                                 mark_instruction_scheduled(
2769*61046927SAndroid Build Coastguard Worker                                                         devinfo, scoreboard->dag,
2770*61046927SAndroid Build Coastguard Worker                                                         time, merge);
2771*61046927SAndroid Build Coastguard Worker                                         }
2772*61046927SAndroid Build Coastguard Worker                                 }
2773*61046927SAndroid Build Coastguard Worker                         }
2774*61046927SAndroid Build Coastguard Worker                         if (read_stalls(c->devinfo, scoreboard, inst))
2775*61046927SAndroid Build Coastguard Worker                                 c->qpu_inst_stalled_count++;
2776*61046927SAndroid Build Coastguard Worker                 }
2777*61046927SAndroid Build Coastguard Worker 
2778*61046927SAndroid Build Coastguard Worker                 /* Update the uniform index for the rewritten location --
2779*61046927SAndroid Build Coastguard Worker                  * branch target updating will still need to change
2780*61046927SAndroid Build Coastguard Worker                  * c->uniform_data[] using this index.
2781*61046927SAndroid Build Coastguard Worker                  */
2782*61046927SAndroid Build Coastguard Worker                 if (qinst->uniform != -1) {
2783*61046927SAndroid Build Coastguard Worker                         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
2784*61046927SAndroid Build Coastguard Worker                                 block->branch_uniform = *next_uniform;
2785*61046927SAndroid Build Coastguard Worker 
2786*61046927SAndroid Build Coastguard Worker                         c->uniform_data[*next_uniform] =
2787*61046927SAndroid Build Coastguard Worker                                 orig_uniform_data[qinst->uniform];
2788*61046927SAndroid Build Coastguard Worker                         c->uniform_contents[*next_uniform] =
2789*61046927SAndroid Build Coastguard Worker                                 orig_uniform_contents[qinst->uniform];
2790*61046927SAndroid Build Coastguard Worker                         qinst->uniform = *next_uniform;
2791*61046927SAndroid Build Coastguard Worker                         (*next_uniform)++;
2792*61046927SAndroid Build Coastguard Worker                 }
2793*61046927SAndroid Build Coastguard Worker 
2794*61046927SAndroid Build Coastguard Worker                 if (debug) {
2795*61046927SAndroid Build Coastguard Worker                         fprintf(stderr, "\n");
2796*61046927SAndroid Build Coastguard Worker                 }
2797*61046927SAndroid Build Coastguard Worker 
2798*61046927SAndroid Build Coastguard Worker                 /* Now that we've scheduled a new instruction, some of its
2799*61046927SAndroid Build Coastguard Worker                  * children can be promoted to the list of instructions ready to
2800*61046927SAndroid Build Coastguard Worker                  * be scheduled.  Update the children's unblocked time for this
2801*61046927SAndroid Build Coastguard Worker                  * DAG edge as we do so.
2802*61046927SAndroid Build Coastguard Worker                  */
2803*61046927SAndroid Build Coastguard Worker                 mark_instruction_scheduled(devinfo, scoreboard->dag, time, chosen);
2804*61046927SAndroid Build Coastguard Worker                 list_for_each_entry(struct schedule_node, merge, &merged_list,
2805*61046927SAndroid Build Coastguard Worker                                     link) {
2806*61046927SAndroid Build Coastguard Worker                         mark_instruction_scheduled(devinfo, scoreboard->dag, time, merge);
2807*61046927SAndroid Build Coastguard Worker 
2808*61046927SAndroid Build Coastguard Worker                         /* The merged VIR instruction doesn't get re-added to the
2809*61046927SAndroid Build Coastguard Worker                          * block, so free it now.
2810*61046927SAndroid Build Coastguard Worker                          */
2811*61046927SAndroid Build Coastguard Worker                         free(merge->inst);
2812*61046927SAndroid Build Coastguard Worker                 }
2813*61046927SAndroid Build Coastguard Worker 
2814*61046927SAndroid Build Coastguard Worker                 if (inst->sig.thrsw) {
2815*61046927SAndroid Build Coastguard Worker                         time += emit_thrsw(c, block, scoreboard, qinst, false);
2816*61046927SAndroid Build Coastguard Worker                 } else if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
2817*61046927SAndroid Build Coastguard Worker                         emit_branch(c, block, scoreboard, qinst);
2818*61046927SAndroid Build Coastguard Worker                 } else {
2819*61046927SAndroid Build Coastguard Worker                         insert_scheduled_instruction(c, block,
2820*61046927SAndroid Build Coastguard Worker                                                      scoreboard, qinst);
2821*61046927SAndroid Build Coastguard Worker                 }
2822*61046927SAndroid Build Coastguard Worker         }
2823*61046927SAndroid Build Coastguard Worker 
2824*61046927SAndroid Build Coastguard Worker         return time;
2825*61046927SAndroid Build Coastguard Worker }
2826*61046927SAndroid Build Coastguard Worker 
2827*61046927SAndroid Build Coastguard Worker static uint32_t
qpu_schedule_instructions_block(struct v3d_compile * c,struct choose_scoreboard * scoreboard,struct qblock * block,enum quniform_contents * orig_uniform_contents,uint32_t * orig_uniform_data,uint32_t * next_uniform)2828*61046927SAndroid Build Coastguard Worker qpu_schedule_instructions_block(struct v3d_compile *c,
2829*61046927SAndroid Build Coastguard Worker                                 struct choose_scoreboard *scoreboard,
2830*61046927SAndroid Build Coastguard Worker                                 struct qblock *block,
2831*61046927SAndroid Build Coastguard Worker                                 enum quniform_contents *orig_uniform_contents,
2832*61046927SAndroid Build Coastguard Worker                                 uint32_t *orig_uniform_data,
2833*61046927SAndroid Build Coastguard Worker                                 uint32_t *next_uniform)
2834*61046927SAndroid Build Coastguard Worker {
2835*61046927SAndroid Build Coastguard Worker         void *mem_ctx = ralloc_context(NULL);
2836*61046927SAndroid Build Coastguard Worker         scoreboard->dag = dag_create(mem_ctx);
2837*61046927SAndroid Build Coastguard Worker         struct list_head setup_list;
2838*61046927SAndroid Build Coastguard Worker 
2839*61046927SAndroid Build Coastguard Worker         list_inithead(&setup_list);
2840*61046927SAndroid Build Coastguard Worker 
2841*61046927SAndroid Build Coastguard Worker         /* Wrap each instruction in a scheduler structure. */
2842*61046927SAndroid Build Coastguard Worker         while (!list_is_empty(&block->instructions)) {
2843*61046927SAndroid Build Coastguard Worker                 struct qinst *qinst = (struct qinst *)block->instructions.next;
2844*61046927SAndroid Build Coastguard Worker                 struct schedule_node *n =
2845*61046927SAndroid Build Coastguard Worker                         rzalloc(mem_ctx, struct schedule_node);
2846*61046927SAndroid Build Coastguard Worker 
2847*61046927SAndroid Build Coastguard Worker                 dag_init_node(scoreboard->dag, &n->dag);
2848*61046927SAndroid Build Coastguard Worker                 n->inst = qinst;
2849*61046927SAndroid Build Coastguard Worker 
2850*61046927SAndroid Build Coastguard Worker                 list_del(&qinst->link);
2851*61046927SAndroid Build Coastguard Worker                 list_addtail(&n->link, &setup_list);
2852*61046927SAndroid Build Coastguard Worker         }
2853*61046927SAndroid Build Coastguard Worker 
2854*61046927SAndroid Build Coastguard Worker         calculate_forward_deps(c, scoreboard->dag, &setup_list);
2855*61046927SAndroid Build Coastguard Worker         calculate_reverse_deps(c, scoreboard->dag, &setup_list);
2856*61046927SAndroid Build Coastguard Worker 
2857*61046927SAndroid Build Coastguard Worker         dag_traverse_bottom_up(scoreboard->dag, compute_delay, c);
2858*61046927SAndroid Build Coastguard Worker 
2859*61046927SAndroid Build Coastguard Worker         uint32_t cycles = schedule_instructions(c, scoreboard, block,
2860*61046927SAndroid Build Coastguard Worker                                                 orig_uniform_contents,
2861*61046927SAndroid Build Coastguard Worker                                                 orig_uniform_data,
2862*61046927SAndroid Build Coastguard Worker                                                 next_uniform);
2863*61046927SAndroid Build Coastguard Worker 
2864*61046927SAndroid Build Coastguard Worker         ralloc_free(mem_ctx);
2865*61046927SAndroid Build Coastguard Worker         scoreboard->dag = NULL;
2866*61046927SAndroid Build Coastguard Worker 
2867*61046927SAndroid Build Coastguard Worker         return cycles;
2868*61046927SAndroid Build Coastguard Worker }
2869*61046927SAndroid Build Coastguard Worker 
2870*61046927SAndroid Build Coastguard Worker static void
qpu_set_branch_targets(struct v3d_compile * c)2871*61046927SAndroid Build Coastguard Worker qpu_set_branch_targets(struct v3d_compile *c)
2872*61046927SAndroid Build Coastguard Worker {
2873*61046927SAndroid Build Coastguard Worker         vir_for_each_block(block, c) {
2874*61046927SAndroid Build Coastguard Worker                 /* The end block of the program has no branch. */
2875*61046927SAndroid Build Coastguard Worker                 if (!block->successors[0])
2876*61046927SAndroid Build Coastguard Worker                         continue;
2877*61046927SAndroid Build Coastguard Worker 
2878*61046927SAndroid Build Coastguard Worker                 /* If there was no branch instruction, then the successor
2879*61046927SAndroid Build Coastguard Worker                  * block must follow immediately after this one.
2880*61046927SAndroid Build Coastguard Worker                  */
2881*61046927SAndroid Build Coastguard Worker                 if (block->branch_qpu_ip == ~0) {
2882*61046927SAndroid Build Coastguard Worker                         assert(block->end_qpu_ip + 1 ==
2883*61046927SAndroid Build Coastguard Worker                                block->successors[0]->start_qpu_ip);
2884*61046927SAndroid Build Coastguard Worker                         continue;
2885*61046927SAndroid Build Coastguard Worker                 }
2886*61046927SAndroid Build Coastguard Worker 
2887*61046927SAndroid Build Coastguard Worker                 /* Walk back through the delay slots to find the branch
2888*61046927SAndroid Build Coastguard Worker                  * instr.
2889*61046927SAndroid Build Coastguard Worker                  */
2890*61046927SAndroid Build Coastguard Worker                 struct qinst *branch = NULL;
2891*61046927SAndroid Build Coastguard Worker                 struct list_head *entry = block->instructions.prev;
2892*61046927SAndroid Build Coastguard Worker                 int32_t delay_slot_count = -1;
2893*61046927SAndroid Build Coastguard Worker                 struct qinst *delay_slots_start = NULL;
2894*61046927SAndroid Build Coastguard Worker                 for (int i = 0; i < 3; i++) {
2895*61046927SAndroid Build Coastguard Worker                         entry = entry->prev;
2896*61046927SAndroid Build Coastguard Worker                         struct qinst *inst =
2897*61046927SAndroid Build Coastguard Worker                                 container_of(entry, struct qinst, link);
2898*61046927SAndroid Build Coastguard Worker 
2899*61046927SAndroid Build Coastguard Worker                         if (delay_slot_count == -1) {
2900*61046927SAndroid Build Coastguard Worker                                 if (!v3d_qpu_is_nop(&inst->qpu))
2901*61046927SAndroid Build Coastguard Worker                                         delay_slot_count = i;
2902*61046927SAndroid Build Coastguard Worker                                 else
2903*61046927SAndroid Build Coastguard Worker                                         delay_slots_start = inst;
2904*61046927SAndroid Build Coastguard Worker                         }
2905*61046927SAndroid Build Coastguard Worker 
2906*61046927SAndroid Build Coastguard Worker                         if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
2907*61046927SAndroid Build Coastguard Worker                                 branch = inst;
2908*61046927SAndroid Build Coastguard Worker                                 break;
2909*61046927SAndroid Build Coastguard Worker                         }
2910*61046927SAndroid Build Coastguard Worker                 }
2911*61046927SAndroid Build Coastguard Worker                 assert(branch && branch->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
2912*61046927SAndroid Build Coastguard Worker                 assert(delay_slot_count >= 0 && delay_slot_count <= 3);
2913*61046927SAndroid Build Coastguard Worker                 assert(delay_slot_count == 0 || delay_slots_start != NULL);
2914*61046927SAndroid Build Coastguard Worker 
2915*61046927SAndroid Build Coastguard Worker                 /* Make sure that the if-we-don't-jump
2916*61046927SAndroid Build Coastguard Worker                  * successor was scheduled just after the
2917*61046927SAndroid Build Coastguard Worker                  * delay slots.
2918*61046927SAndroid Build Coastguard Worker                  */
2919*61046927SAndroid Build Coastguard Worker                 assert(!block->successors[1] ||
2920*61046927SAndroid Build Coastguard Worker                        block->successors[1]->start_qpu_ip ==
2921*61046927SAndroid Build Coastguard Worker                        block->branch_qpu_ip + 4);
2922*61046927SAndroid Build Coastguard Worker 
2923*61046927SAndroid Build Coastguard Worker                 branch->qpu.branch.offset =
2924*61046927SAndroid Build Coastguard Worker                         ((block->successors[0]->start_qpu_ip -
2925*61046927SAndroid Build Coastguard Worker                           (block->branch_qpu_ip + 4)) *
2926*61046927SAndroid Build Coastguard Worker                          sizeof(uint64_t));
2927*61046927SAndroid Build Coastguard Worker 
2928*61046927SAndroid Build Coastguard Worker                 /* Set up the relative offset to jump in the
2929*61046927SAndroid Build Coastguard Worker                  * uniform stream.
2930*61046927SAndroid Build Coastguard Worker                  *
2931*61046927SAndroid Build Coastguard Worker                  * Use a temporary here, because
2932*61046927SAndroid Build Coastguard Worker                  * uniform_data[inst->uniform] may be shared
2933*61046927SAndroid Build Coastguard Worker                  * between multiple instructions.
2934*61046927SAndroid Build Coastguard Worker                  */
2935*61046927SAndroid Build Coastguard Worker                 assert(c->uniform_contents[branch->uniform] == QUNIFORM_CONSTANT);
2936*61046927SAndroid Build Coastguard Worker                 c->uniform_data[branch->uniform] =
2937*61046927SAndroid Build Coastguard Worker                         (block->successors[0]->start_uniform -
2938*61046927SAndroid Build Coastguard Worker                          (block->branch_uniform + 1)) * 4;
2939*61046927SAndroid Build Coastguard Worker 
2940*61046927SAndroid Build Coastguard Worker                 /* If this is an unconditional branch, try to fill any remaining
2941*61046927SAndroid Build Coastguard Worker                  * delay slots with the initial instructions of the successor
2942*61046927SAndroid Build Coastguard Worker                  * block.
2943*61046927SAndroid Build Coastguard Worker                  *
2944*61046927SAndroid Build Coastguard Worker                  * FIXME: we can do the same for conditional branches if we
2945*61046927SAndroid Build Coastguard Worker                  * predicate the instructions to match the branch condition.
2946*61046927SAndroid Build Coastguard Worker                  */
2947*61046927SAndroid Build Coastguard Worker                 if (branch->qpu.branch.cond == V3D_QPU_BRANCH_COND_ALWAYS) {
2948*61046927SAndroid Build Coastguard Worker                         struct list_head *successor_insts =
2949*61046927SAndroid Build Coastguard Worker                                 &block->successors[0]->instructions;
2950*61046927SAndroid Build Coastguard Worker                         delay_slot_count = MIN2(delay_slot_count,
2951*61046927SAndroid Build Coastguard Worker                                                 list_length(successor_insts));
2952*61046927SAndroid Build Coastguard Worker                         struct qinst *s_inst =
2953*61046927SAndroid Build Coastguard Worker                                 (struct qinst *) successor_insts->next;
2954*61046927SAndroid Build Coastguard Worker                         struct qinst *slot = delay_slots_start;
2955*61046927SAndroid Build Coastguard Worker                         int slots_filled = 0;
2956*61046927SAndroid Build Coastguard Worker                         while (slots_filled < delay_slot_count &&
2957*61046927SAndroid Build Coastguard Worker                                qpu_inst_valid_in_branch_delay_slot(c, s_inst)) {
2958*61046927SAndroid Build Coastguard Worker                                 memcpy(&slot->qpu, &s_inst->qpu,
2959*61046927SAndroid Build Coastguard Worker                                        sizeof(slot->qpu));
2960*61046927SAndroid Build Coastguard Worker                                 s_inst = (struct qinst *) s_inst->link.next;
2961*61046927SAndroid Build Coastguard Worker                                 slot = (struct qinst *) slot->link.next;
2962*61046927SAndroid Build Coastguard Worker                                 slots_filled++;
2963*61046927SAndroid Build Coastguard Worker                         }
2964*61046927SAndroid Build Coastguard Worker                         branch->qpu.branch.offset +=
2965*61046927SAndroid Build Coastguard Worker                                 slots_filled * sizeof(uint64_t);
2966*61046927SAndroid Build Coastguard Worker                 }
2967*61046927SAndroid Build Coastguard Worker         }
2968*61046927SAndroid Build Coastguard Worker }
2969*61046927SAndroid Build Coastguard Worker 
2970*61046927SAndroid Build Coastguard Worker uint32_t
v3d_qpu_schedule_instructions(struct v3d_compile * c)2971*61046927SAndroid Build Coastguard Worker v3d_qpu_schedule_instructions(struct v3d_compile *c)
2972*61046927SAndroid Build Coastguard Worker {
2973*61046927SAndroid Build Coastguard Worker         const struct v3d_device_info *devinfo = c->devinfo;
2974*61046927SAndroid Build Coastguard Worker         struct qblock *end_block = list_last_entry(&c->blocks,
2975*61046927SAndroid Build Coastguard Worker                                                    struct qblock, link);
2976*61046927SAndroid Build Coastguard Worker 
2977*61046927SAndroid Build Coastguard Worker         /* We reorder the uniforms as we schedule instructions, so save the
2978*61046927SAndroid Build Coastguard Worker          * old data off and replace it.
2979*61046927SAndroid Build Coastguard Worker          */
2980*61046927SAndroid Build Coastguard Worker         uint32_t *uniform_data = c->uniform_data;
2981*61046927SAndroid Build Coastguard Worker         enum quniform_contents *uniform_contents = c->uniform_contents;
2982*61046927SAndroid Build Coastguard Worker         c->uniform_contents = ralloc_array(c, enum quniform_contents,
2983*61046927SAndroid Build Coastguard Worker                                            c->num_uniforms);
2984*61046927SAndroid Build Coastguard Worker         c->uniform_data = ralloc_array(c, uint32_t, c->num_uniforms);
2985*61046927SAndroid Build Coastguard Worker         c->uniform_array_size = c->num_uniforms;
2986*61046927SAndroid Build Coastguard Worker         uint32_t next_uniform = 0;
2987*61046927SAndroid Build Coastguard Worker 
2988*61046927SAndroid Build Coastguard Worker         struct choose_scoreboard scoreboard;
2989*61046927SAndroid Build Coastguard Worker         memset(&scoreboard, 0, sizeof(scoreboard));
2990*61046927SAndroid Build Coastguard Worker         scoreboard.last_ldvary_tick = -10;
2991*61046927SAndroid Build Coastguard Worker         scoreboard.last_unifa_write_tick = -10;
2992*61046927SAndroid Build Coastguard Worker         scoreboard.last_magic_sfu_write_tick = -10;
2993*61046927SAndroid Build Coastguard Worker         scoreboard.last_uniforms_reset_tick = -10;
2994*61046927SAndroid Build Coastguard Worker         scoreboard.last_thrsw_tick = -10;
2995*61046927SAndroid Build Coastguard Worker         scoreboard.last_branch_tick = -10;
2996*61046927SAndroid Build Coastguard Worker         scoreboard.last_setmsf_tick = -10;
2997*61046927SAndroid Build Coastguard Worker         scoreboard.last_stallable_sfu_tick = -10;
2998*61046927SAndroid Build Coastguard Worker         scoreboard.first_ldtmu_after_thrsw = true;
2999*61046927SAndroid Build Coastguard Worker         scoreboard.last_implicit_rf0_write_tick = - 10;
3000*61046927SAndroid Build Coastguard Worker 
3001*61046927SAndroid Build Coastguard Worker         if (debug) {
3002*61046927SAndroid Build Coastguard Worker                 fprintf(stderr, "Pre-schedule instructions\n");
3003*61046927SAndroid Build Coastguard Worker                 vir_for_each_block(block, c) {
3004*61046927SAndroid Build Coastguard Worker                         fprintf(stderr, "BLOCK %d\n", block->index);
3005*61046927SAndroid Build Coastguard Worker                         list_for_each_entry(struct qinst, qinst,
3006*61046927SAndroid Build Coastguard Worker                                             &block->instructions, link) {
3007*61046927SAndroid Build Coastguard Worker                                 v3d_qpu_dump(devinfo, &qinst->qpu);
3008*61046927SAndroid Build Coastguard Worker                                 fprintf(stderr, "\n");
3009*61046927SAndroid Build Coastguard Worker                         }
3010*61046927SAndroid Build Coastguard Worker                 }
3011*61046927SAndroid Build Coastguard Worker                 fprintf(stderr, "\n");
3012*61046927SAndroid Build Coastguard Worker         }
3013*61046927SAndroid Build Coastguard Worker 
3014*61046927SAndroid Build Coastguard Worker         uint32_t cycles = 0;
3015*61046927SAndroid Build Coastguard Worker         vir_for_each_block(block, c) {
3016*61046927SAndroid Build Coastguard Worker                 block->start_qpu_ip = c->qpu_inst_count;
3017*61046927SAndroid Build Coastguard Worker                 block->branch_qpu_ip = ~0;
3018*61046927SAndroid Build Coastguard Worker                 block->start_uniform = next_uniform;
3019*61046927SAndroid Build Coastguard Worker 
3020*61046927SAndroid Build Coastguard Worker                 cycles += qpu_schedule_instructions_block(c,
3021*61046927SAndroid Build Coastguard Worker                                                           &scoreboard,
3022*61046927SAndroid Build Coastguard Worker                                                           block,
3023*61046927SAndroid Build Coastguard Worker                                                           uniform_contents,
3024*61046927SAndroid Build Coastguard Worker                                                           uniform_data,
3025*61046927SAndroid Build Coastguard Worker                                                           &next_uniform);
3026*61046927SAndroid Build Coastguard Worker 
3027*61046927SAndroid Build Coastguard Worker                 block->end_qpu_ip = c->qpu_inst_count - 1;
3028*61046927SAndroid Build Coastguard Worker         }
3029*61046927SAndroid Build Coastguard Worker 
3030*61046927SAndroid Build Coastguard Worker         /* Emit the program-end THRSW instruction. */;
3031*61046927SAndroid Build Coastguard Worker         struct qinst *thrsw = vir_nop();
3032*61046927SAndroid Build Coastguard Worker         thrsw->qpu.sig.thrsw = true;
3033*61046927SAndroid Build Coastguard Worker         emit_thrsw(c, end_block, &scoreboard, thrsw, true);
3034*61046927SAndroid Build Coastguard Worker 
3035*61046927SAndroid Build Coastguard Worker         qpu_set_branch_targets(c);
3036*61046927SAndroid Build Coastguard Worker 
3037*61046927SAndroid Build Coastguard Worker         assert(next_uniform == c->num_uniforms);
3038*61046927SAndroid Build Coastguard Worker 
3039*61046927SAndroid Build Coastguard Worker         return cycles;
3040*61046927SAndroid Build Coastguard Worker }
3041