1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * @file v3d_opt_dead_code.c
26 *
27 * This is a simple dead code eliminator for SSA values in VIR.
28 *
29 * It walks all the instructions finding what temps are used, then walks again
30 * to remove instructions writing unused temps.
31 *
32 * This is an inefficient implementation if you have long chains of
33 * instructions where the entire chain is dead, but we expect those to have
34 * been eliminated at the NIR level, and here we're just cleaning up small
35 * problems produced by NIR->VIR.
36 */
37
38 #include "v3d_compiler.h"
39
/* Trace switch for this pass: when true, every instruction that gets removed
 * or modified is dumped to stderr.  Nothing visible in this file assigns it,
 * so it keeps its initial value unless edited here.
 */
static bool debug = false;
41
42 static void
dce(struct v3d_compile * c,struct qinst * inst)43 dce(struct v3d_compile *c, struct qinst *inst)
44 {
45 if (debug) {
46 fprintf(stderr, "Removing: ");
47 vir_dump_inst(c, inst);
48 fprintf(stderr, "\n");
49 }
50 assert(!v3d_qpu_writes_flags(&inst->qpu));
51 vir_remove_instruction(c, inst);
52 }
53
54 static bool
can_write_to_null(struct v3d_compile * c,struct qinst * inst)55 can_write_to_null(struct v3d_compile *c, struct qinst *inst)
56 {
57 /* The SFU instructions must write to a physical register. */
58 if (v3d_qpu_uses_sfu(&inst->qpu))
59 return false;
60
61 return true;
62 }
63
64 static void
vir_dce_flags(struct v3d_compile * c,struct qinst * inst)65 vir_dce_flags(struct v3d_compile *c, struct qinst *inst)
66 {
67 if (debug) {
68 fprintf(stderr,
69 "Removing flags write from: ");
70 vir_dump_inst(c, inst);
71 fprintf(stderr, "\n");
72 }
73
74 assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
75
76 inst->qpu.flags.apf = V3D_QPU_PF_NONE;
77 inst->qpu.flags.mpf = V3D_QPU_PF_NONE;
78 inst->qpu.flags.auf = V3D_QPU_UF_NONE;
79 inst->qpu.flags.muf = V3D_QPU_UF_NONE;
80 }
81
82 static bool
check_last_ldunifa(struct v3d_compile * c,struct qinst * inst,struct qblock * block)83 check_last_ldunifa(struct v3d_compile *c,
84 struct qinst *inst,
85 struct qblock *block)
86 {
87 if (!inst->qpu.sig.ldunifa && !inst->qpu.sig.ldunifarf)
88 return false;
89
90 list_for_each_entry_from(struct qinst, scan_inst, inst->link.next,
91 &block->instructions, link) {
92 /* If we find a new write to unifa, then this was the last
93 * ldunifa in its sequence and is safe to remove.
94 */
95 if (scan_inst->dst.file == QFILE_MAGIC &&
96 scan_inst->dst.index == V3D_QPU_WADDR_UNIFA) {
97 return true;
98 }
99
100 /* If we find another ldunifa in the same sequence then we
101 * can't remove it.
102 */
103 if (scan_inst->qpu.sig.ldunifa || scan_inst->qpu.sig.ldunifarf)
104 return false;
105 }
106
107 return true;
108 }
109
110 static bool
check_first_ldunifa(struct v3d_compile * c,struct qinst * inst,struct qblock * block,struct qinst ** unifa)111 check_first_ldunifa(struct v3d_compile *c,
112 struct qinst *inst,
113 struct qblock *block,
114 struct qinst **unifa)
115 {
116 if (!inst->qpu.sig.ldunifa && !inst->qpu.sig.ldunifarf)
117 return false;
118
119 list_for_each_entry_from_rev(struct qinst, scan_inst, inst->link.prev,
120 &block->instructions, link) {
121 /* If we find a write to unifa, then this was the first
122 * ldunifa in its sequence and is safe to remove.
123 */
124 if (scan_inst->dst.file == QFILE_MAGIC &&
125 scan_inst->dst.index == V3D_QPU_WADDR_UNIFA) {
126 *unifa = scan_inst;
127 return true;
128 }
129
130 /* If we find another ldunifa in the same sequence then we
131 * can't remove it.
132 */
133 if (scan_inst->qpu.sig.ldunifa || scan_inst->qpu.sig.ldunifarf)
134 return false;
135 }
136
137 unreachable("could not find starting unifa for ldunifa sequence");
138 }
139
140 static bool
increment_unifa_address(struct v3d_compile * c,struct qinst * unifa)141 increment_unifa_address(struct v3d_compile *c, struct qinst *unifa)
142 {
143 if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
144 unifa->qpu.alu.mul.op == V3D_QPU_M_MOV) {
145 c->cursor = vir_after_inst(unifa);
146 struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
147 vir_ADD_dest(c, unifa_reg, unifa->src[0], vir_uniform_ui(c, 4u));
148 vir_remove_instruction(c, unifa);
149 return true;
150 }
151
152 if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
153 unifa->qpu.alu.add.op == V3D_QPU_A_ADD) {
154 c->cursor = vir_after_inst(unifa);
155 struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
156 struct qreg tmp =
157 vir_ADD(c, unifa->src[1], vir_uniform_ui(c, 4u));
158 vir_ADD_dest(c, unifa_reg, unifa->src[0], tmp);
159 vir_remove_instruction(c, unifa);
160 return true;
161 }
162
163 return false;
164 }
165
166 bool
vir_opt_dead_code(struct v3d_compile * c)167 vir_opt_dead_code(struct v3d_compile *c)
168 {
169 bool progress = false;
170 bool *used = calloc(c->num_temps, sizeof(bool));
171
172 /* Defuse the "are you removing the cursor?" assertion in the core.
173 * You'll need to set up a new cursor for any new instructions after
174 * doing DCE (which we would expect, anyway).
175 */
176 c->cursor.link = NULL;
177
178 vir_for_each_inst_inorder(inst, c) {
179 for (int i = 0; i < vir_get_nsrc(inst); i++) {
180 if (inst->src[i].file == QFILE_TEMP)
181 used[inst->src[i].index] = true;
182 }
183 }
184
185 struct qinst *last_multop = NULL;
186 vir_for_each_block(block, c) {
187 struct qinst *last_flags_write = NULL;
188 c->cur_block = block;
189 vir_for_each_inst_safe(inst, block) {
190 /* If this instruction reads the flags, we can't
191 * remove the flags generation for it.
192 */
193 if (v3d_qpu_reads_flags(&inst->qpu))
194 last_flags_write = NULL;
195
196 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
197 inst->qpu.alu.mul.op == V3D_QPU_M_MULTOP) {
198 last_multop = inst;
199 }
200
201 if (inst->dst.file != QFILE_NULL &&
202 !(inst->dst.file == QFILE_TEMP &&
203 !used[inst->dst.index])) {
204 continue;
205 }
206
207 const bool is_ldunifa = inst->qpu.sig.ldunifa ||
208 inst->qpu.sig.ldunifarf;
209
210 if (vir_has_side_effects(c, inst) && !is_ldunifa)
211 continue;
212
213 bool is_first_ldunifa = false;
214 bool is_last_ldunifa = false;
215 struct qinst *unifa = NULL;
216 if (is_ldunifa) {
217 is_last_ldunifa =
218 check_last_ldunifa(c, inst, block);
219
220 is_first_ldunifa =
221 check_first_ldunifa(c, inst, block, &unifa);
222 }
223
224 if (v3d_qpu_writes_flags(&inst->qpu)) {
225 /* If we obscure a previous flags write,
226 * drop it.
227 */
228 if (last_flags_write &&
229 (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
230 inst->qpu.flags.mpf != V3D_QPU_PF_NONE)) {
231 vir_dce_flags(c, last_flags_write);
232 progress = true;
233 }
234
235 last_flags_write = inst;
236 }
237
238 if (v3d_qpu_writes_flags(&inst->qpu) ||
239 (is_ldunifa && !is_first_ldunifa && !is_last_ldunifa)) {
240 /* If we can't remove the instruction, but we
241 * don't need its destination value, just
242 * remove the destination. The register
243 * allocator would trivially color it and it
244 * wouldn't cause any register pressure, but
245 * it's nicer to read the VIR code without
246 * unused destination regs.
247 */
248 if (inst->dst.file == QFILE_TEMP &&
249 can_write_to_null(c, inst)) {
250 if (debug) {
251 fprintf(stderr,
252 "Removing dst from: ");
253 vir_dump_inst(c, inst);
254 fprintf(stderr, "\n");
255 }
256 c->defs[inst->dst.index] = NULL;
257 inst->dst.file = QFILE_NULL;
258 progress = true;
259 }
260 continue;
261 }
262
263 /* If we are removing the first ldunifa in a sequence
264 * we need to update the unifa address.
265 */
266 if (is_first_ldunifa) {
267 assert(unifa);
268 if (!increment_unifa_address(c, unifa))
269 continue;
270 }
271
272 /* If we drop umul24 we should also drop the previous
273 * multop that we emit with it.
274 */
275 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
276 inst->qpu.alu.mul.op == V3D_QPU_M_UMUL24 &&
277 last_multop) {
278 dce(c, last_multop);
279 last_multop = NULL;
280 }
281
282 assert(inst != last_flags_write);
283 dce(c, inst);
284 progress = true;
285 continue;
286 }
287 }
288
289 free(used);
290
291 return progress;
292 }
293