1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "radeon_dataflow.h"
7
8 #include "radeon_compiler.h"
9
10
11 struct updatemask_state {
12 unsigned char Output[RC_REGISTER_MAX_INDEX];
13 unsigned char Temporary[RC_REGISTER_MAX_INDEX];
14 unsigned char Address;
15 unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
16 };
17
/**
 * What the analysis has learned about one instruction so far.
 * One of these exists per instruction, indexed by the instruction's IP.
 */
struct instruction_state {
	/* Destination channels whose written value was found to be used. */
	unsigned char WriteMask : 4;

	/* Set once the instruction's ALU result write was found to be used. */
	unsigned char WriteALUResult : 1;

	/* Per source operand: swizzle slots that have already been
	 * propagated into the liveness masks. */
	unsigned char SrcReg[3];
};
23
24 struct loopinfo {
25 struct updatemask_state StoreEndloop;
26 unsigned int BreakCount;
27 unsigned int BreaksReserved;
28 };
29
30 struct branchinfo {
31 unsigned int HaveElse:1;
32
33 struct updatemask_state StoreEndif;
34 struct updatemask_state StoreElse;
35 };
36
37 struct deadcode_state {
38 struct radeon_compiler * C;
39 struct instruction_state * Instructions;
40
41 struct updatemask_state R;
42
43 struct branchinfo * BranchStack;
44 unsigned int BranchStackSize;
45 unsigned int BranchStackReserved;
46
47 struct loopinfo * LoopStack;
48 unsigned int LoopStackSize;
49 unsigned int LoopStackReserved;
50 };
51
52
or_updatemasks(struct updatemask_state * dst,struct updatemask_state * a,struct updatemask_state * b)53 static void or_updatemasks(
54 struct updatemask_state * dst,
55 struct updatemask_state * a,
56 struct updatemask_state * b)
57 {
58 for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
59 dst->Output[i] = a->Output[i] | b->Output[i];
60 dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
61 }
62
63 for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
64 dst->Special[i] = a->Special[i] | b->Special[i];
65
66 dst->Address = a->Address | b->Address;
67 }
68
push_loop(struct deadcode_state * s)69 static void push_loop(struct deadcode_state * s)
70 {
71 memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
72 s->LoopStackSize, s->LoopStackReserved, 1);
73 memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
74 memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R));
75 }
76
push_branch(struct deadcode_state * s)77 static void push_branch(struct deadcode_state * s)
78 {
79 struct branchinfo * branch;
80
81 memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
82 s->BranchStackSize, s->BranchStackReserved, 1);
83
84 branch = &s->BranchStack[s->BranchStackSize++];
85 branch->HaveElse = 0;
86 memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
87 }
88
/**
 * Look up the slot that holds the liveness mask of a register.
 *
 * \param s      pass state; the slot lives inside s->R
 * \param file   register file of the register
 * \param index  register index within \p file (not used for RC_FILE_ADDRESS,
 *               which is tracked as a single slot)
 * \return pointer to the channel mask, or NULL if the register file is not
 *         tracked or \p index is out of range. An out-of-range index is
 *         additionally reported through rc_error().
 */
static unsigned char *get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	switch (file) {
	case RC_FILE_OUTPUT:
	case RC_FILE_TEMPORARY:
		if (index >= RC_REGISTER_MAX_INDEX) {
			/* index is unsigned, so print it with %u; the enum is
			 * cast so that it matches %i regardless of the enum's
			 * underlying type. */
			rc_error(s->C, "%s: index %u is out of bounds for file %i\n",
				 __func__, index, (int)file);
			return NULL;
		}

		return file == RC_FILE_OUTPUT ? &s->R.Output[index]
					      : &s->R.Temporary[index];

	case RC_FILE_ADDRESS:
		return &s->R.Address;

	case RC_FILE_SPECIAL:
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %u out of bounds\n",
				 __func__, index);
			return NULL;
		}

		return &s->R.Special[index];

	default:
		/* Other register files are not tracked by this pass. */
		return NULL;
	}
}
114
/**
 * Record that the channels in \p mask of the given register are read.
 *
 * Registers for which get_used_ptr() yields no slot are silently ignored.
 */
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char *slot = get_used_ptr(s, file, index);

	if (slot == NULL)
		return;

	*slot = *slot | mask;
}
121
update_instruction(struct deadcode_state * s,struct rc_instruction * inst)122 static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
123 {
124 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
125 struct instruction_state * insts = &s->Instructions[inst->IP];
126 unsigned int usedmask = 0;
127 unsigned int srcmasks[3];
128
129 if (opcode->HasDstReg) {
130 unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
131 if (pused) {
132 usedmask = *pused & inst->U.I.DstReg.WriteMask;
133 *pused &= ~usedmask;
134 }
135 }
136
137 insts->WriteMask |= usedmask;
138
139 if (inst->U.I.WriteALUResult) {
140 unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
141 if (pused && *pused) {
142 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
143 usedmask |= RC_MASK_X;
144 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
145 usedmask |= RC_MASK_W;
146
147 *pused = 0;
148 insts->WriteALUResult = 1;
149 }
150 }
151
152 rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
153
154 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
155 unsigned int refmask = 0;
156 unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
157 insts->SrcReg[src] |= newsrcmask;
158
159 for(unsigned int chan = 0; chan < 4; ++chan) {
160 if (GET_BIT(newsrcmask, chan))
161 refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
162 }
163
164 /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
165 refmask &= RC_MASK_XYZW;
166
167 if (!refmask)
168 continue;
169
170 mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
171
172 if (inst->U.I.SrcReg[src].RelAddr)
173 mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
174 }
175 }
176
rc_dataflow_deadcode(struct radeon_compiler * c,void * user)177 void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
178 {
179 struct deadcode_state s;
180 unsigned int nr_instructions;
181 unsigned int ip;
182
183 memset(&s, 0, sizeof(s));
184 s.C = c;
185
186 nr_instructions = rc_recompute_ips(c);
187 s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
188 memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
189
190 for(struct rc_instruction * inst = c->Program.Instructions.Prev;
191 inst != &c->Program.Instructions;
192 inst = inst->Prev) {
193 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
194
195 /* Assume all output regs are live. Anything else should have been
196 * eliminated before it got to us.
197 */
198 if (opcode->HasDstReg)
199 mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask);
200
201 switch(opcode->Opcode){
202 /* Mark all sources in the loop body as used before doing
203 * normal deadcode analysis. This is probably not optimal.
204 * Save this pessimistic deadcode state and restore it anytime
205 * we see a break just to be extra sure.
206 */
207 case RC_OPCODE_ENDLOOP:
208 {
209 int endloops = 1;
210 struct rc_instruction *ptr;
211 for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
212 opcode = rc_get_opcode_info(ptr->U.I.Opcode);
213 if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
214 endloops--;
215 continue;
216 }
217 if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
218 endloops++;
219 continue;
220 }
221 if(opcode->HasDstReg){
222 int src = 0;
223 unsigned int srcmasks[3];
224 unsigned int writemask = ptr->U.I.DstReg.WriteMask;
225 if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
226 writemask |= RC_MASK_X;
227 else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
228 writemask |= RC_MASK_W;
229
230 rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
231 for(src=0; src < opcode->NumSrcRegs; src++){
232 mark_used(&s,
233 ptr->U.I.SrcReg[src].File,
234 ptr->U.I.SrcReg[src].Index,
235 srcmasks[src]);
236 }
237 }
238 }
239 push_loop(&s);
240 break;
241 }
242 case RC_OPCODE_BRK:
243 {
244 struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
245 memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R));
246 break;
247 }
248 case RC_OPCODE_BGNLOOP:
249 s.LoopStackSize--;
250 break;
251 case RC_OPCODE_CONT:
252 break;
253 case RC_OPCODE_ENDIF:
254 push_branch(&s);
255 break;
256 default:
257 if (opcode->IsFlowControl && s.BranchStackSize) {
258 struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
259 if (opcode->Opcode == RC_OPCODE_IF) {
260 or_updatemasks(&s.R,
261 &s.R,
262 branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
263
264 s.BranchStackSize--;
265 } else if (opcode->Opcode == RC_OPCODE_ELSE) {
266 if (branch->HaveElse) {
267 rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __func__);
268 } else {
269 memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
270 memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
271 branch->HaveElse = 1;
272 }
273 } else {
274 rc_error(c, "%s: Unhandled control flow instruction %s\n", __func__, opcode->Name);
275 }
276 }
277 }
278
279 update_instruction(&s, inst);
280 }
281
282 ip = 0;
283 for(struct rc_instruction * inst = c->Program.Instructions.Next;
284 inst != &c->Program.Instructions;
285 inst = inst->Next, ++ip) {
286 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
287 int dead = 1;
288 unsigned int srcmasks[3];
289 unsigned int usemask;
290
291 if (!opcode->HasDstReg) {
292 dead = 0;
293 } else {
294 inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
295 if (s.Instructions[ip].WriteMask)
296 dead = 0;
297
298 if (s.Instructions[ip].WriteALUResult)
299 dead = 0;
300 else
301 inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
302 }
303
304 if (dead) {
305 struct rc_instruction * todelete = inst;
306 inst = inst->Prev;
307 rc_remove_instruction(todelete);
308 continue;
309 }
310
311 usemask = s.Instructions[ip].WriteMask;
312
313 if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
314 usemask |= RC_MASK_X;
315 else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
316 usemask |= RC_MASK_W;
317
318 rc_compute_sources_for_writemask(inst, usemask, srcmasks);
319
320 for(unsigned int src = 0; src < 3; ++src) {
321 for(unsigned int chan = 0; chan < 4; ++chan) {
322 if (!GET_BIT(srcmasks[src], chan))
323 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
324 }
325 }
326 }
327
328 rc_calculate_inputs_outputs(c);
329 }
330