xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_dataflow.h"
7 
8 #include "radeon_compiler.h"
9 
10 
11 struct updatemask_state {
12 	unsigned char Output[RC_REGISTER_MAX_INDEX];
13 	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
14 	unsigned char Address;
15 	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
16 };
17 
/**
 * Result of the backwards analysis for a single instruction, indexed by the
 * instruction's IP in deadcode_state::Instructions.
 */
struct instruction_state {
	/* Destination components that were found to be needed. */
	unsigned char WriteMask : 4;

	/* Set when the instruction's ALU result write is consumed. */
	unsigned char WriteALUResult : 1;

	/* Per-source channel masks that have already been accounted for. */
	unsigned char SrcReg[3];
};
23 
24 struct loopinfo {
25 	struct updatemask_state StoreEndloop;
26 	unsigned int BreakCount;
27 	unsigned int BreaksReserved;
28 };
29 
30 struct branchinfo {
31 	unsigned int HaveElse:1;
32 
33 	struct updatemask_state StoreEndif;
34 	struct updatemask_state StoreElse;
35 };
36 
37 struct deadcode_state {
38 	struct radeon_compiler * C;
39 	struct instruction_state * Instructions;
40 
41 	struct updatemask_state R;
42 
43 	struct branchinfo * BranchStack;
44 	unsigned int BranchStackSize;
45 	unsigned int BranchStackReserved;
46 
47 	struct loopinfo * LoopStack;
48 	unsigned int LoopStackSize;
49 	unsigned int LoopStackReserved;
50 };
51 
52 
or_updatemasks(struct updatemask_state * dst,struct updatemask_state * a,struct updatemask_state * b)53 static void or_updatemasks(
54 	struct updatemask_state * dst,
55 	struct updatemask_state * a,
56 	struct updatemask_state * b)
57 {
58 	for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
59 		dst->Output[i] = a->Output[i] | b->Output[i];
60 		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
61 	}
62 
63 	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
64 		dst->Special[i] = a->Special[i] | b->Special[i];
65 
66 	dst->Address = a->Address | b->Address;
67 }
68 
push_loop(struct deadcode_state * s)69 static void push_loop(struct deadcode_state * s)
70 {
71 	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
72 			s->LoopStackSize, s->LoopStackReserved, 1);
73 	memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
74 	memcpy(&s->LoopStack[s->LoopStackSize - 1].StoreEndloop, &s->R, sizeof(s->R));
75 }
76 
push_branch(struct deadcode_state * s)77 static void push_branch(struct deadcode_state * s)
78 {
79 	struct branchinfo * branch;
80 
81 	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
82 			s->BranchStackSize, s->BranchStackReserved, 1);
83 
84 	branch = &s->BranchStack[s->BranchStackSize++];
85 	branch->HaveElse = 0;
86 	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
87 }
88 
/**
 * Look up the liveness mask of a register in the current state.
 *
 * \param s      pass state
 * \param file   register file of the register
 * \param index  register index inside \p file (ignored for RC_FILE_ADDRESS)
 * \return pointer to the mask byte, or NULL when the file is not tracked or
 *         the index is out of range (the latter also reports an rc_error)
 */
static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	switch (file) {
	case RC_FILE_OUTPUT:
	case RC_FILE_TEMPORARY:
		if (index >= RC_REGISTER_MAX_INDEX) {
			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __func__, index, file);
			return NULL;
		}
		return file == RC_FILE_OUTPUT ? &s->R.Output[index]
					      : &s->R.Temporary[index];

	case RC_FILE_ADDRESS:
		return &s->R.Address;

	case RC_FILE_SPECIAL:
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %i out of bounds\n", __func__, index);
			return NULL;
		}
		return &s->R.Special[index];

	default:
		/* Any other register file is not tracked by this pass. */
		return NULL;
	}
}
114 
/**
 * Add the components in \p mask to the liveness mask of the given register.
 * Registers for which get_used_ptr() returns NULL are silently skipped.
 */
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char * live = get_used_ptr(s, file, index);

	if (!live)
		return;

	*live |= mask;
}
121 
update_instruction(struct deadcode_state * s,struct rc_instruction * inst)122 static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
123 {
124 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
125 	struct instruction_state * insts = &s->Instructions[inst->IP];
126 	unsigned int usedmask = 0;
127 	unsigned int srcmasks[3];
128 
129 	if (opcode->HasDstReg) {
130 		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
131 		if (pused) {
132 			usedmask = *pused & inst->U.I.DstReg.WriteMask;
133 			*pused &= ~usedmask;
134 		}
135 	}
136 
137 	insts->WriteMask |= usedmask;
138 
139 	if (inst->U.I.WriteALUResult) {
140 		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
141 		if (pused && *pused) {
142 			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
143 				usedmask |= RC_MASK_X;
144 			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
145 				usedmask |= RC_MASK_W;
146 
147 			*pused = 0;
148 			insts->WriteALUResult = 1;
149 		}
150 	}
151 
152 	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
153 
154 	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
155 		unsigned int refmask = 0;
156 		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
157 		insts->SrcReg[src] |= newsrcmask;
158 
159 		for(unsigned int chan = 0; chan < 4; ++chan) {
160 			if (GET_BIT(newsrcmask, chan))
161 				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
162 		}
163 
164 		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
165 		refmask &= RC_MASK_XYZW;
166 
167 		if (!refmask)
168 			continue;
169 
170 		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
171 
172 		if (inst->U.I.SrcReg[src].RelAddr)
173 			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
174 	}
175 }
176 
rc_dataflow_deadcode(struct radeon_compiler * c,void * user)177 void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
178 {
179 	struct deadcode_state s;
180 	unsigned int nr_instructions;
181 	unsigned int ip;
182 
183 	memset(&s, 0, sizeof(s));
184 	s.C = c;
185 
186 	nr_instructions = rc_recompute_ips(c);
187 	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
188 	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
189 
190 	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
191 	    inst != &c->Program.Instructions;
192 	    inst = inst->Prev) {
193 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
194 
195 		/* Assume all output regs are live.  Anything else should have been
196 		 * eliminated before it got to us.
197 		 */
198 		if (opcode->HasDstReg)
199 			mark_used(&s, RC_FILE_OUTPUT, inst->U.I.DstReg.Index, inst->U.I.DstReg.WriteMask);
200 
201 		switch(opcode->Opcode){
202 		/* Mark all sources in the loop body as used before doing
203 		 * normal deadcode analysis. This is probably not optimal.
204 		 * Save this pessimistic deadcode state and restore it anytime
205 		 * we see a break just to be extra sure.
206 		 */
207 		case RC_OPCODE_ENDLOOP:
208 		{
209 			int endloops = 1;
210 			struct rc_instruction *ptr;
211 			for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
212 				opcode = rc_get_opcode_info(ptr->U.I.Opcode);
213 				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
214 					endloops--;
215 					continue;
216 				}
217 				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
218 					endloops++;
219 					continue;
220 				}
221 				if(opcode->HasDstReg){
222 					int src = 0;
223 					unsigned int srcmasks[3];
224 					unsigned int writemask = ptr->U.I.DstReg.WriteMask;
225 					if (ptr->U.I.WriteALUResult == RC_ALURESULT_X)
226 						writemask |= RC_MASK_X;
227 					else if (ptr->U.I.WriteALUResult == RC_ALURESULT_W)
228 						writemask |= RC_MASK_W;
229 
230 					rc_compute_sources_for_writemask(ptr, writemask, srcmasks);
231 					for(src=0; src < opcode->NumSrcRegs; src++){
232 						mark_used(&s,
233 							ptr->U.I.SrcReg[src].File,
234 							ptr->U.I.SrcReg[src].Index,
235 							srcmasks[src]);
236 					}
237 				}
238 			}
239 			push_loop(&s);
240 			break;
241 		}
242 		case RC_OPCODE_BRK:
243 		{
244 			struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
245 			memcpy(&s.R, &loop->StoreEndloop, sizeof(s.R));
246 			break;
247 		}
248 		case RC_OPCODE_BGNLOOP:
249 			s.LoopStackSize--;
250 			break;
251 		case RC_OPCODE_CONT:
252 			break;
253 		case RC_OPCODE_ENDIF:
254 			push_branch(&s);
255 			break;
256 		default:
257 			if (opcode->IsFlowControl && s.BranchStackSize) {
258 				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
259 				if (opcode->Opcode == RC_OPCODE_IF) {
260 					or_updatemasks(&s.R,
261 							&s.R,
262 							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
263 
264 					s.BranchStackSize--;
265 				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
266 					if (branch->HaveElse) {
267 						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __func__);
268 					} else {
269 						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
270 						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
271 						branch->HaveElse = 1;
272 					}
273 				} else {
274 					rc_error(c, "%s: Unhandled control flow instruction %s\n", __func__, opcode->Name);
275 				}
276 			}
277 		}
278 
279 		update_instruction(&s, inst);
280 	}
281 
282 	ip = 0;
283 	for(struct rc_instruction * inst = c->Program.Instructions.Next;
284 	    inst != &c->Program.Instructions;
285 	    inst = inst->Next, ++ip) {
286 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
287 		int dead = 1;
288 		unsigned int srcmasks[3];
289 		unsigned int usemask;
290 
291 		if (!opcode->HasDstReg) {
292 			dead = 0;
293 		} else {
294 			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
295 			if (s.Instructions[ip].WriteMask)
296 				dead = 0;
297 
298 			if (s.Instructions[ip].WriteALUResult)
299 				dead = 0;
300 			else
301 				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
302 		}
303 
304 		if (dead) {
305 			struct rc_instruction * todelete = inst;
306 			inst = inst->Prev;
307 			rc_remove_instruction(todelete);
308 			continue;
309 		}
310 
311 		usemask = s.Instructions[ip].WriteMask;
312 
313 		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
314 			usemask |= RC_MASK_X;
315 		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
316 			usemask |= RC_MASK_W;
317 
318 		rc_compute_sources_for_writemask(inst, usemask, srcmasks);
319 
320 		for(unsigned int src = 0; src < 3; ++src) {
321 			for(unsigned int chan = 0; chan < 4; ++chan) {
322 				if (!GET_BIT(srcmasks[src], chan))
323 					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
324 			}
325 		}
326 	}
327 
328 	rc_calculate_inputs_outputs(c);
329 }
330