xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_compiler.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2009 Nicolai Hähnle <[email protected]>
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_compiler.h"
7 
8 #include <stdarg.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 
13 #include "util/u_debug.h"
14 #include "pipe/p_state.h"
15 #include "radeon_dataflow.h"
16 #include "radeon_program.h"
17 #include "radeon_program_pair.h"
18 #include "radeon_regalloc.h"
19 #include "radeon_compiler_util.h"
20 
21 
rc_init(struct radeon_compiler * c,const struct rc_regalloc_state * rs)22 void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
23 {
24 	memset(c, 0, sizeof(*c));
25 
26 	memory_pool_init(&c->Pool);
27 	c->Program.Instructions.Prev = &c->Program.Instructions;
28 	c->Program.Instructions.Next = &c->Program.Instructions;
29 	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
30 	c->regalloc_state = rs;
31 	c->max_temp_index = -1;
32 }
33 
rc_destroy(struct radeon_compiler * c)34 void rc_destroy(struct radeon_compiler * c)
35 {
36 	rc_constants_destroy(&c->Program.Constants);
37 	memory_pool_destroy(&c->Pool);
38 	free(c->ErrorMsg);
39 }
40 
rc_debug(struct radeon_compiler * c,const char * fmt,...)41 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
42 {
43 	va_list ap;
44 
45 	if (!(c->Debug & RC_DBG_LOG))
46 		return;
47 
48 	va_start(ap, fmt);
49 	vfprintf(stderr, fmt, ap);
50 	va_end(ap);
51 }
52 
rc_error(struct radeon_compiler * c,const char * fmt,...)53 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
54 {
55 	va_list ap;
56 
57 	c->Error = 1;
58 
59 	if (!c->ErrorMsg) {
60 		/* Only remember the first error */
61 		char buf[1024];
62 		int written;
63 
64 		va_start(ap, fmt);
65 		written = vsnprintf(buf, sizeof(buf), fmt, ap);
66 		va_end(ap);
67 
68 		if (written < sizeof(buf)) {
69 			c->ErrorMsg = strdup(buf);
70 		} else {
71 			c->ErrorMsg = malloc(written + 1);
72 
73 			va_start(ap, fmt);
74 			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
75 			va_end(ap);
76 		}
77 	}
78 
79 	if (c->Debug & RC_DBG_LOG) {
80 		fprintf(stderr, "r300compiler error: ");
81 
82 		va_start(ap, fmt);
83 		vfprintf(stderr, fmt, ap);
84 		va_end(ap);
85 	}
86 }
87 
/**
 * Report a failed internal assertion through rc_error().
 *
 * \param c          compiler instance that receives the error
 * \param file       source file name to include in the message
 * \param line       source line number to include in the message
 * \param assertion  text of the assertion that failed
 * \return always 1
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);

	return 1;
}
93 
rc_mark_unused_channels(struct radeon_compiler * c,void * user)94 void rc_mark_unused_channels(struct radeon_compiler * c, void *user)
95 {
96 	unsigned int srcmasks[3];
97 
98 	for(struct rc_instruction * inst = c->Program.Instructions.Next;
99 	    inst != &c->Program.Instructions;
100 	    inst = inst->Next) {
101 
102 		rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks);
103 
104 		for(unsigned int src = 0; src < 3; ++src) {
105 			for(unsigned int chan = 0; chan < 4; ++chan) {
106 				if (!GET_BIT(srcmasks[src], chan))
107 					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
108 			}
109 		}
110 	}
111 }
112 
113 /**
114  * Recompute c->Program.InputsRead and c->Program.OutputsWritten
115  * based on which inputs and outputs are actually referenced
116  * in program instructions.
117  */
rc_calculate_inputs_outputs(struct radeon_compiler * c)118 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
119 {
120 	struct rc_instruction *inst;
121 
122 	c->Program.InputsRead = 0;
123 	c->Program.OutputsWritten = 0;
124 
125 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
126 	{
127 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
128 		int i;
129 
130 		for (i = 0; i < opcode->NumSrcRegs; ++i) {
131 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
132 				c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
133 		}
134 
135 		if (opcode->HasDstReg) {
136 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
137 				c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
138 		}
139 	}
140 }
141 
142 /**
143  * Rewrite the program such that a given output is duplicated.
144  */
rc_copy_output(struct radeon_compiler * c,unsigned output,unsigned dup_output)145 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
146 {
147 	unsigned tempreg = rc_find_free_temporary(c);
148 	struct rc_instruction * inst;
149 	struct rc_instruction * insert_pos = c->Program.Instructions.Prev;
150 	struct rc_instruction * last_write_inst = NULL;
151 	unsigned branch_depth = 0;
152 	unsigned loop_depth = 0;
153 	bool emit_after_control_flow = false;
154 	unsigned num_writes = 0;
155 
156 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
157 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
158 
159 		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
160 			loop_depth++;
161 		if (inst->U.I.Opcode == RC_OPCODE_IF)
162 			branch_depth++;
163 		if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
164 		    (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
165 			if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
166 				insert_pos = inst;
167 				emit_after_control_flow = false;
168 			}
169 
170 		if (opcode->HasDstReg) {
171 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
172 				num_writes++;
173 				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
174 				inst->U.I.DstReg.Index = tempreg;
175 				insert_pos = inst;
176 				last_write_inst = inst;
177 				if (loop_depth != 0 && branch_depth != 0)
178 					emit_after_control_flow = true;
179 			}
180 		}
181 	}
182 
183 	/* If there is only a single write, just duplicate the whole instruction instead.
184 	 * We can do this even when the single write was is a control flow.
185 	 */
186 	if (num_writes == 1) {
187 		last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
188 		last_write_inst->U.I.DstReg.Index = output;
189 
190 		inst = rc_insert_new_instruction(c, last_write_inst);
191 		struct rc_instruction * prev = inst->Prev;
192 		struct rc_instruction * next = inst->Next;
193 		memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
194 		inst->Prev = prev;
195 		inst->Next = next;
196 		inst->U.I.DstReg.Index = dup_output;
197 	} else {
198 		inst = rc_insert_new_instruction(c, insert_pos);
199 		inst->U.I.Opcode = RC_OPCODE_MOV;
200 		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
201 		inst->U.I.DstReg.Index = output;
202 
203 		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
204 		inst->U.I.SrcReg[0].Index = tempreg;
205 		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
206 
207 		inst = rc_insert_new_instruction(c, inst);
208 		inst->U.I.Opcode = RC_OPCODE_MOV;
209 		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
210 		inst->U.I.DstReg.Index = dup_output;
211 
212 		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
213 		inst->U.I.SrcReg[0].Index = tempreg;
214 		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
215 	}
216 
217 	c->Program.OutputsWritten |= 1U << dup_output;
218 }
219 
220 
221 /**
222  * Introduce standard code fragment to deal with fragment.position.
223  */
rc_transform_fragment_wpos(struct radeon_compiler * c,unsigned wpos,unsigned new_input,int full_vtransform)224 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
225                                 int full_vtransform)
226 {
227 	unsigned tempregi = rc_find_free_temporary(c);
228 	struct rc_instruction * inst_rcp;
229 	struct rc_instruction * inst_mul;
230 	struct rc_instruction * inst_mad;
231 	struct rc_instruction * inst;
232 
233 	c->Program.InputsRead &= ~(1U << wpos);
234 	c->Program.InputsRead |= 1U << new_input;
235 
236 	/* perspective divide */
237 	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
238 	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
239 
240 	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
241 	inst_rcp->U.I.DstReg.Index = tempregi;
242 	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
243 
244 	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
245 	inst_rcp->U.I.SrcReg[0].Index = new_input;
246 	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
247 
248 	inst_mul = rc_insert_new_instruction(c, inst_rcp);
249 	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
250 
251 	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
252 	inst_mul->U.I.DstReg.Index = tempregi;
253 	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
254 
255 	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
256 	inst_mul->U.I.SrcReg[0].Index = new_input;
257 
258 	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
259 	inst_mul->U.I.SrcReg[1].Index = tempregi;
260 	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
261 
262 	/* viewport transformation */
263 	inst_mad = rc_insert_new_instruction(c, inst_mul);
264 	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
265 
266 	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
267 	inst_mad->U.I.DstReg.Index = tempregi;
268 	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
269 
270 	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
271 	inst_mad->U.I.SrcReg[0].Index = tempregi;
272 	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
273 
274 	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
275 	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
276 
277 	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
278 	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
279 
280 	if (full_vtransform) {
281 		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
282 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
283 	} else {
284 		inst_mad->U.I.SrcReg[1].Index =
285 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
286 	}
287 
288 	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
289 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
290 		unsigned i;
291 
292 		for(i = 0; i < opcode->NumSrcRegs; i++) {
293 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
294 			    inst->U.I.SrcReg[i].Index == wpos) {
295 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
296 				inst->U.I.SrcReg[i].Index = tempregi;
297 			}
298 		}
299 	}
300 }
301 
302 
303 /**
304  * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
305  * Gallium and OpenGL define it the other way around.
306  *
307  * So let's just negate FACE at the beginning of the shader and rewrite the rest
308  * of the shader to read from the newly allocated temporary.
309  */
rc_transform_fragment_face(struct radeon_compiler * c,unsigned face)310 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
311 {
312 	unsigned tempregi = rc_find_free_temporary(c);
313 	struct rc_instruction *inst_add;
314 	struct rc_instruction *inst;
315 
316 	/* perspective divide */
317 	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
318 	inst_add->U.I.Opcode = RC_OPCODE_ADD;
319 
320 	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
321 	inst_add->U.I.DstReg.Index = tempregi;
322 	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
323 
324 	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
325 	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
326 
327 	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
328 	inst_add->U.I.SrcReg[1].Index = face;
329 	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
330 	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
331 
332 	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
333 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
334 		unsigned i;
335 
336 		for(i = 0; i < opcode->NumSrcRegs; i++) {
337 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
338 			    inst->U.I.SrcReg[i].Index == face) {
339 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
340 				inst->U.I.SrcReg[i].Index = tempregi;
341 			}
342 		}
343 	}
344 }
345 
/**
 * Per-register callback used by rc_get_stats() to collect register usage.
 *
 * \param userdata  the struct rc_program_stats being filled in
 * \param inst      instruction containing the register (unused)
 * \param file      register file of the register
 * \param index     register index
 * \param mask      channel mask (unused)
 */
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct rc_program_stats *stats = userdata;

	switch (file) {
	case RC_FILE_TEMPORARY:
		/* Keeps the highest index seen; the caller turns it into a count. */
		if ((int)index > stats->num_temp_regs)
			stats->num_temp_regs = index;
		break;
	case RC_FILE_INLINE:
		stats->num_inline_literals++;
		break;
	case RC_FILE_CONSTANT:
		stats->num_consts = MAX2(stats->num_consts, index + 1);
		break;
	default:
		break;
	}
}
357 
rc_get_stats(struct radeon_compiler * c,struct rc_program_stats * s)358 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
359 {
360 	struct rc_instruction * tmp;
361 	memset(s, 0, sizeof(*s));
362 	unsigned ip = 0;
363 	int last_begintex = -1;
364 
365 	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
366 							tmp = tmp->Next, ip++){
367 		const struct rc_opcode_info * info;
368 		rc_for_all_reads_mask(tmp, reg_count_callback, s);
369 		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
370 			info = rc_get_opcode_info(tmp->U.I.Opcode);
371 			if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
372 				/* The R5xx docs mention ~30 cycles in section 8.3.1
373 				 * The only case when we don't want to add the cycles
374 				 * penalty is when the texblock contains only kil.
375 				 */
376 				const struct rc_opcode_info *next_op
377 					= rc_get_opcode_info(tmp->Next->U.I.Opcode);
378 				struct rc_instruction *second_next_instr = tmp->Next->Next;
379 				const struct rc_opcode_info *second_next_op;
380 				if (second_next_instr->Type == RC_INSTRUCTION_NORMAL) {
381 					second_next_op = rc_get_opcode_info(second_next_instr->U.I.Opcode);
382 				} else {
383 					second_next_op = rc_get_opcode_info(second_next_instr->U.P.RGB.Opcode);
384 				}
385 				if (next_op->Opcode != RC_OPCODE_KIL ||
386 					(second_next_instr->Type == RC_INSTRUCTION_NORMAL &&
387 					 second_next_op->HasTexture)) {
388 					s->num_cycles += 30;
389 					last_begintex = ip;
390 				}
391 				continue;
392 			}
393 			if (info->Opcode == RC_OPCODE_MAD &&
394 				rc_inst_has_three_diff_temp_srcs(tmp))
395 				s->num_cycles++;
396 		} else {
397 			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
398 				s->num_presub_ops++;
399 			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
400 				s->num_presub_ops++;
401 			/* Assuming alpha will never be a flow control or
402 			 * a tex instruction. */
403 			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
404 				s->num_alpha_insts++;
405 			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
406 				s->num_rgb_insts++;
407 			if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
408 				tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
409 				s->num_omod_ops++;
410 			}
411 			if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
412 				tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
413 				s->num_omod_ops++;
414 			}
415 			if (tmp->U.P.Nop)
416 				s->num_cycles++;
417 			/* SemWait has effect only on R500, the more instructions we can put
418 			 * between the tex block and the first texture semaphore, the better.
419 			 */
420 			if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) {
421 				s->num_cycles -= MIN2(30, ip - last_begintex);
422 				last_begintex = -1;
423 			}
424 			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
425 		}
426 		if (info->IsFlowControl) {
427 			s->num_fc_insts++;
428 			if (info->Opcode == RC_OPCODE_BGNLOOP)
429 				s->num_loops++;
430 		}
431 		/* VS flow control was already translated to the predicate instructions */
432 		if (c->type == RC_VERTEX_PROGRAM)
433 			if (strstr(info->Name, "PRED") != NULL)
434 				s->num_pred_insts++;
435 
436 		if (info->HasTexture)
437 			s->num_tex_insts++;
438 		s->num_insts++;
439 		s->num_cycles++;
440 	}
441 	/* Increment here because the reg_count_callback store the max
442 	 * temporary reg index in s->nun_temp_regs. */
443 	s->num_temp_regs++;
444 }
445 
print_stats(struct radeon_compiler * c)446 static void print_stats(struct radeon_compiler * c)
447 {
448 	struct rc_program_stats s;
449 
450 	rc_get_stats(c, &s);
451 
452 	/* Note that we print some dummy values for instruction categories that
453 	 * only the FS has, because shader-db's report.py wants all shaders to
454 	 * have the same set.
455 	 */
456 	util_debug_message(c->debug, SHADER_INFO,
457 	                   "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, "
458 	                   "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
459 	                   c->type == RC_VERTEX_PROGRAM ? "VS" : "FS",
460 	                   s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts,
461 	                   s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
462 	                   s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals,
463 	                   s.num_cycles);
464 }
465 
466 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
467 	"Vertex Program",
468 	"Fragment Program"
469 };
470 
rc_run_compiler_passes(struct radeon_compiler * c,struct radeon_compiler_pass * list)471 bool rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
472 {
473 	for (unsigned i = 0; list[i].name; i++) {
474 		if (list[i].predicate) {
475 			list[i].run(c, list[i].user);
476 
477 			if (c->Error)
478 				return false;
479 
480 			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
481 				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
482 				rc_print_program(&c->Program);
483 			}
484 		}
485 	}
486 	return true;
487 }
488 
489 /* Executes a list of compiler passes given in the parameter 'list'. */
rc_run_compiler(struct radeon_compiler * c,struct radeon_compiler_pass * list)490 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
491 {
492 	if (c->Debug & RC_DBG_LOG) {
493 		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
494 		rc_print_program(&c->Program);
495 	}
496 
497 	if(rc_run_compiler_passes(c, list)) {
498 		print_stats(c);
499 	}
500 }
501 
rc_validate_final_shader(struct radeon_compiler * c,void * user)502 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
503 {
504 	/* Check the number of constants. */
505 	if (c->Program.Constants.Count > c->max_constants) {
506 		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
507 			 c->max_constants, c->Program.Constants.Count);
508 	}
509 }
510