xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2005 Ben Skeggs.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 /**
7  * \file
8  *
9  * Emit the r300_fragment_program_code that can be understood by the hardware.
10  * Input is a pre-transformed radeon_program.
11  *
12  * \author Ben Skeggs <[email protected]>
13  *
14  * \author Jerome Glisse <[email protected]>
15  */
16 
17 #include "r300_fragprog.h"
18 
19 #include "r300_reg.h"
20 
21 #include "radeon_program_pair.h"
22 #include "r300_fragprog_swizzle.h"
23 
24 #include "util/compiler.h"
25 
26 
/**
 * Bookkeeping carried through the emission of one fragment program.
 */
struct r300_emit_state {
	/* Compiler whose program is being turned into hardware code. */
	struct r300_fragment_program_compiler *compiler;

	/* Index of the node currently being filled (2-bit field). */
	unsigned current_node : 2;
	/* Value of tex.length when the current node was started. */
	unsigned node_first_tex : 8;
	/* Value of alu.length when the current node was started. */
	unsigned node_first_alu : 8;
	/* Flag bits OR-ed into the current node's code_addr word. */
	uint32_t node_flags;
};
35 
/*
 * Declare the locals `c` (the fragment program compiler) and `code` (its
 * r300 hardware code).  Expects a variable named `emit` in the enclosing
 * scope.  The expansion ends without a semicolon; the use site supplies it.
 */
#define PROG_CODE                                                   \
	struct r300_fragment_program_compiler *c = emit->compiler;  \
	struct r300_fragment_program_code *code = &c->code->code.r300
39 
/*
 * Report a compile error through rc_error(), prefixing the message with the
 * source file and function name and appending a newline.  Expects a variable
 * named `c` (the fragment program compiler) in the enclosing scope.
 */
#define error(fmt, args...)                                 \
	do {                                                \
		rc_error(&c->Base, "%s::%s(): " fmt "\n",   \
			 __FILE__, __func__, ##args);       \
	} while (0)
44 
/**
 * Extract the three bits of an ALU offset or size that sit directly above
 * its six least significant bits (bits 6..8 of \p bits).
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int above_low_six = bits >> 6;

	return above_low_six & 0x7;
}
49 
/**
 * Extract the most significant bits of a TEX offset or size, i.e. the bits
 * that remain after the \p lsbs least significant bits are dropped.
 *
 * The result is limited to a contiguous four-bit value.  The previous mask,
 * 0x15 (binary 10101), discarded bits 1 and 3 of the shifted value and so
 * could not represent a contiguous field; it appears to have been decimal 15
 * written with a hex prefix.
 * NOTE(review): the four-bit width assumes the R400 TEX MSB register fields
 * are four bits wide - confirm against r300_reg.h.
 *
 * @param bits The full offset or size value
 * @param lsbs The number of least significant bits
 * @return The (up to four) bits of \p bits directly above the low \p lsbs bits
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0xf;
}
57 
/*
 * Shift x right by lsbs and keep the bits selected by mask.  Every parameter
 * is parenthesized so that arguments which are themselves expressions
 * (e.g. a mask written as `A | B`) are evaluated as a unit.
 */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
59 
60 /**
61  * Mark a temporary register as used.
62  */
use_temporary(struct r300_fragment_program_code * code,unsigned int index)63 static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
64 {
65 	if (index > code->pixsize)
66 		code->pixsize = index;
67 }
68 
use_source(struct r300_fragment_program_code * code,struct rc_pair_instruction_source src)69 static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
70 {
71 	if (!src.Used)
72 		return 0;
73 
74 	if (src.File == RC_FILE_CONSTANT) {
75 		return src.Index | (1 << 5);
76 	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
77 		use_temporary(code, src.Index);
78 		return src.Index & 0x1f;
79 	}
80 
81 	return 0;
82 }
83 
84 
/**
 * Map a compiler opcode to the R300_ALU_OUTC_* value for the RGB half of a
 * paired ALU instruction.
 *
 * NOP is emitted as MAD.  An opcode with no mapping is reported as an error
 * and also yields MAD.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R300_ALU_OUTC_MAD;
	case RC_OPCODE_CMP:
		return R300_ALU_OUTC_CMP;
	case RC_OPCODE_CND:
		return R300_ALU_OUTC_CND;
	case RC_OPCODE_DP3:
		return R300_ALU_OUTC_DP3;
	case RC_OPCODE_DP4:
		return R300_ALU_OUTC_DP4;
	case RC_OPCODE_FRC:
		return R300_ALU_OUTC_FRC;
	case RC_OPCODE_MAX:
		return R300_ALU_OUTC_MAX;
	case RC_OPCODE_MIN:
		return R300_ALU_OUTC_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R300_ALU_OUTC_REPL_ALPHA;
	default:
		break;
	}

	/* No mapping: report it and fall back to MAD. */
	error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
	return R300_ALU_OUTC_MAD;
}
104 
/**
 * Map a compiler opcode to the R300_ALU_OUTA_* value for the alpha half of a
 * paired ALU instruction.
 *
 * DP3 and DP4 both map to R300_ALU_OUTA_DP4.  NOP is emitted as MAD.  An
 * opcode with no mapping is reported as an error and also yields MAD.
 *
 * @param c      Compiler used for error reporting
 * @param opcode Opcode of the alpha sub-instruction
 * @return The R300_ALU_OUTA_* opcode bits
 */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
	default:
		/* The message names this function; it used to name the RGB
		 * translator, which pointed debugging at the wrong place. */
		error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
		FALLTHROUGH;
	case RC_OPCODE_NOP:
		FALLTHROUGH;
	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
	}
}
127 
128 /**
129  * Emit one paired ALU instruction.
130  */
emit_alu(struct r300_emit_state * emit,struct rc_pair_instruction * inst)131 static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
132 {
133 	int ip;
134 	int j;
135 	PROG_CODE;
136 
137 	if (code->alu.length >= c->Base.max_alu_insts) {
138 		/* rc_recompute_ips does not give an exact count, because it counts extra stuff
139 		 * like BEGINTEX, but here it is intended to be only approximative anyway,
140 		 * just to give some idea how close to the limit we are. */
141 		rc_error(&c->Base, "Too many ALU instructions used: %u, max: %u.\n",
142 		         rc_recompute_ips(&c->Base), c->Base.max_alu_insts);
143 		return 0;
144 	}
145 
146 	ip = code->alu.length++;
147 
148 	code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
149 	code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
150 
151 	for(j = 0; j < 3; ++j) {
152 		/* Set the RGB address */
153 		unsigned int src = use_source(code, inst->RGB.Src[j]);
154 		unsigned int arg;
155 		if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
156 			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
157 
158 		code->alu.inst[ip].rgb_addr |= src << (6*j);
159 
160 		/* Set the Alpha address */
161 		src = use_source(code, inst->Alpha.Src[j]);
162 		if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
163 			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
164 
165 		code->alu.inst[ip].alpha_addr |= src << (6*j);
166 
167 		arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
168 		arg |= inst->RGB.Arg[j].Abs << 6;
169 		arg |= inst->RGB.Arg[j].Negate << 5;
170 		code->alu.inst[ip].rgb_inst |= arg << (7*j);
171 
172 		arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
173 		arg |= inst->Alpha.Arg[j].Abs << 6;
174 		arg |= inst->Alpha.Arg[j].Negate << 5;
175 		code->alu.inst[ip].alpha_inst |= arg << (7*j);
176 	}
177 
178 	/* Presubtract */
179 	if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
180 		switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
181 		case RC_PRESUB_BIAS:
182 			code->alu.inst[ip].rgb_inst |=
183 						R300_ALU_SRCP_1_MINUS_2_SRC0;
184 			break;
185 		case RC_PRESUB_ADD:
186 			code->alu.inst[ip].rgb_inst |=
187 						R300_ALU_SRCP_SRC1_PLUS_SRC0;
188 			break;
189 		case RC_PRESUB_SUB:
190 			code->alu.inst[ip].rgb_inst |=
191 						R300_ALU_SRCP_SRC1_MINUS_SRC0;
192 			break;
193 		case RC_PRESUB_INV:
194 			code->alu.inst[ip].rgb_inst |=
195 						R300_ALU_SRCP_1_MINUS_SRC0;
196 			break;
197 		default:
198 			break;
199 		}
200 	}
201 
202 	if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
203 		switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
204 		case RC_PRESUB_BIAS:
205 			code->alu.inst[ip].alpha_inst |=
206 						R300_ALU_SRCP_1_MINUS_2_SRC0;
207 			break;
208 		case RC_PRESUB_ADD:
209 			code->alu.inst[ip].alpha_inst |=
210 						R300_ALU_SRCP_SRC1_PLUS_SRC0;
211 			break;
212 		case RC_PRESUB_SUB:
213 			code->alu.inst[ip].alpha_inst |=
214 						R300_ALU_SRCP_SRC1_MINUS_SRC0;
215 			break;
216 		case RC_PRESUB_INV:
217 			code->alu.inst[ip].alpha_inst |=
218 						R300_ALU_SRCP_1_MINUS_SRC0;
219 			break;
220 		default:
221 			break;
222 		}
223 	}
224 
225 	if (inst->RGB.Saturate)
226 		code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
227 	if (inst->Alpha.Saturate)
228 		code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
229 
230 	if (inst->RGB.WriteMask) {
231 		use_temporary(code, inst->RGB.DestIndex);
232 		if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
233 			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
234 		code->alu.inst[ip].rgb_addr |=
235 			((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
236 			(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
237 	}
238 	if (inst->RGB.OutputWriteMask) {
239 		code->alu.inst[ip].rgb_addr |=
240             (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
241             R300_RGB_TARGET(inst->RGB.Target);
242 		emit->node_flags |= R300_RGBA_OUT;
243 	}
244 
245 	if (inst->Alpha.WriteMask) {
246 		use_temporary(code, inst->Alpha.DestIndex);
247 		if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
248 			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
249 		code->alu.inst[ip].alpha_addr |=
250 			((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
251 			R300_ALU_DSTA_REG;
252 	}
253 	if (inst->Alpha.OutputWriteMask) {
254 		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
255             R300_ALPHA_TARGET(inst->Alpha.Target);
256 		emit->node_flags |= R300_RGBA_OUT;
257 	}
258 	if (inst->Alpha.DepthWriteMask) {
259 		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
260 		emit->node_flags |= R300_W_OUT;
261 		c->code->writes_depth = 1;
262 	}
263 	if (inst->Nop)
264 		code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
265 
266 	/* Handle Output Modifier
267 	 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
268 	if (inst->RGB.Omod) {
269 		if (inst->RGB.Omod == RC_OMOD_DISABLE) {
270 			rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
271 		}
272 		code->alu.inst[ip].rgb_inst |=
273 			(inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
274 	}
275 	if (inst->Alpha.Omod) {
276 		if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
277 			rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
278 		}
279 		code->alu.inst[ip].alpha_inst |=
280 			(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
281 	}
282 	return 1;
283 }
284 
285 
286 /**
287  * Finish the current node without advancing to the next one.
288  */
finish_node(struct r300_emit_state * emit)289 static int finish_node(struct r300_emit_state * emit)
290 {
291 	struct r300_fragment_program_compiler * c = emit->compiler;
292 	struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
293 	unsigned alu_offset;
294 	unsigned alu_end;
295 	unsigned tex_offset;
296 	unsigned tex_end;
297 
298 	unsigned int alu_offset_msbs, alu_end_msbs;
299 
300 	if (code->alu.length == emit->node_first_alu) {
301 		/* Generate a single NOP for this node */
302 		struct rc_pair_instruction inst;
303 		memset(&inst, 0, sizeof(inst));
304 		if (!emit_alu(emit, &inst))
305 			return 0;
306 	}
307 
308 	alu_offset = emit->node_first_alu;
309 	alu_end = code->alu.length - alu_offset - 1;
310 	tex_offset = emit->node_first_tex;
311 	tex_end = code->tex.length - tex_offset - 1;
312 
313 	if (code->tex.length == emit->node_first_tex) {
314 		if (emit->current_node > 0) {
315 			error("Node %i has no TEX instructions", emit->current_node);
316 			return 0;
317 		}
318 
319 		tex_end = 0;
320 	} else {
321 		if (emit->current_node == 0)
322 			code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
323 	}
324 
325 	/* Write the config register.
326 	 * Note: The order in which the words for each node are written
327 	 * is not correct here and needs to be fixed up once we're entirely
328 	 * done
329 	 *
330 	 * Also note that the register specification from AMD is slightly
331 	 * incorrect in its description of this register. */
332 	code->code_addr[emit->current_node]  =
333 			((alu_offset << R300_ALU_START_SHIFT)
334 				& R300_ALU_START_MASK)
335 			| ((alu_end << R300_ALU_SIZE_SHIFT)
336 				& R300_ALU_SIZE_MASK)
337 			| ((tex_offset << R300_TEX_START_SHIFT)
338 				& R300_TEX_START_MASK)
339 			| ((tex_end << R300_TEX_SIZE_SHIFT)
340 				& R300_TEX_SIZE_MASK)
341 			| emit->node_flags
342 			| (get_msbs_tex(tex_offset, 5)
343 				<< R400_TEX_START_MSB_SHIFT)
344 			| (get_msbs_tex(tex_end, 5)
345 				<< R400_TEX_SIZE_MSB_SHIFT)
346 			;
347 
348 	/* Write r400 extended instruction fields.  These will be ignored on
349 	 * r300 cards.  */
350 	alu_offset_msbs = get_msbs_alu(alu_offset);
351 	alu_end_msbs = get_msbs_alu(alu_end);
352 	switch(emit->current_node) {
353 	case 0:
354 		code->r400_code_offset_ext |=
355 			alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
356 			| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
357 		break;
358 	case 1:
359 		code->r400_code_offset_ext |=
360 			alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
361 			| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
362 		break;
363 	case 2:
364 		code->r400_code_offset_ext |=
365 			alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
366 			| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
367 		break;
368 	case 3:
369 		code->r400_code_offset_ext |=
370 			alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
371 			| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
372 		break;
373 	}
374 	return 1;
375 }
376 
377 
378 /**
379  * Begin a block of texture instructions.
380  * Create the necessary indirection.
381  */
begin_tex(struct r300_emit_state * emit)382 static int begin_tex(struct r300_emit_state * emit)
383 {
384 	PROG_CODE;
385 
386 	if (code->alu.length == emit->node_first_alu &&
387 	    code->tex.length == emit->node_first_tex) {
388 		return 1;
389 	}
390 
391 	if (emit->current_node == 3) {
392 		error("Too many texture indirections");
393 		return 0;
394 	}
395 
396 	if (!finish_node(emit))
397 		return 0;
398 
399 	emit->current_node++;
400 	emit->node_first_tex = code->tex.length;
401 	emit->node_first_alu = code->alu.length;
402 	emit->node_flags = 0;
403 	return 1;
404 }
405 
406 
emit_tex(struct r300_emit_state * emit,struct rc_instruction * inst)407 static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
408 {
409 	unsigned int unit;
410 	unsigned int dest;
411 	unsigned int opcode;
412 	PROG_CODE;
413 
414 	if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
415 		error("Too many TEX instructions");
416 		return 0;
417 	}
418 
419 	unit = inst->U.I.TexSrcUnit;
420 	dest = inst->U.I.DstReg.Index;
421 
422 	switch(inst->U.I.Opcode) {
423 	case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
424 	case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
425 	case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
426 	case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
427 	default:
428 		error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
429 		return 0;
430 	}
431 
432 	if (inst->U.I.Opcode == RC_OPCODE_KIL) {
433 		unit = 0;
434 		dest = 0;
435 	} else {
436 		use_temporary(code, dest);
437 	}
438 
439 	use_temporary(code, inst->U.I.SrcReg[0].Index);
440 
441 	code->tex.inst[code->tex.length++] =
442 		((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
443 			& R300_SRC_ADDR_MASK)
444 		| ((dest << R300_DST_ADDR_SHIFT)
445 			& R300_DST_ADDR_MASK)
446 		| (unit << R300_TEX_ID_SHIFT)
447 		| (opcode << R300_TEX_INST_SHIFT)
448 		| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
449 			R400_SRC_ADDR_EXT_BIT : 0)
450 		| (dest >= R300_PFS_NUM_TEMP_REGS ?
451 			R400_DST_ADDR_EXT_BIT : 0)
452 		;
453 	return 1;
454 }
455 
456 
457 /**
458  * Final compilation step: Turn the intermediate radeon_program into
459  * machine-readable instructions.
460  */
r300BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)461 void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
462 {
463 	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
464 	struct r300_emit_state emit;
465 	struct r300_fragment_program_code *code = &compiler->code->code.r300;
466 	unsigned int tex_end;
467 
468 	memset(&emit, 0, sizeof(emit));
469 	emit.compiler = compiler;
470 
471 	memset(code, 0, sizeof(struct r300_fragment_program_code));
472 
473 	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
474 	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
475 	    inst = inst->Next) {
476 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
477 			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
478 				begin_tex(&emit);
479 				continue;
480 			}
481 
482 			emit_tex(&emit, inst);
483 		} else {
484 			emit_alu(&emit, &inst->U.P);
485 		}
486 	}
487 
488 	if (code->pixsize >= compiler->Base.max_temp_regs)
489 		rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
490 
491 	if (compiler->Base.Error)
492 		return;
493 
494 	/* Finish the program */
495 	finish_node(&emit);
496 
497 	code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
498 
499 	/* Set r400 extended instruction fields.  These values will be ignored
500 	 * on r300 cards. */
501 	code->r400_code_offset_ext |=
502 		(get_msbs_alu(0)
503 				<< R400_ALU_OFFSET_MSB_SHIFT)
504 		| (get_msbs_alu(code->alu.length - 1)
505 				<< R400_ALU_SIZE_MSB_SHIFT);
506 
507 	tex_end = code->tex.length ? code->tex.length - 1 : 0;
508 	code->code_offset =
509 		((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
510 			& R300_PFS_CNTL_ALU_OFFSET_MASK)
511 		| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
512 			& R300_PFS_CNTL_ALU_END_MASK)
513 		| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
514 			& R300_PFS_CNTL_TEX_OFFSET_MASK)
515 		| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
516 			& R300_PFS_CNTL_TEX_END_MASK)
517 		| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
518 		| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
519 		;
520 
521 	if (emit.current_node < 3) {
522 		int shift = 3 - emit.current_node;
523 		int i;
524 		for(i = emit.current_node; i >= 0; --i)
525 			code->code_addr[shift + i] = code->code_addr[i];
526 		for(i = 0; i < shift; ++i)
527 			code->code_addr[i] = 0;
528 	}
529 
530 	if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
531 	    || code->alu.length > R300_PFS_MAX_ALU_INST
532 	    || code->tex.length > R300_PFS_MAX_TEX_INST) {
533 
534 		code->r390_mode = 1;
535 	}
536 }
537