xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2005 Ben Skeggs.
3  * Copyright 2008 Corbin Simpson <[email protected]>
4  * SPDX-License-Identifier: MIT
5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6  */
7 /**
8  * \file
9  *
10  * \author Ben Skeggs <[email protected]>
11  *
12  * \author Jerome Glisse <[email protected]>
13  *
14  * \author Corbin Simpson <[email protected]>
15  *
16  */
17 
18 #include "r500_fragprog.h"
19 
20 #include "r300_reg.h"
21 
22 #include "radeon_program_pair.h"
23 
24 #include "util/compiler.h"
25 
/* Declare `code` as a shortcut to the R500 program of the compiler `c` in scope. */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500

/*
 * Report a compile error through rc_error() on the compiler `c` in scope.
 * The message is prefixed with the source file and function name and a
 * newline is appended to it.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			 __FILE__, __func__, ##__VA_ARGS__); \
	} while (0)
33 
34 
/**
 * Instruction slots that belong to one open IF block.
 */
struct branch_info {
	int If;    /* slot reserved for the IF instruction */
	int Else;  /* slot reserved for the ELSE instruction, -1 if none was seen */
	int Endif; /* slot of the ENDIF instruction, -1 until it is emitted */
};
40 
/**
 * State tracked for one open loop while its body is being emitted.
 */
struct r500_loop_info {
	int BgnLoop;      /* slot of the BGNLOOP instruction */

	int BranchDepth;  /* branch depth at the time the loop was opened */
	int *Brks;        /* slots of BRK instructions waiting for a jump address */
	int BrkCount;     /* number of entries used in Brks */
	int BrkReserved;  /* reservation counter handed to memory_pool_array_reserve() */

	int *Conts;       /* slots of CONT instructions waiting for a jump address */
	int ContCount;    /* number of entries used in Conts */
	int ContReserved; /* reservation counter handed to memory_pool_array_reserve() */
};
53 
/**
 * Working state shared by the flow-control emitter across one program.
 */
struct emit_state {
	struct radeon_compiler *C;               /* compiler used for errors and pool allocation */
	struct r500_fragment_program_code *Code; /* hardware program being written */

	/* Stack of currently open IF blocks. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of currently open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Deepest IF nesting reached so far. */
	unsigned int MaxBranchDepth;

};
69 
/**
 * Map a compiler opcode to the R500 RGB ALU opcode.
 *
 * NOP is encoded as MAD. An opcode that has no entry here is reported
 * through error() and is encoded as MAD as well.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	unsigned int hw_op;

	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		hw_op = R500_ALU_RGBA_OP_MAD;
		break;
	case RC_OPCODE_CMP:
		hw_op = R500_ALU_RGBA_OP_CMP;
		break;
	case RC_OPCODE_CND:
		hw_op = R500_ALU_RGBA_OP_CND;
		break;
	case RC_OPCODE_DDX:
		hw_op = R500_ALU_RGBA_OP_MDH;
		break;
	case RC_OPCODE_DDY:
		hw_op = R500_ALU_RGBA_OP_MDV;
		break;
	case RC_OPCODE_DP3:
		hw_op = R500_ALU_RGBA_OP_DP3;
		break;
	case RC_OPCODE_DP4:
		hw_op = R500_ALU_RGBA_OP_DP4;
		break;
	case RC_OPCODE_FRC:
		hw_op = R500_ALU_RGBA_OP_FRC;
		break;
	case RC_OPCODE_MAX:
		hw_op = R500_ALU_RGBA_OP_MAX;
		break;
	case RC_OPCODE_MIN:
		hw_op = R500_ALU_RGBA_OP_MIN;
		break;
	case RC_OPCODE_REPL_ALPHA:
		hw_op = R500_ALU_RGBA_OP_SOP;
		break;
	default:
		error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
		hw_op = R500_ALU_RGBA_OP_MAD;
		break;
	}

	return hw_op;
}
91 
/**
 * Map a compiler opcode to the R500 alpha ALU opcode.
 *
 * NOP is encoded as MAD. An opcode that has no entry here is reported
 * through error() and is encoded as MAD as well.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	unsigned int hw_op;

	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		hw_op = R500_ALPHA_OP_MAD;
		break;
	case RC_OPCODE_CMP:
		hw_op = R500_ALPHA_OP_CMP;
		break;
	case RC_OPCODE_CND:
		hw_op = R500_ALPHA_OP_CND;
		break;
	case RC_OPCODE_COS:
		hw_op = R500_ALPHA_OP_COS;
		break;
	case RC_OPCODE_DDX:
		hw_op = R500_ALPHA_OP_MDH;
		break;
	case RC_OPCODE_DDY:
		hw_op = R500_ALPHA_OP_MDV;
		break;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		/* Both dot products share one alpha opcode. */
		hw_op = R500_ALPHA_OP_DP;
		break;
	case RC_OPCODE_EX2:
		hw_op = R500_ALPHA_OP_EX2;
		break;
	case RC_OPCODE_FRC:
		hw_op = R500_ALPHA_OP_FRC;
		break;
	case RC_OPCODE_LG2:
		hw_op = R500_ALPHA_OP_LN2;
		break;
	case RC_OPCODE_MAX:
		hw_op = R500_ALPHA_OP_MAX;
		break;
	case RC_OPCODE_MIN:
		hw_op = R500_ALPHA_OP_MIN;
		break;
	case RC_OPCODE_RCP:
		hw_op = R500_ALPHA_OP_RCP;
		break;
	case RC_OPCODE_RSQ:
		hw_op = R500_ALPHA_OP_RSQ;
		break;
	case RC_OPCODE_SIN:
		hw_op = R500_ALPHA_OP_SIN;
		break;
	default:
		error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
		hw_op = R500_ALPHA_OP_MAD;
		break;
	}

	return hw_op;
}
118 
fix_hw_swizzle(unsigned int swz)119 static unsigned int fix_hw_swizzle(unsigned int swz)
120 {
121     switch (swz) {
122         case RC_SWIZZLE_ZERO:
123         case RC_SWIZZLE_UNUSED:
124             swz = 4;
125             break;
126         case RC_SWIZZLE_HALF:
127             swz = 5;
128             break;
129         case RC_SWIZZLE_ONE:
130             swz = 6;
131             break;
132     }
133 
134 	return swz;
135 }
136 
translate_arg_rgb(struct rc_pair_instruction * inst,int arg)137 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
138 {
139 	unsigned int t = inst->RGB.Arg[arg].Source;
140 	int comp;
141 	t |= inst->RGB.Arg[arg].Negate << 11;
142 	t |= inst->RGB.Arg[arg].Abs << 12;
143 
144 	for(comp = 0; comp < 3; ++comp)
145 		t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
146 
147 	return t;
148 }
149 
translate_arg_alpha(struct rc_pair_instruction * inst,int i)150 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
151 {
152 	unsigned int t = inst->Alpha.Arg[i].Source;
153 	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
154 	t |= inst->Alpha.Arg[i].Negate << 5;
155 	t |= inst->Alpha.Arg[i].Abs << 6;
156 	return t;
157 }
158 
/**
 * Map a compare function to the ALU-result comparison bits of inst0.
 * Compare functions other than EQUAL, LESS, GEQUAL and NOTEQUAL are
 * reported as an error and yield 0.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	if (func == RC_COMPARE_FUNC_EQUAL)
		return R500_INST_ALU_RESULT_OP_EQ;
	if (func == RC_COMPARE_FUNC_LESS)
		return R500_INST_ALU_RESULT_OP_LT;
	if (func == RC_COMPARE_FUNC_GEQUAL)
		return R500_INST_ALU_RESULT_OP_GE;
	if (func == RC_COMPARE_FUNC_NOTEQUAL)
		return R500_INST_ALU_RESULT_OP_NE;

	rc_error(&c->Base, "%s: unsupported compare func %i\n", __func__, func);
	return 0;
}
171 
use_temporary(struct r500_fragment_program_code * code,unsigned int index)172 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
173 {
174 	if (index > code->max_temp_idx)
175 		code->max_temp_idx = index;
176 }
177 
use_source(struct r500_fragment_program_code * code,struct rc_pair_instruction_source src)178 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
179 {
180 	/* From docs:
181 	 *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
182 	 * MSB = 1 << 7 */
183 	if (!src.Used)
184 		return 1 << 7;
185 
186 	if (src.File == RC_FILE_CONSTANT) {
187 		return src.Index | R500_RGB_ADDR0_CONST;
188 	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
189 		use_temporary(code, src.Index);
190 		return src.Index;
191 	} else if (src.File == RC_FILE_INLINE) {
192 		return src.Index | (1 << 7);
193 	}
194 
195 	return 0;
196 }
197 
198 /**
199  * NOP the specified instruction if it is not a texture lookup.
200  */
alu_nop(struct r300_fragment_program_compiler * c,int ip)201 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
202 {
203 	PROG_CODE;
204 
205 	if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
206 		code->inst[ip].inst0 |= R500_INST_NOP;
207 	}
208 }
209 
210 /**
211  * Emit a paired ALU instruction.
212  */
emit_paired(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * inst)213 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
214 {
215 	int ip;
216 	PROG_CODE;
217 
218 	if (code->inst_end >= c->Base.max_alu_insts-1) {
219 		error("emit_alu: Too many instructions");
220 		return;
221 	}
222 
223 	ip = ++code->inst_end;
224 
225 	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
226 	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
227 		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
228 		if (ip > 0) {
229 			alu_nop(c, ip - 1);
230 		}
231 	}
232 
233 	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
234 	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
235 
236 	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
237 		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
238 		if (inst->WriteALUResult) {
239 			error("Cannot write output and ALU result at the same time");
240 			return;
241 		}
242 	} else {
243 		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
244 	}
245 	code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
246 
247 	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
248 	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
249 	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
250 	if (inst->Nop) {
251 		code->inst[ip].inst0 |= R500_INST_NOP;
252 	}
253 	if (inst->Alpha.DepthWriteMask) {
254 		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
255 		c->code->writes_depth = 1;
256 	}
257 
258 	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
259 	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
260 	if (inst->Alpha.WriteMask)
261 		use_temporary(code, inst->Alpha.DestIndex);
262 	if (inst->RGB.WriteMask)
263 		use_temporary(code, inst->RGB.DestIndex);
264 
265 	if (inst->RGB.Saturate)
266 		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
267 	if (inst->Alpha.Saturate)
268 		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
269 
270 	/* Set the presubtract operation. */
271 	switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
272 		case RC_PRESUB_BIAS:
273 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
274 			break;
275 		case RC_PRESUB_SUB:
276 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
277 			break;
278 		case RC_PRESUB_ADD:
279 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
280 			break;
281 		case RC_PRESUB_INV:
282 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
283 			break;
284 		default:
285 			break;
286 	}
287 	switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
288 		case RC_PRESUB_BIAS:
289 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
290 			break;
291 		case RC_PRESUB_SUB:
292 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
293 			break;
294 		case RC_PRESUB_ADD:
295 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
296 			break;
297 		case RC_PRESUB_INV:
298 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
299 			break;
300 		default:
301 			break;
302 	}
303 
304 	/* Set the output modifier */
305 	code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
306 	code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
307 
308 	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
309 	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
310 	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
311 
312 	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
313 	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
314 	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
315 
316 	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
317 	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
318 	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
319 
320 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
321 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
322 	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
323 
324 	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
325 	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
326 
327 	if (inst->WriteALUResult) {
328 		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
329 
330 		if (inst->WriteALUResult == RC_ALURESULT_X)
331 			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
332 		else
333 			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
334 
335 		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
336 	}
337 }
338 
/**
 * Pack the four channel selectors of `swizzle` into an 8-bit field with
 * two bits per channel, keeping only the low two bits of each selector.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan = 0;

	while (chan < 4) {
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << chan*2;
		chan++;
	}

	return packed;
}
347 
348 /**
349  * Emit a single TEX instruction
350  */
emit_tex(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)351 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
352 {
353 	int ip;
354 	PROG_CODE;
355 
356 	if (code->inst_end >= c->Base.max_alu_insts-1) {
357 		error("emit_tex: Too many instructions");
358 		return 0;
359 	}
360 
361 	ip = ++code->inst_end;
362 
363 	code->inst[ip].inst0 = R500_INST_TYPE_TEX
364 		| (inst->DstReg.WriteMask << 11)
365 		| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
366 	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
367 		| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
368 
369 	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
370 		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
371 
372 	switch (inst->Opcode) {
373 	case RC_OPCODE_KIL:
374 		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
375 		break;
376 	case RC_OPCODE_TEX:
377 		code->inst[ip].inst1 |= R500_TEX_INST_LD;
378 		break;
379 	case RC_OPCODE_TXB:
380 		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
381 		break;
382 	case RC_OPCODE_TXP:
383 		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
384 		break;
385 	case RC_OPCODE_TXD:
386 		code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
387 		break;
388 	case RC_OPCODE_TXL:
389 		code->inst[ip].inst1 |= R500_TEX_INST_LOD;
390 		break;
391 	default:
392 		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
393 	}
394 
395 	use_temporary(code, inst->SrcReg[0].Index);
396 	if (inst->Opcode != RC_OPCODE_KIL)
397 		use_temporary(code, inst->DstReg.Index);
398 
399 	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
400 		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
401 		| R500_TEX_DST_ADDR(inst->DstReg.Index)
402 		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
403 		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
404 		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
405 		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
406 		;
407 
408 	if (inst->Opcode == RC_OPCODE_TXD) {
409 		use_temporary(code, inst->SrcReg[1].Index);
410 		use_temporary(code, inst->SrcReg[2].Index);
411 
412 		/* DX and DY parameters are specified in a separate register. */
413 		code->inst[ip].inst3 =
414 			R500_DX_ADDR(inst->SrcReg[1].Index) |
415 			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
416 			R500_DY_ADDR(inst->SrcReg[2].Index) |
417 			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
418 	}
419 
420 	return 1;
421 }
422 
emit_flowcontrol(struct emit_state * s,struct rc_instruction * inst)423 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
424 {
425 	unsigned int newip;
426 
427 	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
428 		rc_error(s->C, "emit_tex: Too many instructions");
429 		return;
430 	}
431 
432 	newip = ++s->Code->inst_end;
433 
434 	/* Currently all loops use the same integer constant to initialize
435 	 * the loop variables. */
436 	if(!s->Code->int_constants[0]) {
437 		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
438 		s->Code->int_constant_count = 1;
439 	}
440 	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
441 	s->Code->inst[newip].inst0 |= (inst->U.I.TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
442 
443 	switch(inst->U.I.Opcode){
444 	struct branch_info * branch;
445 	struct r500_loop_info * loop;
446 	case RC_OPCODE_BGNLOOP:
447 		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
448 			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
449 
450 		loop = &s->Loops[s->CurrentLoopDepth++];
451 		memset(loop, 0, sizeof(struct r500_loop_info));
452 		loop->BranchDepth = s->CurrentBranchDepth;
453 		loop->BgnLoop = newip;
454 
455 		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
456 			| R500_FC_JUMP_FUNC(0x00)
457 			| R500_FC_IGNORE_UNCOVERED
458 			;
459 		break;
460 	case RC_OPCODE_BRK:
461 		loop = &s->Loops[s->CurrentLoopDepth - 1];
462 		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
463 					loop->BrkCount, loop->BrkReserved, 1);
464 
465 		loop->Brks[loop->BrkCount++] = newip;
466 		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
467 			| R500_FC_JUMP_FUNC(0xff)
468 			| R500_FC_B_OP1_DECR
469 			| R500_FC_B_POP_CNT(
470 				s->CurrentBranchDepth - loop->BranchDepth)
471 			| R500_FC_IGNORE_UNCOVERED
472 			;
473 		break;
474 
475 	case RC_OPCODE_CONT:
476 		loop = &s->Loops[s->CurrentLoopDepth - 1];
477 		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
478 					loop->ContCount, loop->ContReserved, 1);
479 		loop->Conts[loop->ContCount++] = newip;
480 		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
481 			| R500_FC_JUMP_FUNC(0xff)
482 			| R500_FC_B_OP1_DECR
483 			| R500_FC_B_POP_CNT(
484 				s->CurrentBranchDepth -	loop->BranchDepth)
485 			| R500_FC_IGNORE_UNCOVERED
486 			;
487 		break;
488 
489 	case RC_OPCODE_ENDLOOP:
490 	{
491 		loop = &s->Loops[s->CurrentLoopDepth - 1];
492 		/* Emit ENDLOOP */
493 		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
494 			| R500_FC_JUMP_FUNC(0xff)
495 			| R500_FC_JUMP_ANY
496 			| R500_FC_IGNORE_UNCOVERED
497 			;
498 		/* The constant integer at index 0 is used by all loops. */
499 		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
500 			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
501 			;
502 
503 		/* Set jump address and int constant for BGNLOOP */
504 		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
505 			| R500_FC_JUMP_ADDR(newip)
506 			;
507 
508 		/* Set jump address for the BRK instructions. */
509 		while(loop->BrkCount--) {
510 			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
511 						R500_FC_JUMP_ADDR(newip + 1);
512 		}
513 
514 		/* Set jump address for CONT instructions. */
515 		while(loop->ContCount--) {
516 			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
517 						R500_FC_JUMP_ADDR(newip);
518 		}
519 		s->CurrentLoopDepth--;
520 		break;
521 	}
522 	case RC_OPCODE_IF:
523 		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
524 			rc_error(s->C, "Branch depth exceeds hardware limit");
525 			return;
526 		}
527 		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
528 				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
529 
530 		branch = &s->Branches[s->CurrentBranchDepth++];
531 		branch->If = newip;
532 		branch->Else = -1;
533 		branch->Endif = -1;
534 
535 		if (s->CurrentBranchDepth > s->MaxBranchDepth)
536 			s->MaxBranchDepth = s->CurrentBranchDepth;
537 
538 		/* actual instruction is filled in at ENDIF time */
539 		break;
540 
541 	case RC_OPCODE_ELSE:
542 		if (!s->CurrentBranchDepth) {
543 			rc_error(s->C, "%s: got ELSE outside a branch", __func__);
544 			return;
545 		}
546 
547 		branch = &s->Branches[s->CurrentBranchDepth - 1];
548 		branch->Else = newip;
549 
550 		/* actual instruction is filled in at ENDIF time */
551 		break;
552 
553 	case RC_OPCODE_ENDIF:
554 		if (!s->CurrentBranchDepth) {
555 			rc_error(s->C, "%s: got ELSE outside a branch", __func__);
556 			return;
557 		}
558 
559 		branch = &s->Branches[s->CurrentBranchDepth - 1];
560 		branch->Endif = newip;
561 
562 		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
563 			| R500_FC_A_OP_NONE /* no address stack */
564 			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
565 			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
566 			| R500_FC_B_OP1_NONE /* no branch counter if stay */
567 			| R500_FC_B_POP_CNT(1)
568 			;
569 		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
570 		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
571 			| R500_FC_A_OP_NONE /* no address stack */
572 			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
573 			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
574 			| R500_FC_IGNORE_UNCOVERED
575 		;
576 
577 		if (branch->Else >= 0) {
578 			/* increment branch counter also if jump */
579 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
580 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
581 
582 			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
583 				| R500_FC_A_OP_NONE /* no address stack */
584 				| R500_FC_B_ELSE /* all active pixels want to jump */
585 				| R500_FC_B_OP0_NONE /* no counter op if stay */
586 				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
587 				| R500_FC_B_POP_CNT(1)
588 			;
589 			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
590 		} else {
591 			/* don't touch branch counter on jump */
592 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
593 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
594 		}
595 
596 
597 		s->CurrentBranchDepth--;
598 		break;
599 	default:
600 		rc_error(s->C, "%s: unknown opcode %s\n", __func__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
601 	}
602 }
603 
r500BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)604 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
605 {
606 	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
607 	struct emit_state s;
608 	struct r500_fragment_program_code *code = &compiler->code->code.r500;
609 
610 	memset(&s, 0, sizeof(s));
611 	s.C = &compiler->Base;
612 	s.Code = code;
613 
614 	memset(code, 0, sizeof(*code));
615 	code->max_temp_idx = 1;
616 	code->inst_end = -1;
617 
618 	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
619 	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
620 	    inst = inst->Next) {
621 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
622 			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
623 
624 			if (opcode->IsFlowControl) {
625 				emit_flowcontrol(&s, inst);
626 			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
627 				continue;
628 			} else {
629 				emit_tex(compiler, &inst->U.I);
630 			}
631 		} else {
632 			emit_paired(compiler, &inst->U.P);
633 		}
634 	}
635 
636 	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
637 		rc_error(&compiler->Base, "Too many hardware temporaries used\n");
638 
639 	if (compiler->Base.Error)
640 		return;
641 
642 	if (code->inst_end == -1 ||
643 	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
644 		int ip;
645 
646 		/* This may happen when dead-code elimination is disabled or
647 		 * when most of the fragment program logic is leading to a KIL */
648 		if (code->inst_end >= compiler->Base.max_alu_insts-1) {
649 			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
650 			return;
651 		}
652 
653 		ip = ++code->inst_end;
654 		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
655 	}
656 
657 	/* Make sure TEX_SEM_WAIT is set on the last instruction */
658 	code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
659 
660 	/* Enable full flow control mode if we are using loops or have if
661 	 * statements nested at least four deep. */
662 	if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
663 		if (code->max_temp_idx < 1)
664 			code->max_temp_idx = 1;
665 
666 		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
667 	}
668 }
669