1 /*
2 * Copyright 2005 Ben Skeggs.
3 * Copyright 2008 Corbin Simpson <[email protected]>
4 * SPDX-License-Identifier: MIT
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 */
7 /**
8 * \file
9 *
10 * \author Ben Skeggs <[email protected]>
11 *
12 * \author Jerome Glisse <[email protected]>
13 *
14 * \author Corbin Simpson <[email protected]>
15 *
16 */
17
18 #include "r500_fragprog.h"
19
20 #include "r300_reg.h"
21
22 #include "radeon_program_pair.h"
23
24 #include "util/compiler.h"
25
/* Declares a local `code` pointing at the R500 program of the compiler `c`;
 * a pointer named `c` must be in scope where this is expanded. */
#define PROG_CODE struct r500_fragment_program_code *code = &c->code->code.r500
28
/* Report a compile error on the compiler `c` (which must be in scope).
 * The message is prefixed with the source file and calling function name,
 * and a newline is appended after the caller's format string. */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			 __FILE__, __func__, ##__VA_ARGS__); \
	} while (0)
33
34
/**
 * Instruction indices belonging to one IF/ELSE/ENDIF construct.
 */
struct branch_info {
	int If;    /* index of the IF instruction */
	int Else;  /* index of the ELSE instruction, or -1 if none was seen */
	int Endif; /* index of the ENDIF instruction, or -1 until it is emitted */
};
40
/**
 * Bookkeeping for one loop while its body is being emitted.
 */
struct r500_loop_info {
	/* Index of the BGNLOOP instruction. */
	int BgnLoop;

	/* Branch nesting depth at the point where the loop was opened. */
	int BranchDepth;

	/* Indices of the BRK instructions emitted inside the loop. */
	int *Brks;
	int BrkCount;
	int BrkReserved;

	/* Indices of the CONT instructions emitted inside the loop. */
	int *Conts;
	int ContCount;
	int ContReserved;
};
53
/**
 * State carried across flow-control instructions during emission.
 */
struct emit_state {
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Stack of currently open IF constructs. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of currently open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Largest value CurrentBranchDepth has reached so far. */
	unsigned int MaxBranchDepth;
};
69
/**
 * Translate a compiler opcode into the R500 RGB ALU opcode value.
 *
 * RC_OPCODE_NOP is encoded as MAD. An opcode with no RGB encoding raises a
 * compile error on \p c and is encoded as MAD as well.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_CMP:
		return R500_ALU_RGBA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALU_RGBA_OP_CND;
	case RC_OPCODE_DDX:
		return R500_ALU_RGBA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALU_RGBA_OP_MDV;
	case RC_OPCODE_DP3:
		return R500_ALU_RGBA_OP_DP3;
	case RC_OPCODE_DP4:
		return R500_ALU_RGBA_OP_DP4;
	case RC_OPCODE_FRC:
		return R500_ALU_RGBA_OP_FRC;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALU_RGBA_OP_MAD;
	case RC_OPCODE_MAX:
		return R500_ALU_RGBA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALU_RGBA_OP_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R500_ALU_RGBA_OP_SOP;
	default:
		break;
	}

	/* Unknown opcode: report it, then fall back to the MAD encoding. */
	error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALU_RGBA_OP_MAD;
}
91
/**
 * Translate a compiler opcode into the R500 alpha ALU opcode value.
 *
 * DP3 and DP4 share the single alpha DP encoding and RC_OPCODE_NOP is
 * encoded as MAD. An opcode with no alpha encoding raises a compile error
 * on \p c and is encoded as MAD as well.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_CMP:
		return R500_ALPHA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALPHA_OP_CND;
	case RC_OPCODE_COS:
		return R500_ALPHA_OP_COS;
	case RC_OPCODE_DDX:
		return R500_ALPHA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALPHA_OP_MDV;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		return R500_ALPHA_OP_DP;
	case RC_OPCODE_EX2:
		return R500_ALPHA_OP_EX2;
	case RC_OPCODE_FRC:
		return R500_ALPHA_OP_FRC;
	case RC_OPCODE_LG2:
		return R500_ALPHA_OP_LN2;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALPHA_OP_MAD;
	case RC_OPCODE_MAX:
		return R500_ALPHA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALPHA_OP_MIN;
	case RC_OPCODE_RCP:
		return R500_ALPHA_OP_RCP;
	case RC_OPCODE_RSQ:
		return R500_ALPHA_OP_RSQ;
	case RC_OPCODE_SIN:
		return R500_ALPHA_OP_SIN;
	default:
		break;
	}

	/* Unknown opcode: report it, then fall back to the MAD encoding. */
	error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALPHA_OP_MAD;
}
118
fix_hw_swizzle(unsigned int swz)119 static unsigned int fix_hw_swizzle(unsigned int swz)
120 {
121 switch (swz) {
122 case RC_SWIZZLE_ZERO:
123 case RC_SWIZZLE_UNUSED:
124 swz = 4;
125 break;
126 case RC_SWIZZLE_HALF:
127 swz = 5;
128 break;
129 case RC_SWIZZLE_ONE:
130 swz = 6;
131 break;
132 }
133
134 return swz;
135 }
136
translate_arg_rgb(struct rc_pair_instruction * inst,int arg)137 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
138 {
139 unsigned int t = inst->RGB.Arg[arg].Source;
140 int comp;
141 t |= inst->RGB.Arg[arg].Negate << 11;
142 t |= inst->RGB.Arg[arg].Abs << 12;
143
144 for(comp = 0; comp < 3; ++comp)
145 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
146
147 return t;
148 }
149
translate_arg_alpha(struct rc_pair_instruction * inst,int i)150 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
151 {
152 unsigned int t = inst->Alpha.Arg[i].Source;
153 t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
154 t |= inst->Alpha.Arg[i].Negate << 5;
155 t |= inst->Alpha.Arg[i].Abs << 6;
156 return t;
157 }
158
/**
 * Translate a compare function into the ALU result operation bits of inst0.
 *
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL are handled; any other function
 * raises a compile error on \p c and yields 0.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	if (func == RC_COMPARE_FUNC_EQUAL)
		return R500_INST_ALU_RESULT_OP_EQ;
	if (func == RC_COMPARE_FUNC_LESS)
		return R500_INST_ALU_RESULT_OP_LT;
	if (func == RC_COMPARE_FUNC_GEQUAL)
		return R500_INST_ALU_RESULT_OP_GE;
	if (func == RC_COMPARE_FUNC_NOTEQUAL)
		return R500_INST_ALU_RESULT_OP_NE;

	rc_error(&c->Base, "%s: unsupported compare func %i\n", __func__, func);
	return 0;
}
171
use_temporary(struct r500_fragment_program_code * code,unsigned int index)172 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
173 {
174 if (index > code->max_temp_idx)
175 code->max_temp_idx = index;
176 }
177
use_source(struct r500_fragment_program_code * code,struct rc_pair_instruction_source src)178 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
179 {
180 /* From docs:
181 * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
182 * MSB = 1 << 7 */
183 if (!src.Used)
184 return 1 << 7;
185
186 if (src.File == RC_FILE_CONSTANT) {
187 return src.Index | R500_RGB_ADDR0_CONST;
188 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
189 use_temporary(code, src.Index);
190 return src.Index;
191 } else if (src.File == RC_FILE_INLINE) {
192 return src.Index | (1 << 7);
193 }
194
195 return 0;
196 }
197
198 /**
199 * NOP the specified instruction if it is not a texture lookup.
200 */
alu_nop(struct r300_fragment_program_compiler * c,int ip)201 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
202 {
203 PROG_CODE;
204
205 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
206 code->inst[ip].inst0 |= R500_INST_NOP;
207 }
208 }
209
210 /**
211 * Emit a paired ALU instruction.
212 */
emit_paired(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * inst)213 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
214 {
215 int ip;
216 PROG_CODE;
217
218 if (code->inst_end >= c->Base.max_alu_insts-1) {
219 error("emit_alu: Too many instructions");
220 return;
221 }
222
223 ip = ++code->inst_end;
224
225 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
226 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
227 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
228 if (ip > 0) {
229 alu_nop(c, ip - 1);
230 }
231 }
232
233 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
234 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
235
236 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
237 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
238 if (inst->WriteALUResult) {
239 error("Cannot write output and ALU result at the same time");
240 return;
241 }
242 } else {
243 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
244 }
245 code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
246
247 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
248 code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
249 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
250 if (inst->Nop) {
251 code->inst[ip].inst0 |= R500_INST_NOP;
252 }
253 if (inst->Alpha.DepthWriteMask) {
254 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
255 c->code->writes_depth = 1;
256 }
257
258 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
259 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
260 if (inst->Alpha.WriteMask)
261 use_temporary(code, inst->Alpha.DestIndex);
262 if (inst->RGB.WriteMask)
263 use_temporary(code, inst->RGB.DestIndex);
264
265 if (inst->RGB.Saturate)
266 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
267 if (inst->Alpha.Saturate)
268 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
269
270 /* Set the presubtract operation. */
271 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
272 case RC_PRESUB_BIAS:
273 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
274 break;
275 case RC_PRESUB_SUB:
276 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
277 break;
278 case RC_PRESUB_ADD:
279 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
280 break;
281 case RC_PRESUB_INV:
282 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
283 break;
284 default:
285 break;
286 }
287 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
288 case RC_PRESUB_BIAS:
289 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
290 break;
291 case RC_PRESUB_SUB:
292 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
293 break;
294 case RC_PRESUB_ADD:
295 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
296 break;
297 case RC_PRESUB_INV:
298 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
299 break;
300 default:
301 break;
302 }
303
304 /* Set the output modifier */
305 code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
306 code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
307
308 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
309 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
310 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
311
312 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
313 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
314 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
315
316 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
317 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
318 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
319
320 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
321 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
322 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
323
324 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
325 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
326
327 if (inst->WriteALUResult) {
328 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
329
330 if (inst->WriteALUResult == RC_ALURESULT_X)
331 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
332 else
333 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
334
335 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
336 }
337 }
338
/**
 * Pack the four components of a swizzle into the 2-bits-per-component
 * layout used for texture coordinates; only the low two bits of each
 * component selector are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	return ((GET_SWZ(swizzle, 0) & 0x3) << 0)
	     | ((GET_SWZ(swizzle, 1) & 0x3) << 2)
	     | ((GET_SWZ(swizzle, 2) & 0x3) << 4)
	     | ((GET_SWZ(swizzle, 3) & 0x3) << 6);
}
347
348 /**
349 * Emit a single TEX instruction
350 */
emit_tex(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)351 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
352 {
353 int ip;
354 PROG_CODE;
355
356 if (code->inst_end >= c->Base.max_alu_insts-1) {
357 error("emit_tex: Too many instructions");
358 return 0;
359 }
360
361 ip = ++code->inst_end;
362
363 code->inst[ip].inst0 = R500_INST_TYPE_TEX
364 | (inst->DstReg.WriteMask << 11)
365 | (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
366 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
367 | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
368
369 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
370 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
371
372 switch (inst->Opcode) {
373 case RC_OPCODE_KIL:
374 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
375 break;
376 case RC_OPCODE_TEX:
377 code->inst[ip].inst1 |= R500_TEX_INST_LD;
378 break;
379 case RC_OPCODE_TXB:
380 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
381 break;
382 case RC_OPCODE_TXP:
383 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
384 break;
385 case RC_OPCODE_TXD:
386 code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
387 break;
388 case RC_OPCODE_TXL:
389 code->inst[ip].inst1 |= R500_TEX_INST_LOD;
390 break;
391 default:
392 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
393 }
394
395 use_temporary(code, inst->SrcReg[0].Index);
396 if (inst->Opcode != RC_OPCODE_KIL)
397 use_temporary(code, inst->DstReg.Index);
398
399 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
400 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
401 | R500_TEX_DST_ADDR(inst->DstReg.Index)
402 | (GET_SWZ(inst->TexSwizzle, 0) << 24)
403 | (GET_SWZ(inst->TexSwizzle, 1) << 26)
404 | (GET_SWZ(inst->TexSwizzle, 2) << 28)
405 | (GET_SWZ(inst->TexSwizzle, 3) << 30)
406 ;
407
408 if (inst->Opcode == RC_OPCODE_TXD) {
409 use_temporary(code, inst->SrcReg[1].Index);
410 use_temporary(code, inst->SrcReg[2].Index);
411
412 /* DX and DY parameters are specified in a separate register. */
413 code->inst[ip].inst3 =
414 R500_DX_ADDR(inst->SrcReg[1].Index) |
415 (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
416 R500_DY_ADDR(inst->SrcReg[2].Index) |
417 (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
418 }
419
420 return 1;
421 }
422
emit_flowcontrol(struct emit_state * s,struct rc_instruction * inst)423 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
424 {
425 unsigned int newip;
426
427 if (s->Code->inst_end >= s->C->max_alu_insts-1) {
428 rc_error(s->C, "emit_tex: Too many instructions");
429 return;
430 }
431
432 newip = ++s->Code->inst_end;
433
434 /* Currently all loops use the same integer constant to initialize
435 * the loop variables. */
436 if(!s->Code->int_constants[0]) {
437 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
438 s->Code->int_constant_count = 1;
439 }
440 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
441 s->Code->inst[newip].inst0 |= (inst->U.I.TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
442
443 switch(inst->U.I.Opcode){
444 struct branch_info * branch;
445 struct r500_loop_info * loop;
446 case RC_OPCODE_BGNLOOP:
447 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
448 s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
449
450 loop = &s->Loops[s->CurrentLoopDepth++];
451 memset(loop, 0, sizeof(struct r500_loop_info));
452 loop->BranchDepth = s->CurrentBranchDepth;
453 loop->BgnLoop = newip;
454
455 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
456 | R500_FC_JUMP_FUNC(0x00)
457 | R500_FC_IGNORE_UNCOVERED
458 ;
459 break;
460 case RC_OPCODE_BRK:
461 loop = &s->Loops[s->CurrentLoopDepth - 1];
462 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
463 loop->BrkCount, loop->BrkReserved, 1);
464
465 loop->Brks[loop->BrkCount++] = newip;
466 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
467 | R500_FC_JUMP_FUNC(0xff)
468 | R500_FC_B_OP1_DECR
469 | R500_FC_B_POP_CNT(
470 s->CurrentBranchDepth - loop->BranchDepth)
471 | R500_FC_IGNORE_UNCOVERED
472 ;
473 break;
474
475 case RC_OPCODE_CONT:
476 loop = &s->Loops[s->CurrentLoopDepth - 1];
477 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
478 loop->ContCount, loop->ContReserved, 1);
479 loop->Conts[loop->ContCount++] = newip;
480 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
481 | R500_FC_JUMP_FUNC(0xff)
482 | R500_FC_B_OP1_DECR
483 | R500_FC_B_POP_CNT(
484 s->CurrentBranchDepth - loop->BranchDepth)
485 | R500_FC_IGNORE_UNCOVERED
486 ;
487 break;
488
489 case RC_OPCODE_ENDLOOP:
490 {
491 loop = &s->Loops[s->CurrentLoopDepth - 1];
492 /* Emit ENDLOOP */
493 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
494 | R500_FC_JUMP_FUNC(0xff)
495 | R500_FC_JUMP_ANY
496 | R500_FC_IGNORE_UNCOVERED
497 ;
498 /* The constant integer at index 0 is used by all loops. */
499 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
500 | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
501 ;
502
503 /* Set jump address and int constant for BGNLOOP */
504 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
505 | R500_FC_JUMP_ADDR(newip)
506 ;
507
508 /* Set jump address for the BRK instructions. */
509 while(loop->BrkCount--) {
510 s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
511 R500_FC_JUMP_ADDR(newip + 1);
512 }
513
514 /* Set jump address for CONT instructions. */
515 while(loop->ContCount--) {
516 s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
517 R500_FC_JUMP_ADDR(newip);
518 }
519 s->CurrentLoopDepth--;
520 break;
521 }
522 case RC_OPCODE_IF:
523 if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
524 rc_error(s->C, "Branch depth exceeds hardware limit");
525 return;
526 }
527 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
528 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
529
530 branch = &s->Branches[s->CurrentBranchDepth++];
531 branch->If = newip;
532 branch->Else = -1;
533 branch->Endif = -1;
534
535 if (s->CurrentBranchDepth > s->MaxBranchDepth)
536 s->MaxBranchDepth = s->CurrentBranchDepth;
537
538 /* actual instruction is filled in at ENDIF time */
539 break;
540
541 case RC_OPCODE_ELSE:
542 if (!s->CurrentBranchDepth) {
543 rc_error(s->C, "%s: got ELSE outside a branch", __func__);
544 return;
545 }
546
547 branch = &s->Branches[s->CurrentBranchDepth - 1];
548 branch->Else = newip;
549
550 /* actual instruction is filled in at ENDIF time */
551 break;
552
553 case RC_OPCODE_ENDIF:
554 if (!s->CurrentBranchDepth) {
555 rc_error(s->C, "%s: got ELSE outside a branch", __func__);
556 return;
557 }
558
559 branch = &s->Branches[s->CurrentBranchDepth - 1];
560 branch->Endif = newip;
561
562 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
563 | R500_FC_A_OP_NONE /* no address stack */
564 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
565 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
566 | R500_FC_B_OP1_NONE /* no branch counter if stay */
567 | R500_FC_B_POP_CNT(1)
568 ;
569 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
570 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
571 | R500_FC_A_OP_NONE /* no address stack */
572 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
573 | R500_FC_B_OP0_INCR /* increment branch counter if stay */
574 | R500_FC_IGNORE_UNCOVERED
575 ;
576
577 if (branch->Else >= 0) {
578 /* increment branch counter also if jump */
579 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
580 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
581
582 s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
583 | R500_FC_A_OP_NONE /* no address stack */
584 | R500_FC_B_ELSE /* all active pixels want to jump */
585 | R500_FC_B_OP0_NONE /* no counter op if stay */
586 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
587 | R500_FC_B_POP_CNT(1)
588 ;
589 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
590 } else {
591 /* don't touch branch counter on jump */
592 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
593 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
594 }
595
596
597 s->CurrentBranchDepth--;
598 break;
599 default:
600 rc_error(s->C, "%s: unknown opcode %s\n", __func__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
601 }
602 }
603
r500BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)604 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
605 {
606 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
607 struct emit_state s;
608 struct r500_fragment_program_code *code = &compiler->code->code.r500;
609
610 memset(&s, 0, sizeof(s));
611 s.C = &compiler->Base;
612 s.Code = code;
613
614 memset(code, 0, sizeof(*code));
615 code->max_temp_idx = 1;
616 code->inst_end = -1;
617
618 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
619 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
620 inst = inst->Next) {
621 if (inst->Type == RC_INSTRUCTION_NORMAL) {
622 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
623
624 if (opcode->IsFlowControl) {
625 emit_flowcontrol(&s, inst);
626 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
627 continue;
628 } else {
629 emit_tex(compiler, &inst->U.I);
630 }
631 } else {
632 emit_paired(compiler, &inst->U.P);
633 }
634 }
635
636 if (code->max_temp_idx >= compiler->Base.max_temp_regs)
637 rc_error(&compiler->Base, "Too many hardware temporaries used\n");
638
639 if (compiler->Base.Error)
640 return;
641
642 if (code->inst_end == -1 ||
643 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
644 int ip;
645
646 /* This may happen when dead-code elimination is disabled or
647 * when most of the fragment program logic is leading to a KIL */
648 if (code->inst_end >= compiler->Base.max_alu_insts-1) {
649 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
650 return;
651 }
652
653 ip = ++code->inst_end;
654 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
655 }
656
657 /* Make sure TEX_SEM_WAIT is set on the last instruction */
658 code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
659
660 /* Enable full flow control mode if we are using loops or have if
661 * statements nested at least four deep. */
662 if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
663 if (code->max_temp_idx < 1)
664 code->max_temp_idx = 1;
665
666 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
667 }
668 }
669