1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "radeon_program_pair.h"
7
8 #include <stdio.h>
9
10 #include "radeon_compiler.h"
11 #include "radeon_compiler_util.h"
12 #include "radeon_dataflow.h"
13 #include "radeon_list.h"
14 #include "radeon_variable.h"
15
16 #include "util/u_debug.h"
17
18 #define VERBOSE 0
19
20 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
21
/**
 * Scheduler-side wrapper around one rc_instruction: the values it reads and
 * writes, its outstanding dependency count, and its pairing/scoring state.
 */
struct schedule_instruction {
	/** The instruction being scheduled. */
	struct rc_instruction * Instruction;

	/** Next instruction in the linked list of ready instructions. */
	struct schedule_instruction *NextReady;

	/** Values that this instruction reads and writes */
	struct reg_value * WriteValues[4];
	struct reg_value * ReadValues[12];
	/* Counts of valid entries in the two arrays above.  The bit-field
	 * widths (3 bits: 0..7, 4 bits: 0..15) are wide enough for the array
	 * sizes of 4 and 12. */
	unsigned int NumWriteValues:3;
	unsigned int NumReadValues:4;

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled.
	 */
	unsigned int NumDependencies:5;

	/** List of all readers (see rc_get_readers() for the definition of
	 * "all readers"), even those outside the basic block this instruction
	 * lives in. */
	struct rc_reader_data GlobalReaders;

	/** If the scheduler has paired an RGB and an Alpha instruction together,
	 * PairedInst references the alpha instruction's dependency information.
	 */
	struct schedule_instruction * PairedInst;

	/** This scheduler uses the value of Score to determine which
	 * instruction to schedule.  Instructions with a higher value of Score
	 * will be scheduled first. */
	int Score;

	/** The number of components that read from a TEX instruction. */
	unsigned TexReadCount;

	/** For TEX instructions a list of readers */
	struct rc_list * TexReaders;
};
61
62
/**
 * Used to keep track of which instructions read a value.
 * One node of the singly linked list hanging off reg_value::Readers.
 */
struct reg_value_reader {
	/** The instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Next reader of the same value, or NULL at the end of the list. */
	struct reg_value_reader *Next;
};
70
/**
 * Used to keep track which values are stored in each component of a
 * RC_FILE_TEMPORARY.
 */
struct reg_value {
	/** The instruction that produces this value. */
	struct schedule_instruction * Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When this value becomes available (its writer is committed), we
	 * decrease all readers' dependency count; see commit_update_writes().
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
95
/**
 * Value tracking for a single register: one reg_value slot per channel.
 * get_reg_valuep() hands out pointers into this array.
 */
struct register_state {
	struct reg_value * Values[4];
};
99
/**
 * Node of a singly linked list describing a register remapping.
 * NOTE(review): no code in the visible part of this file uses this struct;
 * the per-field notes below are read off the field names only -- confirm
 * against the users before relying on them.
 */
struct remap_reg {
	struct rc_instruction * Inst;
	/* Old/new index fields are one bit wider than RC_REGISTER_INDEX_BITS. */
	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
	unsigned int OldSwizzle:3;
	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
	unsigned int NewSwizzle:3;
	unsigned int OnlyTexReads:1;
	/* Next node in the list. */
	struct remap_reg * Next;
};
109
/**
 * Global state of the scheduler: the compiler handle, the per-register value
 * tracking table and the lists of instructions that are ready to be emitted.
 */
struct schedule_state {
	/** Compiler context; used for error reporting and pool allocation. */
	struct radeon_compiler * C;
	struct schedule_instruction * Current;
	/** Array of the previous writers of Current's destination register
	 * indexed by channel. */
	struct schedule_instruction * PrevWriter[4];

	/** Per-channel value tracking for every temporary register, indexed
	 * by register index (see get_reg_valuep()). */
	struct register_state Temporary[RC_REGISTER_MAX_INDEX];

	/**
	 * Linked lists of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	/*@{*/
	struct schedule_instruction *ReadyFullALU;
	struct schedule_instruction *ReadyRGB;
	struct schedule_instruction *ReadyAlpha;
	struct schedule_instruction *ReadyTEX;
	/*@}*/
	/** TEX instructions added by emit_all_tex(); notify_sem_wait() walks
	 * this list and then resets it to NULL. */
	struct rc_list *PendingTEX;

	/** Scoring heuristic; writes its result to schedule_instruction::Score. */
	void (*CalcScore)(struct schedule_instruction *);
	long max_tex_group;
	unsigned PrevBlockHasTex:1;
	unsigned PrevBlockHasKil:1;
	unsigned TEXCount;
	/** When zero, pair_instructions() skips the RGB-to-Alpha conversion
	 * passes. */
	unsigned Opt:1;
};
138
/**
 * Look up the slot that tracks the value currently held in one channel of
 * a temporary register.
 *
 * \param s     scheduler state owning the Temporary[] table
 * \param file  register file; only RC_FILE_TEMPORARY is tracked
 * \param index register index, must be below RC_REGISTER_MAX_INDEX
 * \param chan  channel, must be a valid index into register_state::Values
 * \return pointer to the slot, or NULL if the file is not tracked or if
 *         index/chan is out of range (the latter is also reported through
 *         rc_error()).
 */
static struct reg_value ** get_reg_valuep(struct schedule_state * s,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	const unsigned int num_chans =
		sizeof(s->Temporary[0].Values) / sizeof(s->Temporary[0].Values[0]);

	if (file != RC_FILE_TEMPORARY)
		return NULL;

	if (index >= RC_REGISTER_MAX_INDEX) {
		/* index is unsigned, so print it with %u. */
		rc_error(s->C, "%s: index %u out of bounds\n", __func__, index);
		return NULL;
	}

	/* Never hand out a pointer past the end of Values[]. */
	if (chan >= num_chans) {
		rc_error(s->C, "%s: channel %u out of bounds\n", __func__, chan);
		return NULL;
	}

	return &s->Temporary[index].Values[chan];
}
152
get_tex_read_count(struct schedule_instruction * sinst)153 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
154 {
155 unsigned tex_read_count = sinst->TexReadCount;
156 if (sinst->PairedInst) {
157 tex_read_count += sinst->PairedInst->TexReadCount;
158 }
159 return tex_read_count;
160 }
161
162 #if VERBOSE
print_list(struct schedule_instruction * sinst)163 static void print_list(struct schedule_instruction * sinst)
164 {
165 struct schedule_instruction * ptr;
166 for (ptr = sinst; ptr; ptr=ptr->NextReady) {
167 unsigned tex_read_count = get_tex_read_count(ptr);
168 unsigned score = sinst->Score;
169 fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
170 tex_read_count);
171 }
172 fprintf(stderr, "\n");
173 }
174 #endif
175
remove_inst_from_list(struct schedule_instruction ** list,struct schedule_instruction * inst)176 static void remove_inst_from_list(struct schedule_instruction ** list,
177 struct schedule_instruction * inst)
178 {
179 struct schedule_instruction * prev = NULL;
180 struct schedule_instruction * list_ptr;
181 for (list_ptr = *list; list_ptr; prev = list_ptr,
182 list_ptr = list_ptr->NextReady) {
183 if (list_ptr == inst) {
184 if (prev) {
185 prev->NextReady = inst->NextReady;
186 } else {
187 *list = inst->NextReady;
188 }
189 inst->NextReady = NULL;
190 break;
191 }
192 }
193 }
194
add_inst_to_list(struct schedule_instruction ** list,struct schedule_instruction * inst)195 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
196 {
197 inst->NextReady = *list;
198 *list = inst;
199 }
200
add_inst_to_list_score(struct schedule_instruction ** list,struct schedule_instruction * inst)201 static void add_inst_to_list_score(struct schedule_instruction ** list,
202 struct schedule_instruction * inst)
203 {
204 struct schedule_instruction * temp;
205 struct schedule_instruction * prev;
206 if (!*list) {
207 *list = inst;
208 return;
209 }
210 temp = *list;
211 prev = NULL;
212 while(temp && inst->Score <= temp->Score) {
213 prev = temp;
214 temp = temp->NextReady;
215 }
216
217 if (!prev) {
218 inst->NextReady = temp;
219 *list = inst;
220 } else {
221 prev->NextReady = inst;
222 inst->NextReady = temp;
223 }
224 }
225
instruction_ready(struct schedule_state * s,struct schedule_instruction * sinst)226 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
227 {
228 DBG("%i is now ready\n", sinst->Instruction->IP);
229
230 /* Adding Ready TEX instructions to the end of the "Ready List" helps
231 * us emit TEX instructions in blocks without losing our place. */
232 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
233 add_inst_to_list_score(&s->ReadyTEX, sinst);
234 else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
235 add_inst_to_list_score(&s->ReadyRGB, sinst);
236 else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
237 add_inst_to_list_score(&s->ReadyAlpha, sinst);
238 else
239 add_inst_to_list_score(&s->ReadyFullALU, sinst);
240 }
241
decrease_dependencies(struct schedule_state * s,struct schedule_instruction * sinst)242 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
243 {
244 assert(sinst->NumDependencies > 0);
245 sinst->NumDependencies--;
246 if (!sinst->NumDependencies)
247 instruction_ready(s, sinst);
248 }
249
250 /* These functions provide different heuristics for scheduling instructions.
251 * The default is calc_score_readers. */
252
253 #if 0
254
/* Disabled heuristic (compiled out by the surrounding #if 0): gives every
 * instruction the same score of zero. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
	sinst->Score = 0;
}
259
/* Disabled heuristic (compiled out by the surrounding #if 0): scores an
 * instruction by the outstanding dependencies of the readers of the values
 * it writes. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
	int i;
	sinst->Score = 0;
	for (i = 0; i < sinst->NumWriteValues; i++) {
		struct reg_value * v = sinst->WriteValues[i];
		if (v->NumReaders) {
			struct reg_value_reader * r;
			for (r = v->Readers; r; r = r->Next) {
				/* A reader with exactly one dependency left
				 * earns a bonus of 100 ... */
				if (r->Reader->NumDependencies == 1) {
					sinst->Score += 100;
				}
				/* ... and every reader adds its remaining
				 * dependency count. */
				sinst->Score += r->Reader->NumDependencies;
			}
		}
	}
}
277
278 #endif
279
280 #define NO_OUTPUT_SCORE (1 << 24)
281
score_no_output(struct schedule_instruction * sinst)282 static void score_no_output(struct schedule_instruction * sinst)
283 {
284 assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
285 if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
286 !sinst->Instruction->U.P.Alpha.OutputWriteMask) {
287 if (sinst->PairedInst) {
288 if (!sinst->PairedInst->Instruction->U.P.
289 RGB.OutputWriteMask
290 && !sinst->PairedInst->Instruction->U.P.
291 Alpha.OutputWriteMask) {
292 sinst->Score |= NO_OUTPUT_SCORE;
293 }
294
295 } else {
296 sinst->Score |= NO_OUTPUT_SCORE;
297 }
298 }
299 }
300
301 #define PAIRED_SCORE (1 << 16)
302
calc_score_r300(struct schedule_instruction * sinst)303 static void calc_score_r300(struct schedule_instruction * sinst)
304 {
305 unsigned src_idx;
306
307 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
308 sinst->Score = 0;
309 return;
310 }
311
312 score_no_output(sinst);
313
314 if (sinst->PairedInst) {
315 sinst->Score |= PAIRED_SCORE;
316 return;
317 }
318
319 for (src_idx = 0; src_idx < 4; src_idx++) {
320 sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
321 sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
322 }
323 }
324
325 #define NO_READ_TEX_SCORE (1 << 16)
326
calc_score_readers(struct schedule_instruction * sinst)327 static void calc_score_readers(struct schedule_instruction * sinst)
328 {
329 if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
330 sinst->Score = 0;
331 } else {
332 sinst->Score = sinst->NumReadValues;
333 if (sinst->PairedInst) {
334 sinst->Score += sinst->PairedInst->NumReadValues;
335 }
336 if (get_tex_read_count(sinst) == 0) {
337 sinst->Score |= NO_READ_TEX_SCORE;
338 }
339 score_no_output(sinst);
340 }
341 }
342
343 /**
344 * This function decreases the dependencies of the next instruction that
345 * wants to write to each of sinst's read values.
346 */
commit_update_reads(struct schedule_state * s,struct schedule_instruction * sinst)347 static void commit_update_reads(struct schedule_state * s,
348 struct schedule_instruction * sinst){
349 do {
350 for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
351 struct reg_value * v = sinst->ReadValues[i];
352 assert(v->NumReaders > 0);
353 v->NumReaders--;
354 if (!v->NumReaders) {
355 if (v->Next) {
356 decrease_dependencies(s, v->Next->Writer);
357 }
358 }
359 }
360 } while ((sinst = sinst->PairedInst));
361 }
362
commit_update_writes(struct schedule_state * s,struct schedule_instruction * sinst)363 static void commit_update_writes(struct schedule_state * s,
364 struct schedule_instruction * sinst){
365 do {
366 for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
367 struct reg_value * v = sinst->WriteValues[i];
368 if (v->NumReaders) {
369 for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
370 decrease_dependencies(s, r->Reader);
371 }
372 } else {
373 /* This happens in instruction sequences of the type
374 * OP r.x, ...;
375 * OP r.x, r.x, ...;
376 * See also the subtlety in how instructions that both
377 * read and write the same register are scanned.
378 */
379 if (v->Next)
380 decrease_dependencies(s, v->Next->Writer);
381 }
382 }
383 } while ((sinst = sinst->PairedInst));
384 }
385
notify_sem_wait(struct schedule_state * s)386 static void notify_sem_wait(struct schedule_state *s)
387 {
388 struct rc_list * pend_ptr;
389 for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
390 struct rc_list * read_ptr;
391 struct schedule_instruction * pending = pend_ptr->Item;
392 for (read_ptr = pending->TexReaders; read_ptr;
393 read_ptr = read_ptr->Next) {
394 struct schedule_instruction * reader = read_ptr->Item;
395 reader->TexReadCount--;
396 }
397 }
398 s->PendingTEX = NULL;
399 }
400
commit_alu_instruction(struct schedule_state * s,struct schedule_instruction * sinst)401 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
402 {
403 DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
404
405 commit_update_reads(s, sinst);
406
407 commit_update_writes(s, sinst);
408
409 if (get_tex_read_count(sinst) > 0) {
410 sinst->Instruction->U.P.SemWait = 1;
411 notify_sem_wait(s);
412 }
413 }
414
415 /**
416 * Emit all ready texture instructions in a single block.
417 *
418 * Emit as a single block to (hopefully) sample many textures in parallel,
419 * and to avoid hardware indirections on R300.
420 */
emit_all_tex(struct schedule_state * s,struct rc_instruction * before)421 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
422 {
423 struct schedule_instruction *readytex;
424 struct rc_instruction * inst_begin;
425
426 assert(s->ReadyTEX);
427 notify_sem_wait(s);
428
429 /* Node marker for R300 */
430 inst_begin = rc_insert_new_instruction(s->C, before->Prev);
431 inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
432
433 /* Link texture instructions back in */
434 readytex = s->ReadyTEX;
435 while(readytex) {
436 rc_insert_instruction(before->Prev, readytex->Instruction);
437 DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
438
439 /* All of the TEX instructions in the same TEX block have
440 * their source registers read from before any of the
441 * instructions in that block write to their destination
442 * registers. This means that when we commit a TEX
443 * instruction, any other TEX instruction that wants to write
444 * to one of the committed instruction's source register can be
445 * marked as ready and should be emitted in the same TEX
446 * block. This prevents the following sequence from being
447 * emitted in two different TEX blocks:
448 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
449 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
450 */
451 commit_update_reads(s, readytex);
452 readytex = readytex->NextReady;
453 }
454 readytex = s->ReadyTEX;
455 s->ReadyTEX = NULL;
456 while(readytex){
457 DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
458 commit_update_writes(s, readytex);
459 /* Set semaphore bits for last TEX instruction in the block */
460 if (!readytex->NextReady) {
461 readytex->Instruction->U.I.TexSemAcquire = 1;
462 readytex->Instruction->U.I.TexSemWait = 1;
463 }
464 rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
465 readytex = readytex->NextReady;
466 }
467 }
468
/* This is a helper function for destructive_merge_instructions().  It helps
 * merge presubtract sources from two instructions and makes sure the
 * presubtract sources end up in the correct spot.  This function assumes that
 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
 * but no scalar instruction (alpha).
 *
 * dst_full is the instruction being merged into and is modified in place,
 * also when the merge fails part-way (merge_instructions() restores it from
 * a backup).  src is the sub-instruction (passed by value) whose presubtract
 * sources are merged in.  type selects which half of dst_full receives the
 * sources: RC_SOURCE_RGB or RC_SOURCE_ALPHA.
 *
 * @return 0 if merging the presubtract sources fails.
 * @return 1 if merging the presubtract sources succeeds.
 */
static int merge_presub_sources(
	struct rc_pair_instruction * dst_full,
	struct rc_pair_sub_instruction src,
	unsigned int type)
{
	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
	struct rc_pair_sub_instruction * dst_sub;
	const struct rc_opcode_info * info;

	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);

	/* Pick the half of dst_full whose source slots are rearranged. */
	switch(type) {
	case RC_SOURCE_RGB:
		is_rgb = 1;
		is_alpha = 0;
		dst_sub = &dst_full->RGB;
		break;
	case RC_SOURCE_ALPHA:
		is_rgb = 0;
		is_alpha = 1;
		dst_sub = &dst_full->Alpha;
		break;
	default:
		assert(0);
		return 0;
	}

	/* Only the RGB args are rewritten below; the alpha half of dst_full
	 * is a NOP (asserted above). */
	info = rc_get_opcode_info(dst_full->RGB.Opcode);

	/* The destination already uses a presubtract source of its own. */
	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
		return 0;

	srcp_regs = rc_presubtract_src_reg_count(
					src.Src[RC_PAIR_PRESUB_SRC].Index);
	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
		unsigned int arg;
		int free_source;
		/* Set when the displaced source was moved to a new slot
		 * instead of being swapped with free_source. */
		unsigned int one_way = 0;
		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
		struct rc_pair_instruction_source temp;

		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
							srcp.File, srcp.Index);

		/* If free_source < 0 then there are no free source
		 * slots. */
		if (free_source < 0)
			return 0;

		/* Move the presubtract register into slot srcp_src,
		 * remembering what used to live there. */
		temp = dst_sub->Src[srcp_src];
		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];

		/* srcp needs src0 and src1 to be the same */
		if (free_source < srcp_src) {
			if (!temp.Used)
				continue;
			/* Find a new home for the displaced source. */
			free_source = rc_pair_alloc_source(dst_full, is_rgb,
					is_alpha, temp.File, temp.Index);
			if (free_source < 0)
				return 0;
			one_way = 1;
		} else {
			dst_sub->Src[free_source] = temp;
		}

		/* If free_source == srcp_src, then the presubtract
		 * source is already in the correct place. */
		if (free_source == srcp_src)
			continue;

		/* Shuffle the sources, so we can put the
		 * presubtract source in the correct place. */
		for(arg = 0; arg < info->NumSrcRegs; arg++) {
			/* If the arg does read both from rgb and alpha, then we need to rewrite
			 * both sources and the code currently doesn't handle this.
			 * FIXME: This is definitely solvable, however shader-db shows it is
			 * not worth the effort.
			 */
			if (rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_ALPHA &&
			    rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_RGB)
				return 0;

			/* If this arg does not read from a source of the
			 * requested type, do nothing. */
			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
								& type)) {
				continue;
			}

			if (dst_full->RGB.Arg[arg].Source == srcp_src)
				dst_full->RGB.Arg[arg].Source = free_source;
			/* We need to do this just in case register
			 * is one of the sources already, but in the
			 * wrong spot. */
			else if(dst_full->RGB.Arg[arg].Source == free_source
							&& !one_way) {
				dst_full->RGB.Arg[arg].Source = srcp_src;
			}
		}
	}
	return 1;
}
579
580
581 /* This function assumes that rgb.Alpha and alpha.RGB are unused */
destructive_merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)582 static int destructive_merge_instructions(
583 struct rc_pair_instruction * rgb,
584 struct rc_pair_instruction * alpha)
585 {
586 const struct rc_opcode_info * opcode;
587
588 assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
589 assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
590
591 /* Presubtract registers need to be merged first so that registers
592 * needed by the presubtract operation can be placed in src0 and/or
593 * src1. */
594
595 /* Merge the rgb presubtract registers. */
596 if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
597 if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
598 return 0;
599 }
600 }
601 /* Merge the alpha presubtract registers */
602 if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
603 if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
604 return 0;
605 }
606 }
607
608 /* Copy alpha args into rgb */
609 opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
610
611 for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
612 unsigned int srcrgb = 0;
613 unsigned int srcalpha = 0;
614 unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
615 rc_register_file file = 0;
616 unsigned int index = 0;
617 int source;
618
619 if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
620 srcrgb = 1;
621 file = alpha->RGB.Src[oldsrc].File;
622 index = alpha->RGB.Src[oldsrc].Index;
623 } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
624 srcalpha = 1;
625 file = alpha->Alpha.Src[oldsrc].File;
626 index = alpha->Alpha.Src[oldsrc].Index;
627 }
628
629 source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
630 if (source < 0)
631 return 0;
632
633 rgb->Alpha.Arg[arg].Source = source;
634 rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
635 rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
636 rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
637 }
638
639 /* Copy alpha opcode into rgb */
640 rgb->Alpha.Opcode = alpha->Alpha.Opcode;
641 rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
642 rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
643 rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
644 rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
645 rgb->Alpha.Saturate = alpha->Alpha.Saturate;
646 rgb->Alpha.Omod = alpha->Alpha.Omod;
647
648 /* Merge ALU result writing */
649 if (alpha->WriteALUResult) {
650 if (rgb->WriteALUResult)
651 return 0;
652
653 rgb->WriteALUResult = alpha->WriteALUResult;
654 rgb->ALUResultCompare = alpha->ALUResultCompare;
655 }
656
657 /* Copy SemWait */
658 rgb->SemWait |= alpha->SemWait;
659
660 return 1;
661 }
662
663 /**
664 * Try to merge the given instructions into the rgb instructions.
665 *
666 * Return true on success; on failure, return false, and keep
667 * the instructions untouched.
668 */
merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)669 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
670 {
671 struct rc_pair_instruction backup;
672
673 /*Instructions can't write output registers and ALU result at the
674 * same time. */
675 if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
676 || (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
677 return 0;
678 }
679
680 /* Writing output registers in the middle of shaders is slow, so
681 * we don't want to pair output writes with temp writes. */
682 if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
683 || (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
684 return 0;
685 }
686
687 memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
688
689 if (destructive_merge_instructions(rgb, alpha))
690 return 1;
691
692 memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
693 return 0;
694 }
695
presub_nop(struct rc_instruction * emitted)696 static void presub_nop(struct rc_instruction * emitted) {
697 int prev_rgb_index, prev_alpha_index, i, num_src;
698
699 /* We don't need a nop if the previous instruction is a TEX. */
700 if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
701 return;
702 }
703 if (emitted->Prev->U.P.RGB.WriteMask)
704 prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
705 else
706 prev_rgb_index = -1;
707 if (emitted->Prev->U.P.Alpha.WriteMask)
708 prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
709 else
710 prev_alpha_index = 1;
711
712 /* Check the previous rgb instruction */
713 if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
714 num_src = rc_presubtract_src_reg_count(
715 emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
716 for (i = 0; i < num_src; i++) {
717 unsigned int index = emitted->U.P.RGB.Src[i].Index;
718 if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
719 && (index == prev_rgb_index
720 || index == prev_alpha_index)) {
721 emitted->Prev->U.P.Nop = 1;
722 return;
723 }
724 }
725 }
726
727 /* Check the previous alpha instruction. */
728 if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
729 return;
730
731 num_src = rc_presubtract_src_reg_count(
732 emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
733 for (i = 0; i < num_src; i++) {
734 unsigned int index = emitted->U.P.Alpha.Src[i].Index;
735 if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
736 && (index == prev_rgb_index || index == prev_alpha_index)) {
737 emitted->Prev->U.P.Nop = 1;
738 return;
739 }
740 }
741 }
742
/**
 * Redirect one reader argument from an RGB channel to the alpha channel.
 *
 * Every one of the first three swizzle components of \p arg that equals
 * \p old_swz is rewritten to RC_SWIZZLE_W, then an alpha source for
 * (\p old_file, \p new_index) is allocated in \p inst and stored in
 * arg->Source.  If no source can be allocated an error is reported on the
 * compiler and arg->Source is left unchanged (the swizzle has already been
 * rewritten at that point).
 */
static void rgb_to_alpha_remap (
	struct schedule_state * s,
	struct rc_instruction * inst,
	struct rc_pair_instruction_arg * arg,
	rc_register_file old_file,
	rc_swizzle old_swz,
	unsigned int new_index)
{
	unsigned int chan;
	int alpha_src;

	for (chan = 0; chan < 3; chan++) {
		if (get_swz(arg->Swizzle, chan) != old_swz)
			continue;
		SET_SWZ(arg->Swizzle, chan, RC_SWIZZLE_W);
	}

	alpha_src = rc_pair_alloc_source(&inst->U.P, 0, 1,
					 old_file, new_index);
	if (alpha_src >= 0) {
		arg->Source = alpha_src;
		return;
	}

	/* This conversion is not possible, we must have made a mistake in
	 * is_rgb_to_alpha_possible. */
	rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n");
}
770
can_remap(unsigned int opcode)771 static int can_remap(unsigned int opcode)
772 {
773 switch(opcode) {
774 case RC_OPCODE_DDX:
775 case RC_OPCODE_DDY:
776 return 0;
777 default:
778 return 1;
779 }
780 }
781
can_convert_opcode_to_alpha(unsigned int opcode)782 static int can_convert_opcode_to_alpha(unsigned int opcode)
783 {
784 switch(opcode) {
785 case RC_OPCODE_DDX:
786 case RC_OPCODE_DDY:
787 case RC_OPCODE_DP2:
788 case RC_OPCODE_DP3:
789 case RC_OPCODE_DP4:
790 return 0;
791 default:
792 return 1;
793 }
794 }
795
is_rgb_to_alpha_possible(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)796 static void is_rgb_to_alpha_possible(
797 void * userdata,
798 struct rc_instruction * inst,
799 struct rc_pair_instruction_arg * arg,
800 struct rc_pair_instruction_source * src)
801 {
802 unsigned int read_chan = RC_SWIZZLE_UNUSED;
803 unsigned int alpha_sources = 0;
804 unsigned int i;
805 struct rc_reader_data * reader_data = userdata;
806
807 if (!can_remap(inst->U.P.RGB.Opcode)
808 || !can_remap(inst->U.P.Alpha.Opcode)) {
809 reader_data->Abort = 1;
810 return;
811 }
812
813 if (!src)
814 return;
815
816 /* XXX There are some cases where we can still do the conversion if
817 * a reader reads from a presubtract source, but for now we'll prevent
818 * it. */
819 if (arg->Source == RC_PAIR_PRESUB_SRC) {
820 reader_data->Abort = 1;
821 return;
822 }
823
824 /* Make sure the source only reads the register component that we
825 * are going to be converting from. It is OK if the instruction uses
826 * this component more than once.
827 * XXX If the index we will be converting to is the same as the
828 * current index, then it is OK to read from more than one component.
829 */
830 for (i = 0; i < 3; i++) {
831 rc_swizzle swz = get_swz(arg->Swizzle, i);
832 switch(swz) {
833 case RC_SWIZZLE_X:
834 case RC_SWIZZLE_Y:
835 case RC_SWIZZLE_Z:
836 case RC_SWIZZLE_W:
837 if (read_chan == RC_SWIZZLE_UNUSED) {
838 read_chan = swz;
839 } else if (read_chan != swz) {
840 reader_data->Abort = 1;
841 return;
842 }
843 break;
844 default:
845 break;
846 }
847 }
848
849 /* Make sure there are enough alpha sources.
850 * XXX If we know what register all the readers are going
851 * to be remapped to, then in some situations we can still do
852 * the substitution, even if all 3 alpha sources are being used.*/
853 for (i = 0; i < 3; i++) {
854 if (inst->U.P.Alpha.Src[i].Used) {
855 alpha_sources++;
856 }
857 }
858 if (alpha_sources > 2) {
859 reader_data->Abort = 1;
860 return;
861 }
862 }
863
/**
 * Try to turn the RGB half of a pair instruction into an alpha instruction
 * that writes the W channel of some temporary register, and rewrite all of
 * its readers (sched_inst->GlobalReaders) to read that W channel instead.
 *
 * \param s          scheduler state; its Temporary[] table is searched for a
 *                   register whose W slot is free and is updated on success
 * \param sched_inst instruction to convert; expected to write exactly one
 *                   value (asserted)
 * \return 1 if the instruction and its readers were converted, 0 if the
 *         conversion is not possible (nothing has been modified in that
 *         case).
 */
static int convert_rgb_to_alpha(
	struct schedule_state * s,
	struct schedule_instruction * sched_inst)
{
	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
	unsigned int old_mask = pair_inst->RGB.WriteMask;
	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
	const struct rc_opcode_info * info =
				rc_get_opcode_info(pair_inst->RGB.Opcode);
	int new_index = -1;
	unsigned int i;

	/* The reader scan already decided that a conversion is impossible. */
	if (sched_inst->GlobalReaders.Abort)
		return 0;

	/* Even though we checked that we can convert to alpha previously, it is
	 * possible that another rgb source of the reader instructions was already
	 * converted to alpha and we thus have no longer free alpha sources.
	 */
	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
		if (reader.Inst->U.P.Alpha.Src[2].Used)
			return 0;
	}

	/* Nothing to convert if the RGB half writes no register. */
	if (!pair_inst->RGB.WriteMask)
		return 0;

	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
		return 0;
	}

	assert(sched_inst->NumWriteValues == 1);

	if (!sched_inst->WriteValues[0]) {
		assert(0);
		return 0;
	}

	/* We start at the old index, because if we can reuse the same
	 * register and just change the swizzle then it is more likely we
	 * will be able to convert all the readers. */
	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
		/* Channel 3 is the W slot of register i. */
		struct reg_value ** new_regvalp = get_reg_valuep(
						s, RC_FILE_TEMPORARY, i, 3);
		if (!*new_regvalp) {
			struct reg_value ** old_regvalp =
				get_reg_valuep(s,
					RC_FILE_TEMPORARY,
					pair_inst->RGB.DestIndex,
					rc_mask_to_swizzle(old_mask));
			new_index = i;
			/* The free W slot now tracks the value that used to
			 * live in the old RGB channel. */
			*new_regvalp = *old_regvalp;
			break;
		}
	}
	/* No temporary with a free W slot was found. */
	if (new_index < 0) {
		return 0;
	}

	/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
	 * as the RGB opcode, then the Alpha instruction will already contain
	 * the correct opcode and instruction args, so we do not want to
	 * overwrite them.
	 */
	if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
		pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
		memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
						sizeof(pair_inst->Alpha.Arg));
	}
	pair_inst->Alpha.DestIndex = new_index;
	pair_inst->Alpha.WriteMask = RC_MASK_W;
	pair_inst->Alpha.Target = pair_inst->RGB.Target;
	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
	pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
	/* Move the swizzles into the first chan */
	for (i = 0; i < info->NumSrcRegs; i++) {
		unsigned int j;
		for (j = 0; j < 3; j++) {
			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
			if (swz != RC_SWIZZLE_UNUSED) {
				pair_inst->Alpha.Arg[i].Swizzle =
							rc_init_swizzle(swz, 1);
				break;
			}
		}
	}
	/* Clear the RGB half now that its work lives in the alpha half. */
	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
	pair_inst->RGB.DestIndex = 0;
	pair_inst->RGB.WriteMask = 0;
	pair_inst->RGB.Target = 0;
	pair_inst->RGB.OutputWriteMask = 0;
	pair_inst->RGB.DepthWriteMask = 0;
	pair_inst->RGB.Saturate = 0;
	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));

	/* Point every reader at the new register's W channel. */
	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
		rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg,
					RC_FILE_TEMPORARY, old_swz, new_index);
	}
	return 1;
}
970
try_convert_and_pair(struct schedule_state * s,struct schedule_instruction ** inst_list)971 static void try_convert_and_pair(
972 struct schedule_state *s,
973 struct schedule_instruction ** inst_list)
974 {
975 struct schedule_instruction * list_ptr = *inst_list;
976 while (list_ptr && *inst_list && (*inst_list)->NextReady) {
977 int paired = 0;
978 if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
979 && list_ptr->Instruction->U.P.RGB.Opcode
980 != RC_OPCODE_REPL_ALPHA) {
981 goto next;
982 }
983 if (list_ptr->NumWriteValues == 1
984 && convert_rgb_to_alpha(s, list_ptr)) {
985
986 struct schedule_instruction * pair_ptr;
987 remove_inst_from_list(inst_list, list_ptr);
988 add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
989
990 for (pair_ptr = s->ReadyRGB; pair_ptr;
991 pair_ptr = pair_ptr->NextReady) {
992 if (merge_instructions(&pair_ptr->Instruction->U.P,
993 &list_ptr->Instruction->U.P)) {
994 remove_inst_from_list(&s->ReadyAlpha, list_ptr);
995 remove_inst_from_list(&s->ReadyRGB, pair_ptr);
996 pair_ptr->PairedInst = list_ptr;
997
998 add_inst_to_list(&s->ReadyFullALU, pair_ptr);
999 list_ptr = *inst_list;
1000 paired = 1;
1001 break;
1002 }
1003
1004 }
1005 }
1006 if (!paired) {
1007 next:
1008 list_ptr = list_ptr->NextReady;
1009 }
1010 }
1011 }
1012
1013 /**
1014 * This function attempts to merge RGB and Alpha instructions together.
1015 */
pair_instructions(struct schedule_state * s)1016 static void pair_instructions(struct schedule_state * s)
1017 {
1018 struct schedule_instruction *rgb_ptr;
1019 struct schedule_instruction *alpha_ptr;
1020
1021 /* Some pairings might fail because they require too
1022 * many source slots; try all possible pairings if necessary */
1023 rgb_ptr = s->ReadyRGB;
1024 while(rgb_ptr) {
1025 struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1026 alpha_ptr = s->ReadyAlpha;
1027 while(alpha_ptr) {
1028 struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1029 if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1030 /* Remove RGB and Alpha from their ready lists.
1031 */
1032 remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1033 remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1034 rgb_ptr->PairedInst = alpha_ptr;
1035 add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1036 break;
1037 }
1038 alpha_ptr = alpha_next;
1039 }
1040 rgb_ptr = rgb_next;
1041 }
1042
1043 if (!s->Opt) {
1044 return;
1045 }
1046
1047 /* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1048 * slot can be converted into Alpha instructions. */
1049 try_convert_and_pair(s, &s->ReadyFullALU);
1050
1051 /* Try to convert some of the RGB instructions to Alpha and
1052 * try to pair it with another RGB. */
1053 try_convert_and_pair(s, &s->ReadyRGB);
1054 }
1055
update_max_score(struct schedule_state * s,struct schedule_instruction ** list,int * max_score,struct schedule_instruction ** max_inst_out,struct schedule_instruction *** list_out)1056 static void update_max_score(
1057 struct schedule_state * s,
1058 struct schedule_instruction ** list,
1059 int * max_score,
1060 struct schedule_instruction ** max_inst_out,
1061 struct schedule_instruction *** list_out)
1062 {
1063 struct schedule_instruction * list_ptr;
1064 for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1065 int score;
1066 s->CalcScore(list_ptr);
1067 score = list_ptr->Score;
1068 if (!*max_inst_out || score > *max_score) {
1069 *max_score = score;
1070 *max_inst_out = list_ptr;
1071 *list_out = list;
1072 }
1073 }
1074 }
1075
emit_instruction(struct schedule_state * s,struct rc_instruction * before)1076 static void emit_instruction(
1077 struct schedule_state * s,
1078 struct rc_instruction * before)
1079 {
1080 int max_score = -1;
1081 struct schedule_instruction * max_inst = NULL;
1082 struct schedule_instruction ** max_list = NULL;
1083 unsigned tex_count = 0;
1084 struct schedule_instruction * tex_ptr;
1085
1086 pair_instructions(s);
1087 #if VERBOSE
1088 fprintf(stderr, "Full:\n");
1089 print_list(s->ReadyFullALU);
1090 fprintf(stderr, "RGB:\n");
1091 print_list(s->ReadyRGB);
1092 fprintf(stderr, "Alpha:\n");
1093 print_list(s->ReadyAlpha);
1094 fprintf(stderr, "TEX:\n");
1095 print_list(s->ReadyTEX);
1096 #endif
1097
1098 for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1099 if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1100 emit_all_tex(s, before);
1101 s->PrevBlockHasKil = 1;
1102 return;
1103 }
1104 tex_count++;
1105 }
1106 update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1107 update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1108 update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1109
1110 if (tex_count >= s->max_tex_group || max_score == -1
1111 || (s->TEXCount > 0 && tex_count == s->TEXCount)
1112 || (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) {
1113 emit_all_tex(s, before);
1114 } else {
1115
1116
1117 remove_inst_from_list(max_list, max_inst);
1118 rc_insert_instruction(before->Prev, max_inst->Instruction);
1119 commit_alu_instruction(s, max_inst);
1120
1121 presub_nop(before->Prev);
1122 }
1123 }
1124
add_tex_reader(struct schedule_state * s,struct schedule_instruction * writer,struct schedule_instruction * reader)1125 static void add_tex_reader(
1126 struct schedule_state * s,
1127 struct schedule_instruction * writer,
1128 struct schedule_instruction * reader)
1129 {
1130 if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1131 /*Not a TEX instructions */
1132 return;
1133 }
1134 reader->TexReadCount++;
1135 rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1136 }
1137
/**
 * Per-channel read callback used while scanning a block for dependencies
 * (passed to rc_for_all_reads_chan() by schedule_block()).
 *
 * Registers s->Current as a reader of the value currently stored in
 * file[index].chan:
 *  - registers for which get_reg_valuep() returns NULL are ignored;
 *  - if s->Current itself wrote the value, only the TEX reader bookkeeping
 *    for the previous writer is updated;
 *  - otherwise a reg_value_reader is prepended to the value's reader list
 *    (creating the value if the register has not been seen in this block)
 *    and, if the value has a writer, s->Current gains one dependency.
 *
 * data is the struct schedule_state; inst is unused here (the instruction
 * is taken from s->Current); file/index/chan identify the register
 * component that is read.
 */
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 * Why?
		 * Because each instruction depends on the writers of its source
		 * registers _and_ the most recent writer of its destination
		 * register.  In this case, the current instruction (s->Current)
		 * has a dependency that both writes to one of its source
		 * registers and was the most recent writer to its destination
		 * register.  We have already marked this dependency in
		 * scan_write(), so we don't need to do it again.
		 */

		/* We need to make sure we are adding s->Current to the
		 * previous writer's list of TexReaders, if the previous writer
		 * was a TEX instruction.
		 */
		add_tex_reader(s, s->PrevWriter[chan], s->Current);

		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	reader->Reader = s->Current;
	/* Pool allocations are not assumed to be zeroed (the other
	 * allocations in this file are memset explicitly), so terminate the
	 * node here.  Without this, the first reader of a value would be
	 * left as the list tail with an indeterminate Next pointer. */
	reader->Next = NULL;

	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		/* Prepend to the existing reader list. */
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			add_tex_reader(s, (*v)->Writer, s->Current);
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	/* ReadValues[] has 12 slots; report an error instead of overflowing. */
	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __func__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}
1199
/**
 * Per-channel write callback used while scanning a block for dependencies
 * (passed to rc_for_all_writes_chan() by schedule_block()).
 *
 * Creates a fresh reg_value written by s->Current for file[index].chan and
 * makes it the register's current value.  If the register already held a
 * value, the new value is chained after it, s->Current gains one
 * dependency and the old value's writer is remembered in
 * s->PrevWriter[chan].  The new value is also recorded in
 * s->Current->WriteValues (4 slots; overflow is reported via rc_error()).
 *
 * data is the struct schedule_state; inst is unused here; file/index/chan
 * identify the register component that is written.
 */
static void scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** slot = get_reg_valuep(s, file, index, chan);
	struct reg_value * value;

	/* Registers without a tracking slot are ignored. */
	if (slot == NULL) {
		return;
	}

	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	value = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
	memset(value, 0, sizeof(struct reg_value));
	value->Writer = s->Current;

	if (*slot != NULL) {
		struct reg_value * previous = *slot;

		/* The new value supersedes the previous one, so this
		 * instruction has to wait for it. */
		previous->Next = value;
		s->Current->NumDependencies++;
		/* Keep track of the previous writer to s->Current's
		 * destination register. */
		s->PrevWriter[chan] = previous->Writer;
	}

	*slot = value;

	if (s->Current->NumWriteValues < 4) {
		s->Current->WriteValues[s->Current->NumWriteValues++] = value;
	} else {
		rc_error(s->C, "%s: NumWriteValues overflow\n", __func__);
	}
}
1233
is_rgb_to_alpha_possible_normal(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)1234 static void is_rgb_to_alpha_possible_normal(
1235 void * userdata,
1236 struct rc_instruction * inst,
1237 struct rc_src_register * src)
1238 {
1239 struct rc_reader_data * reader_data = userdata;
1240 reader_data->Abort = 1;
1241
1242 }
1243
schedule_block(struct schedule_state * s,struct rc_instruction * begin,struct rc_instruction * end)1244 static void schedule_block(struct schedule_state * s,
1245 struct rc_instruction * begin, struct rc_instruction * end)
1246 {
1247 unsigned int ip;
1248
1249 /* Scan instructions for data dependencies */
1250 ip = 0;
1251 for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1252 s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1253 memset(s->Current, 0, sizeof(struct schedule_instruction));
1254
1255 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1256 const struct rc_opcode_info * info =
1257 rc_get_opcode_info(inst->U.I.Opcode);
1258 if (info->HasTexture) {
1259 s->TEXCount++;
1260 }
1261 }
1262
1263 /* XXX: This causes SemWait to be set for all instructions in
1264 * a block if the previous block contained a TEX instruction.
1265 * We can do better here, but it will take a lot of work. */
1266 if (s->PrevBlockHasTex) {
1267 s->Current->TexReadCount = 1;
1268 }
1269
1270 s->Current->Instruction = inst;
1271 inst->IP = ip++;
1272
1273 DBG("%i: Scanning\n", inst->IP);
1274
1275 /* The order of things here is subtle and maybe slightly
1276 * counter-intuitive, to account for the case where an
1277 * instruction writes to the same register as it reads
1278 * from. */
1279 rc_for_all_writes_chan(inst, &scan_write, s);
1280 rc_for_all_reads_chan(inst, &scan_read, s);
1281
1282 DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1283
1284 if (!s->Current->NumDependencies) {
1285 instruction_ready(s, s->Current);
1286 }
1287
1288 /* Get global readers for possible RGB->Alpha conversion. */
1289 s->Current->GlobalReaders.ExitOnAbort = 1;
1290 rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1291 is_rgb_to_alpha_possible_normal,
1292 is_rgb_to_alpha_possible, NULL);
1293 }
1294
1295 /* Temporarily unlink all instructions */
1296 begin->Prev->Next = end;
1297 end->Prev = begin->Prev;
1298
1299 /* Schedule instructions back */
1300 while(!s->C->Error &&
1301 (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1302 emit_instruction(s, end);
1303 }
1304 }
1305
is_controlflow(struct rc_instruction * inst)1306 static int is_controlflow(struct rc_instruction * inst)
1307 {
1308 if (inst->Type == RC_INSTRUCTION_NORMAL) {
1309 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1310 return opcode->IsFlowControl;
1311 }
1312 return 0;
1313 }
1314
rc_pair_schedule(struct radeon_compiler * cc,void * user)1315 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1316 {
1317 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1318 struct schedule_state s;
1319 struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1320 unsigned int * opt = user;
1321
1322 memset(&s, 0, sizeof(s));
1323 s.Opt = *opt;
1324 s.C = &c->Base;
1325 if (s.C->is_r500) {
1326 s.CalcScore = calc_score_readers;
1327 } else {
1328 s.CalcScore = calc_score_r300;
1329 }
1330 s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1331 while(inst != &c->Base.Program.Instructions) {
1332 struct rc_instruction * first;
1333
1334 if (is_controlflow(inst)) {
1335 /* The TexSemWait flag is already properly set for ALU
1336 * instructions using the results of normal TEX lookup,
1337 * however it was found empirically that TEXKIL also needs
1338 * synchronization with the control flow. This might not be optimal,
1339 * however the docs don't offer any guidance in this matter.
1340 */
1341 if (s.PrevBlockHasKil) {
1342 inst->U.I.TexSemWait = 1;
1343 s.PrevBlockHasKil = 0;
1344 }
1345 inst = inst->Next;
1346 continue;
1347 }
1348
1349 first = inst;
1350
1351 while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1352 inst = inst->Next;
1353
1354 DBG("Schedule one block\n");
1355 memset(s.Temporary, 0, sizeof(s.Temporary));
1356 s.TEXCount = 0;
1357 schedule_block(&s, first, inst);
1358 if (s.PendingTEX) {
1359 s.PrevBlockHasTex = 1;
1360 }
1361 }
1362 }
1363