xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_program_pair.h"
7 
8 #include <stdio.h>
9 
10 #include "radeon_compiler.h"
11 #include "radeon_compiler_util.h"
12 #include "radeon_dataflow.h"
13 #include "radeon_list.h"
14 #include "radeon_variable.h"
15 
16 #include "util/u_debug.h"
17 
18 #define VERBOSE 0
19 
20 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
21 
/**
 * Scheduler bookkeeping attached to a single rc_instruction: the values it
 * reads and writes, its outstanding dependency count, its ready-list link
 * and its scheduling score.
 */
struct schedule_instruction {
	/** The instruction this bookkeeping entry describes. */
	struct rc_instruction * Instruction;

	/** Next instruction in the linked list of ready instructions. */
	struct schedule_instruction *NextReady;

	/** Values that this instruction reads and writes */
	struct reg_value * WriteValues[4];
	struct reg_value * ReadValues[12];
	/** Number of valid entries in WriteValues and ReadValues respectively. */
	unsigned int NumWriteValues:3;
	unsigned int NumReadValues:4;

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled.  When decrease_dependencies()
	 * brings this to zero the instruction is put on a ready list.
	 */
	unsigned int NumDependencies:5;

	/** List of all readers (see rc_get_readers() for the definition of
	 * "all readers"), even those outside the basic block this instruction
	 * lives in. */
	struct rc_reader_data GlobalReaders;

	/** If the scheduler has paired an RGB and an Alpha instruction together,
	 * PairedInst references the alpha instruction's dependency information.
	 */
	struct schedule_instruction * PairedInst;

	/** This scheduler uses the value of Score to determine which
	 * instruction to schedule.  Instructions with a higher value of Score
	 * will be scheduled first. */
	int Score;

	/** The number of components that read from a TEX instruction.
	 * Decremented by notify_sem_wait() for each pending TEX instruction
	 * that lists this instruction as a reader. */
	unsigned TexReadCount;

	/** For TEX instructions a list of readers; each list item is a
	 * struct schedule_instruction (see notify_sem_wait()). */
	struct rc_list * TexReaders;
};
61 
62 
/**
 * Used to keep track of which instructions read a value.
 * One node of the singly linked list hanging off reg_value::Readers.
 */
struct reg_value_reader {
	/** The instruction that reads the value. */
	struct schedule_instruction *Reader;
	/** Next reader of the same value, or NULL at the end of the list. */
	struct reg_value_reader *Next;
};
70 
/**
 * Used to keep track which values are stored in each component of a
 * RC_FILE_TEMPORARY.
 */
struct reg_value {
	/** The instruction that produces this value. */
	struct schedule_instruction * Writer;

	/**
	 * Unordered linked list of instructions that read from this value.
	 * When this value becomes available (its writer is committed), we
	 * decrease all readers' dependency count.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is decremented each time
	 * a reader of the value is committed.
	 * When the reader count reaches zero, the dependency count
	 * of the instruction writing \ref Next is decremented.
	 */
	unsigned int NumReaders;

	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
};
95 
/**
 * Per-register tracking state: one reg_value slot per channel.
 * get_reg_valuep() indexes Values with the channel number.
 */
struct register_state {
	struct reg_value * Values[4];
};
99 
/**
 * Linked-list node describing a register remapping.
 * NOTE(review): this struct is not referenced in the part of the file
 * reviewed here; the field meanings are not confirmed and should be
 * checked against the code that uses it.
 */
struct remap_reg {
	struct rc_instruction * Inst;
	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
	unsigned int OldSwizzle:3;
	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
	unsigned int NewSwizzle:3;
	unsigned int OnlyTexReads:1;
	/** Next node in the list. */
	struct remap_reg * Next;
};
109 
/**
 * State of the scheduler: register value tracking, the ready lists and
 * the list of TEX instructions that have been emitted but not yet waited on.
 */
struct schedule_state {
	/** Compiler context; used for error reporting and pool allocation. */
	struct radeon_compiler * C;
	struct schedule_instruction * Current;
	/** Array of the previous writers of Current's destination register
	 * indexed by channel. */
	struct schedule_instruction * PrevWriter[4];

	/** Value tracking for temporary registers, indexed by register index
	 * (see get_reg_valuep()). */
	struct register_state Temporary[RC_REGISTER_MAX_INDEX];

	/**
	 * Linked lists of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	/*@{*/
	struct schedule_instruction *ReadyFullALU;
	struct schedule_instruction *ReadyRGB;
	struct schedule_instruction *ReadyAlpha;
	struct schedule_instruction *ReadyTEX;
	/*@}*/
	/** TEX instructions added by emit_all_tex(); the list is walked and
	 * reset to NULL by notify_sem_wait(). */
	struct rc_list *PendingTEX;

	/** Scoring heuristic; calc_score_r300() and calc_score_readers()
	 * have a matching signature. */
	void (*CalcScore)(struct schedule_instruction *);
	long max_tex_group;
	unsigned PrevBlockHasTex:1;
	unsigned PrevBlockHasKil:1;
	unsigned TEXCount;
	unsigned Opt:1;
};
138 
/**
 * Look up the slot that tracks the current value of one register channel.
 *
 * Only registers in RC_FILE_TEMPORARY are tracked.
 *
 * \param s     scheduler state owning the Temporary table
 * \param file  register file of the register
 * \param index register index; must be below RC_REGISTER_MAX_INDEX
 * \param chan  channel used to index register_state::Values; it is NOT
 *              range-checked here, so callers must pass a value below 4
 * \return pointer to the slot, or NULL if \p file is not RC_FILE_TEMPORARY
 *         or \p index is out of bounds (the latter is also reported through
 *         rc_error()).
 */
static struct reg_value ** get_reg_valuep(struct schedule_state * s,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	if (file != RC_FILE_TEMPORARY)
		return NULL;

	if (index >= RC_REGISTER_MAX_INDEX) {
		/* index is unsigned, so it has to be printed with %u. */
		rc_error(s->C, "%s: index %u out of bounds\n", __func__, index);
		return NULL;
	}

	return &s->Temporary[index].Values[chan];
}
152 
get_tex_read_count(struct schedule_instruction * sinst)153 static unsigned get_tex_read_count(struct schedule_instruction * sinst)
154 {
155 	unsigned tex_read_count = sinst->TexReadCount;
156 	if (sinst->PairedInst) {
157 		tex_read_count += sinst->PairedInst->TexReadCount;
158 	}
159 	return tex_read_count;
160 }
161 
162 #if VERBOSE
print_list(struct schedule_instruction * sinst)163 static void print_list(struct schedule_instruction * sinst)
164 {
165 	struct schedule_instruction * ptr;
166 	for (ptr = sinst; ptr; ptr=ptr->NextReady) {
167 		unsigned tex_read_count = get_tex_read_count(ptr);
168 		unsigned score = sinst->Score;
169 		fprintf(stderr,"%u (%d) [%u],", ptr->Instruction->IP, score,
170 						tex_read_count);
171 	}
172 	fprintf(stderr, "\n");
173 }
174 #endif
175 
remove_inst_from_list(struct schedule_instruction ** list,struct schedule_instruction * inst)176 static void remove_inst_from_list(struct schedule_instruction ** list,
177 					struct schedule_instruction * inst)
178 {
179 	struct schedule_instruction * prev = NULL;
180 	struct schedule_instruction * list_ptr;
181 	for (list_ptr = *list; list_ptr; prev = list_ptr,
182 					list_ptr = list_ptr->NextReady) {
183 		if (list_ptr == inst) {
184 			if (prev) {
185 				prev->NextReady = inst->NextReady;
186 			} else {
187 				*list = inst->NextReady;
188 			}
189 			inst->NextReady = NULL;
190 			break;
191 		}
192 	}
193 }
194 
add_inst_to_list(struct schedule_instruction ** list,struct schedule_instruction * inst)195 static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
196 {
197 	inst->NextReady = *list;
198 	*list = inst;
199 }
200 
add_inst_to_list_score(struct schedule_instruction ** list,struct schedule_instruction * inst)201 static void add_inst_to_list_score(struct schedule_instruction ** list,
202 					struct schedule_instruction * inst)
203 {
204 	struct schedule_instruction * temp;
205 	struct schedule_instruction * prev;
206 	if (!*list) {
207 		*list = inst;
208 		return;
209 	}
210 	temp = *list;
211 	prev = NULL;
212 	while(temp && inst->Score <= temp->Score) {
213 		prev = temp;
214 		temp = temp->NextReady;
215 	}
216 
217 	if (!prev) {
218 		inst->NextReady = temp;
219 		*list = inst;
220 	} else {
221 		prev->NextReady = inst;
222 		inst->NextReady = temp;
223 	}
224 }
225 
instruction_ready(struct schedule_state * s,struct schedule_instruction * sinst)226 static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
227 {
228 	DBG("%i is now ready\n", sinst->Instruction->IP);
229 
230 	/* Adding Ready TEX instructions to the end of the "Ready List" helps
231 	 * us emit TEX instructions in blocks without losing our place. */
232 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
233 		add_inst_to_list_score(&s->ReadyTEX, sinst);
234 	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
235 		add_inst_to_list_score(&s->ReadyRGB, sinst);
236 	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
237 		add_inst_to_list_score(&s->ReadyAlpha, sinst);
238 	else
239 		add_inst_to_list_score(&s->ReadyFullALU, sinst);
240 }
241 
decrease_dependencies(struct schedule_state * s,struct schedule_instruction * sinst)242 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
243 {
244 	assert(sinst->NumDependencies > 0);
245 	sinst->NumDependencies--;
246 	if (!sinst->NumDependencies)
247 		instruction_ready(s, sinst);
248 }
249 
250 /* These functions provide different heuristics for scheduling instructions.
251  * The default is calc_score_readers. */
252 
#if 0

/* Disabled heuristic: give every instruction the same score of zero. */
static void calc_score_zero(struct schedule_instruction * sinst)
{
	sinst->Score = 0;
}

/* Disabled heuristic: score an instruction by the readers of the values it
 * writes.  Each reader contributes its remaining dependency count, plus a
 * bonus of 100 when this instruction is the reader's last dependency. */
static void calc_score_deps(struct schedule_instruction * sinst)
{
	int i;
	sinst->Score = 0;
	for (i = 0; i < sinst->NumWriteValues; i++) {
		struct reg_value * v = sinst->WriteValues[i];
		if (v->NumReaders) {
			struct reg_value_reader * r;
			for (r = v->Readers; r; r = r->Next) {
				if (r->Reader->NumDependencies == 1) {
					sinst->Score += 100;
				}
				sinst->Score += r->Reader->NumDependencies;
			}
		}
	}
}

#endif
279 
280 #define NO_OUTPUT_SCORE (1 << 24)
281 
score_no_output(struct schedule_instruction * sinst)282 static void score_no_output(struct schedule_instruction * sinst)
283 {
284 	assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
285 	if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
286 			!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
287 		if (sinst->PairedInst) {
288 			if (!sinst->PairedInst->Instruction->U.P.
289 							RGB.OutputWriteMask
290 					&& !sinst->PairedInst->Instruction->U.P.
291 							Alpha.OutputWriteMask) {
292 				sinst->Score |= NO_OUTPUT_SCORE;
293 			}
294 
295 		} else {
296 			sinst->Score |= NO_OUTPUT_SCORE;
297 		}
298 	}
299 }
300 
301 #define PAIRED_SCORE (1 << 16)
302 
calc_score_r300(struct schedule_instruction * sinst)303 static void calc_score_r300(struct schedule_instruction * sinst)
304 {
305 	unsigned src_idx;
306 
307 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
308 		sinst->Score = 0;
309 		return;
310 	}
311 
312 	score_no_output(sinst);
313 
314 	if (sinst->PairedInst) {
315 		sinst->Score |= PAIRED_SCORE;
316 		return;
317 	}
318 
319 	for (src_idx = 0; src_idx < 4; src_idx++) {
320 		sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
321 				sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
322 	}
323 }
324 
325 #define NO_READ_TEX_SCORE (1 << 16)
326 
calc_score_readers(struct schedule_instruction * sinst)327 static void calc_score_readers(struct schedule_instruction * sinst)
328 {
329 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
330 		sinst->Score = 0;
331 	} else {
332 		sinst->Score = sinst->NumReadValues;
333 		if (sinst->PairedInst) {
334 			sinst->Score += sinst->PairedInst->NumReadValues;
335 		}
336 		if (get_tex_read_count(sinst) == 0) {
337 			sinst->Score |= NO_READ_TEX_SCORE;
338 		}
339 		score_no_output(sinst);
340 	}
341 }
342 
343 /**
344  * This function decreases the dependencies of the next instruction that
345  * wants to write to each of sinst's read values.
346  */
commit_update_reads(struct schedule_state * s,struct schedule_instruction * sinst)347 static void commit_update_reads(struct schedule_state * s,
348 					struct schedule_instruction * sinst){
349 	do {
350 		for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
351 			struct reg_value * v = sinst->ReadValues[i];
352 			assert(v->NumReaders > 0);
353 			v->NumReaders--;
354 			if (!v->NumReaders) {
355 				if (v->Next) {
356 					decrease_dependencies(s, v->Next->Writer);
357 				}
358 			}
359 		}
360 	} while ((sinst = sinst->PairedInst));
361 }
362 
commit_update_writes(struct schedule_state * s,struct schedule_instruction * sinst)363 static void commit_update_writes(struct schedule_state * s,
364 					struct schedule_instruction * sinst){
365 	do {
366 		for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
367 			struct reg_value * v = sinst->WriteValues[i];
368 			if (v->NumReaders) {
369 				for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
370 					decrease_dependencies(s, r->Reader);
371 				}
372 			} else {
373 				/* This happens in instruction sequences of the type
374 				 *  OP r.x, ...;
375 				 *  OP r.x, r.x, ...;
376 				 * See also the subtlety in how instructions that both
377 				 * read and write the same register are scanned.
378 				 */
379 				if (v->Next)
380 					decrease_dependencies(s, v->Next->Writer);
381 			}
382 		}
383 	} while ((sinst = sinst->PairedInst));
384 }
385 
notify_sem_wait(struct schedule_state * s)386 static void notify_sem_wait(struct schedule_state *s)
387 {
388 	struct rc_list * pend_ptr;
389 	for (pend_ptr = s->PendingTEX; pend_ptr; pend_ptr = pend_ptr->Next) {
390 		struct rc_list * read_ptr;
391 		struct schedule_instruction * pending = pend_ptr->Item;
392 		for (read_ptr = pending->TexReaders; read_ptr;
393 						read_ptr = read_ptr->Next) {
394 			struct schedule_instruction * reader = read_ptr->Item;
395 			reader->TexReadCount--;
396 		}
397 	}
398 	s->PendingTEX = NULL;
399 }
400 
commit_alu_instruction(struct schedule_state * s,struct schedule_instruction * sinst)401 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
402 {
403 	DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
404 
405 	commit_update_reads(s, sinst);
406 
407 	commit_update_writes(s, sinst);
408 
409 	if (get_tex_read_count(sinst) > 0) {
410 		sinst->Instruction->U.P.SemWait = 1;
411 		notify_sem_wait(s);
412 	}
413 }
414 
415 /**
416  * Emit all ready texture instructions in a single block.
417  *
418  * Emit as a single block to (hopefully) sample many textures in parallel,
419  * and to avoid hardware indirections on R300.
420  */
emit_all_tex(struct schedule_state * s,struct rc_instruction * before)421 static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
422 {
423 	struct schedule_instruction *readytex;
424 	struct rc_instruction * inst_begin;
425 
426 	assert(s->ReadyTEX);
427 	notify_sem_wait(s);
428 
429 	/* Node marker for R300 */
430 	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
431 	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
432 
433 	/* Link texture instructions back in */
434 	readytex = s->ReadyTEX;
435 	while(readytex) {
436 		rc_insert_instruction(before->Prev, readytex->Instruction);
437 		DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
438 
439 		/* All of the TEX instructions in the same TEX block have
440 		 * their source registers read from before any of the
441 		 * instructions in that block write to their destination
442 		 * registers.  This means that when we commit a TEX
443 		 * instruction, any other TEX instruction that wants to write
444 		 * to one of the committed instruction's source register can be
445 		 * marked as ready and should be emitted in the same TEX
446 		 * block. This prevents the following sequence from being
447 		 * emitted in two different TEX blocks:
448 		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
449 		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
450 		 */
451 		commit_update_reads(s, readytex);
452 		readytex = readytex->NextReady;
453 	}
454 	readytex = s->ReadyTEX;
455 	s->ReadyTEX = NULL;
456 	while(readytex){
457 		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
458 		commit_update_writes(s, readytex);
459 		/* Set semaphore bits for last TEX instruction in the block */
460 		if (!readytex->NextReady) {
461 			readytex->Instruction->U.I.TexSemAcquire = 1;
462 			readytex->Instruction->U.I.TexSemWait = 1;
463 		}
464 		rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
465 		readytex = readytex->NextReady;
466 	}
467 }
468 
/* This is a helper function for destructive_merge_instructions().  It helps
 * merge presubtract sources from two instructions and makes sure the
 * presubtract sources end up in the correct spot.  This function assumes that
 * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
 * but no scalar instruction (alpha).
 *
 * dst_full is modified in place and may be left partially modified when the
 * merge fails.
 *
 * @param dst_full instruction that receives the presubtract sources
 * @param src sub-instruction (passed by value) whose presubtract sources
 *            are merged into dst_full
 * @param type RC_SOURCE_RGB or RC_SOURCE_ALPHA; selects which source set of
 *             dst_full is filled
 * @return 0 if merging the presubtract sources fails.
 * @return 1 if merging the presubtract sources succeeds.
 */
static int merge_presub_sources(
	struct rc_pair_instruction * dst_full,
	struct rc_pair_sub_instruction src,
	unsigned int type)
{
	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
	struct rc_pair_sub_instruction * dst_sub;
	const struct rc_opcode_info * info;

	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);

	switch(type) {
	case RC_SOURCE_RGB:
		is_rgb = 1;
		is_alpha = 0;
		dst_sub = &dst_full->RGB;
		break;
	case RC_SOURCE_ALPHA:
		is_rgb = 0;
		is_alpha = 1;
		dst_sub = &dst_full->Alpha;
		break;
	default:
		assert(0);
		return 0;
	}

	/* Only the RGB args of dst_full are rewritten below, so the source
	 * count of the RGB opcode bounds the arg loop. */
	info = rc_get_opcode_info(dst_full->RGB.Opcode);

	/* The destination already uses its presubtract slot. */
	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
		return 0;

	srcp_regs = rc_presubtract_src_reg_count(
					src.Src[RC_PAIR_PRESUB_SRC].Index);
	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
		unsigned int arg;
		int free_source;
		/* Set when the displaced source was moved to a newly allocated
		 * slot, so args are only redirected in one direction. */
		unsigned int one_way = 0;
		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
		struct rc_pair_instruction_source temp;

		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
							srcp.File, srcp.Index);

		/* If free_source < 0 then there are no free source
		 * slots. */
		if (free_source < 0)
			return 0;

		/* Move the presubtract register into slot srcp_src, remembering
		 * what used to be there. */
		temp = dst_sub->Src[srcp_src];
		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];

		/* srcp needs src0 and src1 to be the same */
		if (free_source < srcp_src) {
			if (!temp.Used)
				continue;
			/* Find a new home for the source that was displaced. */
			free_source = rc_pair_alloc_source(dst_full, is_rgb,
					is_alpha, temp.File, temp.Index);
			if (free_source < 0)
				return 0;
			one_way = 1;
		} else {
			/* Swap: the displaced source takes the allocated slot. */
			dst_sub->Src[free_source] = temp;
		}

		/* If free_source == srcp_src, then the presubtract
		 * source is already in the correct place. */
		if (free_source == srcp_src)
			continue;

		/* Shuffle the sources, so we can put the
		 * presubtract source in the correct place. */
		for(arg = 0; arg < info->NumSrcRegs; arg++) {
			/* If the arg does read both from rgb and alpha, then we need to rewrite
			 * both sources and the code currently doesn't handle this.
			 * FIXME: This is definitely solvable, however shader-db shows it is
			 * not worth the effort.
			 */
			if (rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_ALPHA &&
				rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) & RC_SOURCE_RGB)
				return 0;

			/* If this arg does not read from a source of the
			 * requested type, do nothing. */
			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
								& type)) {
				continue;
			}

			if (dst_full->RGB.Arg[arg].Source == srcp_src)
				dst_full->RGB.Arg[arg].Source = free_source;
			/* We need to do this just in case register
			 * is one of the sources already, but in the
			 * wrong spot. */
			else if(dst_full->RGB.Arg[arg].Source == free_source
							&& !one_way) {
				dst_full->RGB.Arg[arg].Source = srcp_src;
			}
		}
	}
	return 1;
}
579 
580 
/* Merge the Alpha half of `alpha` into `rgb`, modifying `rgb` in place.
 *
 * This function assumes that rgb.Alpha and alpha.RGB are unused
 * (both are asserted to have the NOP opcode).
 *
 * Returns 1 on success.  Returns 0 on failure, in which case `rgb` may
 * already have been partially modified; merge_instructions() restores it
 * from a backup copy.
 */
static int destructive_merge_instructions(
		struct rc_pair_instruction * rgb,
		struct rc_pair_instruction * alpha)
{
	const struct rc_opcode_info * opcode;

	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);

	/* Presubtract registers need to be merged first so that registers
	 * needed by the presubtract operation can be placed in src0 and/or
	 * src1. */

	/* Merge the rgb presubtract registers. */
	if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
		if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
			return 0;
		}
	}
	/* Merge the alpha presubtract registers */
	if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
		if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
			return 0;
		}
	}

	/* Copy alpha args into rgb */
	opcode = rc_get_opcode_info(alpha->Alpha.Opcode);

	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
		unsigned int srcrgb = 0;
		unsigned int srcalpha = 0;
		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
		rc_register_file file = 0;
		unsigned int index = 0;
		int source;

		/* The first swizzle component decides where the arg reads from:
		 * values below 3 select the RGB source set, the value 3 selects
		 * the Alpha source set.  For any other value both flags stay 0
		 * and file/index keep their zero defaults. */
		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
			srcrgb = 1;
			file = alpha->RGB.Src[oldsrc].File;
			index = alpha->RGB.Src[oldsrc].Index;
		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
			srcalpha = 1;
			file = alpha->Alpha.Src[oldsrc].File;
			index = alpha->Alpha.Src[oldsrc].Index;
		}

		/* A negative result means no source slot could be allocated. */
		source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
		if (source < 0)
			return 0;

		rgb->Alpha.Arg[arg].Source = source;
		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
	}

	/* Copy alpha opcode into rgb */
	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
	rgb->Alpha.Omod = alpha->Alpha.Omod;

	/* Merge ALU result writing.  Only one of the two instructions may
	 * write the ALU result. */
	if (alpha->WriteALUResult) {
		if (rgb->WriteALUResult)
			return 0;

		rgb->WriteALUResult = alpha->WriteALUResult;
		rgb->ALUResultCompare = alpha->ALUResultCompare;
	}

	/* Copy SemWait */
	rgb->SemWait |= alpha->SemWait;

	return 1;
}
662 
663 /**
664  * Try to merge the given instructions into the rgb instructions.
665  *
666  * Return true on success; on failure, return false, and keep
667  * the instructions untouched.
668  */
merge_instructions(struct rc_pair_instruction * rgb,struct rc_pair_instruction * alpha)669 static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
670 {
671 	struct rc_pair_instruction backup;
672 
673 	/*Instructions can't write output registers and ALU result at the
674 	 * same time. */
675 	if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
676 		|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
677 		return 0;
678 	}
679 
680 	/* Writing output registers in the middle of shaders is slow, so
681 	 * we don't want to pair output writes with temp writes. */
682 	if ((rgb->RGB.OutputWriteMask && !alpha->Alpha.OutputWriteMask)
683 		|| (!rgb->RGB.OutputWriteMask && alpha->Alpha.OutputWriteMask)) {
684 		return 0;
685 	}
686 
687 	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
688 
689 	if (destructive_merge_instructions(rgb, alpha))
690 		return 1;
691 
692 	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
693 	return 0;
694 }
695 
presub_nop(struct rc_instruction * emitted)696 static void presub_nop(struct rc_instruction * emitted) {
697 	int prev_rgb_index, prev_alpha_index, i, num_src;
698 
699 	/* We don't need a nop if the previous instruction is a TEX. */
700 	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
701 		return;
702 	}
703 	if (emitted->Prev->U.P.RGB.WriteMask)
704 		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
705 	else
706 		prev_rgb_index = -1;
707 	if (emitted->Prev->U.P.Alpha.WriteMask)
708 		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
709 	else
710 		prev_alpha_index = 1;
711 
712 	/* Check the previous rgb instruction */
713 	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
714 		num_src = rc_presubtract_src_reg_count(
715 				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
716 		for (i = 0; i < num_src; i++) {
717 			unsigned int index = emitted->U.P.RGB.Src[i].Index;
718 			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
719 			    && (index  == prev_rgb_index
720 				|| index == prev_alpha_index)) {
721 				emitted->Prev->U.P.Nop = 1;
722 				return;
723 			}
724 		}
725 	}
726 
727 	/* Check the previous alpha instruction. */
728 	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
729 		return;
730 
731 	num_src = rc_presubtract_src_reg_count(
732 				emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
733 	for (i = 0; i < num_src; i++) {
734 		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
735 		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
736 		   && (index == prev_rgb_index || index == prev_alpha_index)) {
737 			emitted->Prev->U.P.Nop = 1;
738 			return;
739 		}
740 	}
741 }
742 
/**
 * Rewrite one reader argument after its value has moved to the W channel.
 *
 * Each of the first three swizzle components of \p arg that selects
 * \p old_swz is changed to RC_SWIZZLE_W, and the argument is pointed at an
 * alpha source allocated for (\p old_file, \p new_index).  If no source can
 * be allocated an error is reported and arg->Source is left unchanged; the
 * swizzle has already been rewritten at that point.
 */
static void rgb_to_alpha_remap (
	struct schedule_state * s,
	struct rc_instruction * inst,
	struct rc_pair_instruction_arg * arg,
	rc_register_file old_file,
	rc_swizzle old_swz,
	unsigned int new_index)
{
	unsigned int chan;
	int alpha_src;

	for (chan = 0; chan < 3; chan++) {
		if (get_swz(arg->Swizzle, chan) != old_swz)
			continue;
		SET_SWZ(arg->Swizzle, chan, RC_SWIZZLE_W);
	}

	alpha_src = rc_pair_alloc_source(&inst->U.P, 0, 1,
							old_file, new_index);
	if (alpha_src >= 0) {
		arg->Source = alpha_src;
		return;
	}

	/* This conversion is not possible, we must have made a mistake in
	 * is_rgb_to_alpha_possible. */
	rc_error(s->C, "rgb_to_alpha_remap failed to allocate src.\n");
}
770 
can_remap(unsigned int opcode)771 static int can_remap(unsigned int opcode)
772 {
773 	switch(opcode) {
774 	case RC_OPCODE_DDX:
775 	case RC_OPCODE_DDY:
776 		return 0;
777 	default:
778 		return 1;
779 	}
780 }
781 
can_convert_opcode_to_alpha(unsigned int opcode)782 static int can_convert_opcode_to_alpha(unsigned int opcode)
783 {
784 	switch(opcode) {
785 	case RC_OPCODE_DDX:
786 	case RC_OPCODE_DDY:
787 	case RC_OPCODE_DP2:
788 	case RC_OPCODE_DP3:
789 	case RC_OPCODE_DP4:
790 		return 0;
791 	default:
792 		return 1;
793 	}
794 }
795 
is_rgb_to_alpha_possible(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)796 static void is_rgb_to_alpha_possible(
797 	void * userdata,
798 	struct rc_instruction * inst,
799 	struct rc_pair_instruction_arg * arg,
800 	struct rc_pair_instruction_source * src)
801 {
802 	unsigned int read_chan = RC_SWIZZLE_UNUSED;
803 	unsigned int alpha_sources = 0;
804 	unsigned int i;
805 	struct rc_reader_data * reader_data = userdata;
806 
807 	if (!can_remap(inst->U.P.RGB.Opcode)
808 	    || !can_remap(inst->U.P.Alpha.Opcode)) {
809 		reader_data->Abort = 1;
810 		return;
811 	}
812 
813 	if (!src)
814 		return;
815 
816 	/* XXX There are some cases where we can still do the conversion if
817 	 * a reader reads from a presubtract source, but for now we'll prevent
818 	 * it. */
819 	if (arg->Source == RC_PAIR_PRESUB_SRC) {
820 		reader_data->Abort = 1;
821 		return;
822 	}
823 
824 	/* Make sure the source only reads the register component that we
825 	 * are going to be converting from.  It is OK if the instruction uses
826 	 * this component more than once.
827 	 * XXX If the index we will be converting to is the same as the
828 	 * current index, then it is OK to read from more than one component.
829 	 */
830 	for (i = 0; i < 3; i++) {
831 		rc_swizzle swz = get_swz(arg->Swizzle, i);
832 		switch(swz) {
833 		case RC_SWIZZLE_X:
834 		case RC_SWIZZLE_Y:
835 		case RC_SWIZZLE_Z:
836 		case RC_SWIZZLE_W:
837 			if (read_chan == RC_SWIZZLE_UNUSED) {
838 				read_chan = swz;
839 			} else if (read_chan != swz) {
840 				reader_data->Abort = 1;
841 				return;
842 			}
843 			break;
844 		default:
845 			break;
846 		}
847 	}
848 
849 	/* Make sure there are enough alpha sources.
850 	 * XXX If we know what register all the readers are going
851 	 * to be remapped to, then in some situations we can still do
852 	 * the substitution, even if all 3 alpha sources are being used.*/
853 	for (i = 0; i < 3; i++) {
854 		if (inst->U.P.Alpha.Src[i].Used) {
855 			alpha_sources++;
856 		}
857 	}
858 	if (alpha_sources > 2) {
859 		reader_data->Abort = 1;
860 		return;
861 	}
862 }
863 
/**
 * Try to turn the RGB half of a pair instruction into an Alpha instruction
 * that writes the W channel of a temporary register, and rewrite all of its
 * readers accordingly.
 *
 * On success the RGB half of the instruction is reset to a NOP, the W slot
 * of the chosen register in s->Temporary takes over the value tracked for
 * the old destination channel, and every entry in
 * sched_inst->GlobalReaders is remapped through rgb_to_alpha_remap().
 *
 * \return 1 if the instruction was converted, 0 if the conversion is not
 *         possible (the instruction is then left unmodified).
 */
static int convert_rgb_to_alpha(
	struct schedule_state * s,
	struct schedule_instruction * sched_inst)
{
	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
	unsigned int old_mask = pair_inst->RGB.WriteMask;
	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
	const struct rc_opcode_info * info =
				rc_get_opcode_info(pair_inst->RGB.Opcode);
	int new_index = -1;
	unsigned int i;

	/* The reader scan already found a reader that cannot be converted. */
	if (sched_inst->GlobalReaders.Abort)
		return 0;

	/* Even though we checked that we can convert to alpha previously, it is
	 * possible that another rgb source of the reader instructions was already
	 * converted to alpha and we thus have no longer free alpha sources.
	 */
	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
		if (reader.Inst->U.P.Alpha.Src[2].Used)
			return 0;
	}

	if (!pair_inst->RGB.WriteMask)
		return 0;

	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
		return 0;
	}

	assert(sched_inst->NumWriteValues == 1);

	if (!sched_inst->WriteValues[0]) {
		assert(0);
		return 0;
	}

	/* We start at the old index, because if we can reuse the same
	 * register and just change the swizzle then it is more likely we
	 * will be able to convert all the readers. */
	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
		/* i stays below RC_REGISTER_MAX_INDEX and the file is
		 * RC_FILE_TEMPORARY, so the lookup does not return NULL. */
		struct reg_value ** new_regvalp = get_reg_valuep(
						s, RC_FILE_TEMPORARY, i, 3);
		if (!*new_regvalp) {
			/* The W channel of register i is untracked: claim it
			 * for the value of the old destination channel.
			 * NOTE(review): the old slot is not cleared here. */
			struct reg_value ** old_regvalp =
				get_reg_valuep(s,
					RC_FILE_TEMPORARY,
					pair_inst->RGB.DestIndex,
					rc_mask_to_swizzle(old_mask));
			new_index = i;
			*new_regvalp = *old_regvalp;
			break;
		}
	}
	/* No register with a free W channel was found. */
	if (new_index < 0) {
		return 0;
	}

	/* If we are converting a full instruction with RC_OPCODE_REPL_ALPHA
	 * as the RGB opcode, then the Alpha instruction will already contain
	 * the correct opcode and instruction args, so we do not want to
	 * overwrite them.
	 */
	if (pair_inst->RGB.Opcode != RC_OPCODE_REPL_ALPHA) {
		pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
		memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
						sizeof(pair_inst->Alpha.Arg));
	}
	pair_inst->Alpha.DestIndex = new_index;
	pair_inst->Alpha.WriteMask = RC_MASK_W;
	pair_inst->Alpha.Target = pair_inst->RGB.Target;
	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
	pair_inst->Alpha.Omod = pair_inst->RGB.Omod;
	/* Move the swizzles into the first chan.  info describes the opcode
	 * the RGB half had on entry. */
	for (i = 0; i < info->NumSrcRegs; i++) {
		unsigned int j;
		for (j = 0; j < 3; j++) {
			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
			if (swz != RC_SWIZZLE_UNUSED) {
				pair_inst->Alpha.Arg[i].Swizzle =
							rc_init_swizzle(swz, 1);
				break;
			}
		}
	}
	/* Turn the RGB half into a NOP. */
	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
	pair_inst->RGB.DestIndex = 0;
	pair_inst->RGB.WriteMask = 0;
	pair_inst->RGB.Target = 0;
	pair_inst->RGB.OutputWriteMask = 0;
	pair_inst->RGB.DepthWriteMask = 0;
	pair_inst->RGB.Saturate = 0;
	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));

	/* Point every reader at the new register / W channel. */
	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
		rgb_to_alpha_remap(s, reader.Inst, reader.U.P.Arg,
					RC_FILE_TEMPORARY, old_swz, new_index);
	}
	return 1;
}
970 
try_convert_and_pair(struct schedule_state * s,struct schedule_instruction ** inst_list)971 static void try_convert_and_pair(
972 	struct schedule_state *s,
973 	struct schedule_instruction ** inst_list)
974 {
975 	struct schedule_instruction * list_ptr = *inst_list;
976 	while (list_ptr && *inst_list && (*inst_list)->NextReady) {
977 		int paired = 0;
978 		if (list_ptr->Instruction->U.P.Alpha.Opcode != RC_OPCODE_NOP
979 			&& list_ptr->Instruction->U.P.RGB.Opcode
980 						!= RC_OPCODE_REPL_ALPHA) {
981 				goto next;
982 		}
983 		if (list_ptr->NumWriteValues == 1
984 					&& convert_rgb_to_alpha(s, list_ptr)) {
985 
986 			struct schedule_instruction * pair_ptr;
987 			remove_inst_from_list(inst_list, list_ptr);
988 			add_inst_to_list_score(&s->ReadyAlpha, list_ptr);
989 
990 			for (pair_ptr = s->ReadyRGB; pair_ptr;
991 					pair_ptr = pair_ptr->NextReady) {
992 				if (merge_instructions(&pair_ptr->Instruction->U.P,
993 						&list_ptr->Instruction->U.P)) {
994 					remove_inst_from_list(&s->ReadyAlpha, list_ptr);
995 					remove_inst_from_list(&s->ReadyRGB, pair_ptr);
996 					pair_ptr->PairedInst = list_ptr;
997 
998 					add_inst_to_list(&s->ReadyFullALU, pair_ptr);
999 					list_ptr = *inst_list;
1000 					paired = 1;
1001 					break;
1002 				}
1003 
1004 			}
1005 		}
1006 		if (!paired) {
1007 next:
1008 			list_ptr = list_ptr->NextReady;
1009 		}
1010 	}
1011 }
1012 
1013 /**
1014  * This function attempts to merge RGB and Alpha instructions together.
1015  */
pair_instructions(struct schedule_state * s)1016 static void pair_instructions(struct schedule_state * s)
1017 {
1018 	struct schedule_instruction *rgb_ptr;
1019 	struct schedule_instruction *alpha_ptr;
1020 
1021 	/* Some pairings might fail because they require too
1022 	 * many source slots; try all possible pairings if necessary */
1023 	rgb_ptr = s->ReadyRGB;
1024 	while(rgb_ptr) {
1025 		struct schedule_instruction * rgb_next = rgb_ptr->NextReady;
1026 		alpha_ptr = s->ReadyAlpha;
1027 		while(alpha_ptr) {
1028 			struct schedule_instruction * alpha_next = alpha_ptr->NextReady;
1029 			if (merge_instructions(&rgb_ptr->Instruction->U.P, &alpha_ptr->Instruction->U.P)) {
1030 				/* Remove RGB and Alpha from their ready lists.
1031 				 */
1032 				remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
1033 				remove_inst_from_list(&s->ReadyAlpha, alpha_ptr);
1034 				rgb_ptr->PairedInst = alpha_ptr;
1035 				add_inst_to_list(&s->ReadyFullALU, rgb_ptr);
1036 				break;
1037 			}
1038 			alpha_ptr = alpha_next;
1039 		}
1040 		rgb_ptr = rgb_next;
1041 	}
1042 
1043 	if (!s->Opt) {
1044 		return;
1045 	}
1046 
1047 	/* Full instructions that have RC_OPCODE_REPL_ALPHA in the RGB
1048 	 * slot can be converted into Alpha instructions. */
1049 	try_convert_and_pair(s, &s->ReadyFullALU);
1050 
1051 	/* Try to convert some of the RGB instructions to Alpha and
1052 	 * try to pair it with another RGB. */
1053 	try_convert_and_pair(s, &s->ReadyRGB);
1054 }
1055 
update_max_score(struct schedule_state * s,struct schedule_instruction ** list,int * max_score,struct schedule_instruction ** max_inst_out,struct schedule_instruction *** list_out)1056 static void update_max_score(
1057 	struct schedule_state * s,
1058 	struct schedule_instruction ** list,
1059 	int * max_score,
1060 	struct schedule_instruction ** max_inst_out,
1061 	struct schedule_instruction *** list_out)
1062 {
1063 	struct schedule_instruction * list_ptr;
1064 	for (list_ptr = *list; list_ptr; list_ptr = list_ptr->NextReady) {
1065 		int score;
1066 		s->CalcScore(list_ptr);
1067 		score = list_ptr->Score;
1068 		if (!*max_inst_out || score > *max_score) {
1069 			*max_score = score;
1070 			*max_inst_out = list_ptr;
1071 			*list_out = list;
1072 		}
1073 	}
1074 }
1075 
emit_instruction(struct schedule_state * s,struct rc_instruction * before)1076 static void emit_instruction(
1077 	struct schedule_state * s,
1078 	struct rc_instruction * before)
1079 {
1080 	int max_score = -1;
1081 	struct schedule_instruction * max_inst = NULL;
1082 	struct schedule_instruction ** max_list = NULL;
1083 	unsigned tex_count = 0;
1084 	struct schedule_instruction * tex_ptr;
1085 
1086 	pair_instructions(s);
1087 #if VERBOSE
1088 	fprintf(stderr, "Full:\n");
1089 	print_list(s->ReadyFullALU);
1090 	fprintf(stderr, "RGB:\n");
1091 	print_list(s->ReadyRGB);
1092 	fprintf(stderr, "Alpha:\n");
1093 	print_list(s->ReadyAlpha);
1094 	fprintf(stderr, "TEX:\n");
1095 	print_list(s->ReadyTEX);
1096 #endif
1097 
1098 	for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
1099 		if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
1100 			emit_all_tex(s, before);
1101 			s->PrevBlockHasKil = 1;
1102 			return;
1103 		}
1104 		tex_count++;
1105 	}
1106 	update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
1107 	update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
1108 	update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
1109 
1110 	if (tex_count >= s->max_tex_group || max_score == -1
1111 		|| (s->TEXCount > 0 && tex_count == s->TEXCount)
1112 		|| (tex_count > 0 && max_score < NO_OUTPUT_SCORE)) {
1113 		emit_all_tex(s, before);
1114 	} else {
1115 
1116 
1117 		remove_inst_from_list(max_list, max_inst);
1118 		rc_insert_instruction(before->Prev, max_inst->Instruction);
1119 		commit_alu_instruction(s, max_inst);
1120 
1121 		presub_nop(before->Prev);
1122 	}
1123 }
1124 
add_tex_reader(struct schedule_state * s,struct schedule_instruction * writer,struct schedule_instruction * reader)1125 static void add_tex_reader(
1126 	struct schedule_state * s,
1127 	struct schedule_instruction * writer,
1128 	struct schedule_instruction * reader)
1129 {
1130 	if (!writer || writer->Instruction->Type != RC_INSTRUCTION_NORMAL) {
1131 		/*Not a TEX instructions */
1132 		return;
1133 	}
1134 	reader->TexReadCount++;
1135 	rc_list_add(&writer->TexReaders, rc_list(&s->C->Pool, reader));
1136 }
1137 
/**
 * Callback for rc_for_all_reads_chan(): register that s->Current reads one
 * register component.
 *
 * The current instruction is added to the reader list of the value stored
 * in that component, its dependency count is bumped when the value has a
 * writer in this block, and the value is appended to ReadValues.
 *
 * \param data   the struct schedule_state driving the scan
 * \param inst   instruction being scanned (unused; s->Current is used)
 * \param file   register file of the component being read
 * \param index  register index of the component being read
 * \param chan   channel of the component being read
 */
static void scan_read(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
	struct reg_value_reader * reader;

	if (!v)
		return;

	if (*v && (*v)->Writer == s->Current) {
		/* The instruction reads and writes to a register component.
		 * In this case, we only want to increment dependencies by one.
		 * Why?
		 * Because each instruction depends on the writers of its source
		 * registers _and_ the most recent writer of its destination
		 * register.  In this case, the current instruction (s->Current)
		 * has a dependency that both writes to one of its source
		 * registers and was the most recent writer to its destination
		 * register.  We have already marked this dependency in
		 * scan_write(), so we don't need to do it again.
		 */

		/* We need to make sure we are adding s->Current to the
		 * previous writer's list of TexReaders, if the previous writer
		 * was a TEX instruction.
		 */
		add_tex_reader(s, s->PrevWriter[chan], s->Current);

		return;
	}

	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
	reader->Reader = s->Current;
	/* Terminate the node explicitly. Other pool allocations in this file
	 * are cleared with memset(), so the memory is not assumed to be
	 * zeroed; without this the first reader of a fresh value would leave
	 * the Readers list with an indeterminate tail pointer. */
	reader->Next = NULL;
	if (!*v) {
		/* In this situation, the instruction reads from a register
		 * that hasn't been written to or read from in the current
		 * block. */
		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
		memset(*v, 0, sizeof(struct reg_value));
		(*v)->Readers = reader;
	} else {
		/* Push the new reader onto the front of the existing list. */
		reader->Next = (*v)->Readers;
		(*v)->Readers = reader;
		/* Only update the current instruction's dependencies if the
		 * register it reads from has been written to in this block. */
		if ((*v)->Writer) {
			add_tex_reader(s, (*v)->Writer, s->Current);
			s->Current->NumDependencies++;
		}
	}
	(*v)->NumReaders++;

	/* ReadValues has 12 slots; report an error instead of overflowing. */
	if (s->Current->NumReadValues >= 12) {
		rc_error(s->C, "%s: NumReadValues overflow\n", __func__);
	} else {
		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
	}
}
1199 
/**
 * Callback for rc_for_all_writes_chan(): register that s->Current writes one
 * register component.
 *
 * A fresh reg_value owned by s->Current replaces the component's current
 * value. If an older value exists, it is chained to the new one, the
 * current instruction gains one dependency, and the older value's writer is
 * remembered in s->PrevWriter[chan].
 *
 * \param data   the struct schedule_state driving the scan
 * \param inst   instruction being scanned (unused; s->Current is used)
 * \param file   register file of the component being written
 * \param index  register index of the component being written
 * \param chan   channel of the component being written
 */
static void scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int chan)
{
	struct schedule_state * s = data;
	struct reg_value ** slot = get_reg_valuep(s, file, index, chan);
	struct reg_value * fresh;

	if (slot == NULL) {
		return;
	}

	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);

	fresh = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
	memset(fresh, 0, sizeof(struct reg_value));
	fresh->Writer = s->Current;

	if (*slot != NULL) {
		struct reg_value * older = *slot;

		older->Next = fresh;
		s->Current->NumDependencies++;
		/* Keep track of the previous writer to s->Current's
		 * destination register. */
		s->PrevWriter[chan] = older->Writer;
	}

	*slot = fresh;

	/* WriteValues has 4 slots; report an error instead of overflowing. */
	if (s->Current->NumWriteValues < 4) {
		s->Current->WriteValues[s->Current->NumWriteValues++] = fresh;
	} else {
		rc_error(s->C, "%s: NumWriteValues overflow\n", __func__);
	}
}
1233 
is_rgb_to_alpha_possible_normal(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)1234 static void is_rgb_to_alpha_possible_normal(
1235 	void * userdata,
1236 	struct rc_instruction * inst,
1237 	struct rc_src_register * src)
1238 {
1239 	struct rc_reader_data * reader_data = userdata;
1240 	reader_data->Abort = 1;
1241 
1242 }
1243 
schedule_block(struct schedule_state * s,struct rc_instruction * begin,struct rc_instruction * end)1244 static void schedule_block(struct schedule_state * s,
1245 		struct rc_instruction * begin, struct rc_instruction * end)
1246 {
1247 	unsigned int ip;
1248 
1249 	/* Scan instructions for data dependencies */
1250 	ip = 0;
1251 	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
1252 		s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
1253 		memset(s->Current, 0, sizeof(struct schedule_instruction));
1254 
1255 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
1256 			const struct rc_opcode_info * info =
1257 					rc_get_opcode_info(inst->U.I.Opcode);
1258 			if (info->HasTexture) {
1259 				s->TEXCount++;
1260 			}
1261 		}
1262 
1263 		/* XXX: This causes SemWait to be set for all instructions in
1264 		 * a block if the previous block contained a TEX instruction.
1265 		 * We can do better here, but it will take a lot of work. */
1266 		if (s->PrevBlockHasTex) {
1267 			s->Current->TexReadCount = 1;
1268 		}
1269 
1270 		s->Current->Instruction = inst;
1271 		inst->IP = ip++;
1272 
1273 		DBG("%i: Scanning\n", inst->IP);
1274 
1275 		/* The order of things here is subtle and maybe slightly
1276 		 * counter-intuitive, to account for the case where an
1277 		 * instruction writes to the same register as it reads
1278 		 * from. */
1279 		rc_for_all_writes_chan(inst, &scan_write, s);
1280 		rc_for_all_reads_chan(inst, &scan_read, s);
1281 
1282 		DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
1283 
1284 		if (!s->Current->NumDependencies) {
1285 			instruction_ready(s, s->Current);
1286 		}
1287 
1288 		/* Get global readers for possible RGB->Alpha conversion. */
1289 		s->Current->GlobalReaders.ExitOnAbort = 1;
1290 		rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
1291 				is_rgb_to_alpha_possible_normal,
1292 				is_rgb_to_alpha_possible, NULL);
1293 	}
1294 
1295 	/* Temporarily unlink all instructions */
1296 	begin->Prev->Next = end;
1297 	end->Prev = begin->Prev;
1298 
1299 	/* Schedule instructions back */
1300 	while(!s->C->Error &&
1301 	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
1302 		emit_instruction(s, end);
1303 	}
1304 }
1305 
is_controlflow(struct rc_instruction * inst)1306 static int is_controlflow(struct rc_instruction * inst)
1307 {
1308 	if (inst->Type == RC_INSTRUCTION_NORMAL) {
1309 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
1310 		return opcode->IsFlowControl;
1311 	}
1312 	return 0;
1313 }
1314 
rc_pair_schedule(struct radeon_compiler * cc,void * user)1315 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
1316 {
1317 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
1318 	struct schedule_state s;
1319 	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
1320 	unsigned int * opt = user;
1321 
1322 	memset(&s, 0, sizeof(s));
1323 	s.Opt = *opt;
1324 	s.C = &c->Base;
1325 	if (s.C->is_r500) {
1326 		s.CalcScore = calc_score_readers;
1327 	} else {
1328 		s.CalcScore = calc_score_r300;
1329 	}
1330 	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
1331 	while(inst != &c->Base.Program.Instructions) {
1332 		struct rc_instruction * first;
1333 
1334 		if (is_controlflow(inst)) {
1335 			/* The TexSemWait flag is already properly set for ALU
1336 			 * instructions using the results of normal TEX lookup,
1337 			 * however it was found empirically that TEXKIL also needs
1338 			 * synchronization with the control flow. This might not be optimal,
1339 			 * however the docs don't offer any guidance in this matter.
1340 			 */
1341 			if (s.PrevBlockHasKil) {
1342 				inst->U.I.TexSemWait = 1;
1343 				s.PrevBlockHasKil = 0;
1344 			}
1345 			inst = inst->Next;
1346 			continue;
1347 		}
1348 
1349 		first = inst;
1350 
1351 		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
1352 			inst = inst->Next;
1353 
1354 		DBG("Schedule one block\n");
1355 		memset(s.Temporary, 0, sizeof(s.Temporary));
1356 		s.TEXCount = 0;
1357 		schedule_block(&s, first, inst);
1358 		if (s.PendingTEX) {
1359 			s.PrevBlockHasTex = 1;
1360 		}
1361 	}
1362 }
1363