1 /* -*- mesa-c++ -*-
2 * Copyright 2022 Collabora LTD
3 * Author: Gert Wollny <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "sfn_optimizer.h"
8
9 #include "sfn_debug.h"
10 #include "sfn_instr_alugroup.h"
11 #include "sfn_instr_controlflow.h"
12 #include "sfn_instr_export.h"
13 #include "sfn_instr_fetch.h"
14 #include "sfn_instr_lds.h"
15 #include "sfn_instr_mem.h"
16 #include "sfn_instr_tex.h"
17 #include "sfn_peephole.h"
18 #include "sfn_valuefactory.h"
19 #include "sfn_virtualvalues.h"
20
21 #include <sstream>
22
23 namespace r600 {
24
25 bool
optimize(Shader & shader)26 optimize(Shader& shader)
27 {
28 bool progress;
29
30 sfn_log << SfnLog::opt << "Shader before optimization\n";
31 if (sfn_log.has_debug_flag(SfnLog::opt)) {
32 std::stringstream ss;
33 shader.print(ss);
34 sfn_log << ss.str() << "\n\n";
35 }
36
37 do {
38 progress = false;
39 progress |= copy_propagation_fwd(shader);
40 progress |= dead_code_elimination(shader);
41 progress |= copy_propagation_backward(shader);
42 progress |= dead_code_elimination(shader);
43 progress |= simplify_source_vectors(shader);
44 progress |= peephole(shader);
45 progress |= dead_code_elimination(shader);
46 } while (progress);
47
48 return progress;
49 }
50
51 class DCEVisitor : public InstrVisitor {
52 public:
53 DCEVisitor();
54
55 void visit(AluInstr *instr) override;
56 void visit(AluGroup *instr) override;
57 void visit(TexInstr *instr) override;
visit(ExportInstr * instr)58 void visit(ExportInstr *instr) override { (void)instr; };
59 void visit(FetchInstr *instr) override;
60 void visit(Block *instr) override;
61
visit(ControlFlowInstr * instr)62 void visit(ControlFlowInstr *instr) override { (void)instr; };
visit(IfInstr * instr)63 void visit(IfInstr *instr) override { (void)instr; };
visit(ScratchIOInstr * instr)64 void visit(ScratchIOInstr *instr) override { (void)instr; };
visit(StreamOutInstr * instr)65 void visit(StreamOutInstr *instr) override { (void)instr; };
visit(MemRingOutInstr * instr)66 void visit(MemRingOutInstr *instr) override { (void)instr; };
visit(EmitVertexInstr * instr)67 void visit(EmitVertexInstr *instr) override { (void)instr; };
visit(GDSInstr * instr)68 void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)69 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)70 void visit(LDSAtomicInstr *instr) override { (void)instr; };
71 void visit(LDSReadInstr *instr) override;
visit(RatInstr * instr)72 void visit(RatInstr *instr) override { (void)instr; };
73
74 bool progress;
75 };
76
77 bool
dead_code_elimination(Shader & shader)78 dead_code_elimination(Shader& shader)
79 {
80 DCEVisitor dce;
81
82 do {
83
84 sfn_log << SfnLog::opt << "start dce run\n";
85
86 dce.progress = false;
87 for (auto& b : shader.func())
88 b->accept(dce);
89
90 sfn_log << SfnLog::opt << "finished dce run\n\n";
91
92 } while (dce.progress);
93
94 sfn_log << SfnLog::opt << "Shader after DCE\n";
95 if (sfn_log.has_debug_flag(SfnLog::opt)) {
96 std::stringstream ss;
97 shader.print(ss);
98 sfn_log << ss.str() << "\n\n";
99 }
100
101 return dce.progress;
102 }
103
DCEVisitor()104 DCEVisitor::DCEVisitor():
105 progress(false)
106 {
107 }
108
109 void
visit(AluInstr * instr)110 DCEVisitor::visit(AluInstr *instr)
111 {
112 sfn_log << SfnLog::opt << "DCE: visit '" << *instr;
113
114 if (instr->has_instr_flag(Instr::dead))
115 return;
116
117 if (instr->dest() && (instr->dest()->has_uses())) {
118 sfn_log << SfnLog::opt << " dest used\n";
119 return;
120 }
121
122 switch (instr->opcode()) {
123 case op2_kille:
124 case op2_killne:
125 case op2_kille_int:
126 case op2_killne_int:
127 case op2_killge:
128 case op2_killge_int:
129 case op2_killge_uint:
130 case op2_killgt:
131 case op2_killgt_int:
132 case op2_killgt_uint:
133 case op0_group_barrier:
134 sfn_log << SfnLog::opt << " never kill\n";
135 return;
136 default:;
137 }
138
139 bool dead = instr->set_dead();
140 sfn_log << SfnLog::opt << (dead ? "dead" : "alive") << "\n";
141 progress |= dead;
142 }
143
144 void
visit(LDSReadInstr * instr)145 DCEVisitor::visit(LDSReadInstr *instr)
146 {
147 sfn_log << SfnLog::opt << "visit " << *instr << "\n";
148 progress |= instr->remove_unused_components();
149 }
150
151 void
visit(AluGroup * instr)152 DCEVisitor::visit(AluGroup *instr)
153 {
154 /* Groups are created because the instructions are used together
155 * so don't try to eliminate code there */
156 (void)instr;
157 }
158
159 void
visit(TexInstr * instr)160 DCEVisitor::visit(TexInstr *instr)
161 {
162 auto& dest = instr->dst();
163
164 bool has_uses = false;
165 RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
166 for (int i = 0; i < 4; ++i) {
167 if (!dest[i]->has_uses())
168 swz[i] = 7;
169 else
170 has_uses |= true;
171 }
172 instr->set_dest_swizzle(swz);
173
174 if (has_uses)
175 return;
176
177 progress |= instr->set_dead();
178 }
179
180 void
visit(FetchInstr * instr)181 DCEVisitor::visit(FetchInstr *instr)
182 {
183 auto& dest = instr->dst();
184
185 bool has_uses = false;
186 RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
187 for (int i = 0; i < 4; ++i) {
188 if (!dest[i]->has_uses())
189 swz[i] = 7;
190 else
191 has_uses |= true;
192 }
193 instr->set_dest_swizzle(swz);
194
195 if (has_uses)
196 return;
197
198 sfn_log << SfnLog::opt << "set dead: " << *instr << "\n";
199
200 progress |= instr->set_dead();
201 }
202
203 void
visit(Block * block)204 DCEVisitor::visit(Block *block)
205 {
206 auto i = block->begin();
207 auto e = block->end();
208 while (i != e) {
209 auto n = i++;
210 if (!(*n)->keep()) {
211 (*n)->accept(*this);
212 if ((*n)->is_dead()) {
213 block->erase(n);
214 }
215 }
216 }
217 }
218
219 class CopyPropFwdVisitor : public InstrVisitor {
220 public:
221 CopyPropFwdVisitor(ValueFactory& vf);
222
223 void visit(AluInstr *instr) override;
224 void visit(AluGroup *instr) override;
225 void visit(TexInstr *instr) override;
226 void visit(ExportInstr *instr) override;
227 void visit(FetchInstr *instr) override;
228 void visit(Block *instr) override;
visit(ControlFlowInstr * instr)229 void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)230 void visit(IfInstr *instr) override { (void)instr; }
visit(ScratchIOInstr * instr)231 void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)232 void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)233 void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)234 void visit(EmitVertexInstr *instr) override { (void)instr; }
235 void visit(GDSInstr *instr) override;
visit(WriteTFInstr * instr)236 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(RatInstr * instr)237 void visit(RatInstr *instr) override { (void)instr; };
238
239 // TODO: these two should use copy propagation
visit(LDSAtomicInstr * instr)240 void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)241 void visit(LDSReadInstr *instr) override { (void)instr; };
242
243 void propagate_to(RegisterVec4& src, Instr *instr);
244 bool assigned_register_direct(PRegister reg);
245
246 ValueFactory& value_factory;
247 bool progress;
248 };
249
250 class CopyPropBackVisitor : public InstrVisitor {
251 public:
252 CopyPropBackVisitor();
253
254 void visit(AluInstr *instr) override;
255 void visit(AluGroup *instr) override;
256 void visit(TexInstr *instr) override;
visit(ExportInstr * instr)257 void visit(ExportInstr *instr) override { (void)instr; }
258 void visit(FetchInstr *instr) override;
259 void visit(Block *instr) override;
visit(ControlFlowInstr * instr)260 void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)261 void visit(IfInstr *instr) override { (void)instr; }
visit(ScratchIOInstr * instr)262 void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)263 void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)264 void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)265 void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)266 void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)267 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)268 void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)269 void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)270 void visit(RatInstr *instr) override { (void)instr; };
271
272 bool progress;
273 };
274
275 bool
copy_propagation_fwd(Shader & shader)276 copy_propagation_fwd(Shader& shader)
277 {
278 auto& root = shader.func();
279 CopyPropFwdVisitor copy_prop(shader.value_factory());
280
281 do {
282 copy_prop.progress = false;
283 for (auto b : root)
284 b->accept(copy_prop);
285 } while (copy_prop.progress);
286
287 sfn_log << SfnLog::opt << "Shader after Copy Prop forward\n";
288 if (sfn_log.has_debug_flag(SfnLog::opt)) {
289 std::stringstream ss;
290 shader.print(ss);
291 sfn_log << ss.str() << "\n\n";
292 }
293
294 return copy_prop.progress;
295 }
296
297 bool
copy_propagation_backward(Shader & shader)298 copy_propagation_backward(Shader& shader)
299 {
300 CopyPropBackVisitor copy_prop;
301
302 do {
303 copy_prop.progress = false;
304 for (auto b : shader.func())
305 b->accept(copy_prop);
306 } while (copy_prop.progress);
307
308 sfn_log << SfnLog::opt << "Shader after Copy Prop backwards\n";
309 if (sfn_log.has_debug_flag(SfnLog::opt)) {
310 std::stringstream ss;
311 shader.print(ss);
312 sfn_log << ss.str() << "\n\n";
313 }
314
315 return copy_prop.progress;
316 }
317
CopyPropFwdVisitor(ValueFactory & vf)318 CopyPropFwdVisitor::CopyPropFwdVisitor(ValueFactory& vf):
319 value_factory(vf),
320 progress(false)
321 {
322 }
323
324 void
visit(AluInstr * instr)325 CopyPropFwdVisitor::visit(AluInstr *instr)
326 {
327 sfn_log << SfnLog::opt << "CopyPropFwdVisitor:[" << instr->block_id() << ":"
328 << instr->index() << "] " << *instr << " dset=" << instr->dest() << " ";
329
330 if (instr->dest()) {
331 sfn_log << SfnLog::opt << "has uses; " << instr->dest()->uses().size();
332 }
333
334 sfn_log << SfnLog::opt << "\n";
335
336 if (!instr->can_propagate_src()) {
337 return;
338 }
339
340 auto src = instr->psrc(0);
341 auto dest = instr->dest();
342
343 /* Don't propagate an indirect load to more than one
344 * instruction, because we may have to split the address loads
345 * creating more instructions */
346 if (dest->uses().size() > 1) {
347 auto [addr, is_for_dest, index] = instr->indirect_addr();
348 if (addr && !is_for_dest)
349 return;
350 }
351
352
353 auto ii = dest->uses().begin();
354 auto ie = dest->uses().end();
355
356 auto mov_block_id = instr->block_id();
357
358 /** libc++ seems to invalidate the end iterator too if a std::set is
359 * made empty by an erase operation,
360 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7931
361 */
362 while(ii != ie && !dest->uses().empty()) {
363 auto i = *ii;
364 auto target_block_id = i->block_id();
365
366 ++ii;
367 /* SSA can always be propagated, registers only in the same block
368 * and only if they are assigned in the same block */
369 bool dest_can_propagate = dest->has_flag(Register::ssa);
370
371 if (!dest_can_propagate) {
372
373 /* Register can propagate if the assignment was in the same
374 * block, and we don't have a second assignment coming later
375 * (e.g. helper invocation evaluation does
376 *
377 * 1: MOV R0.x, -1
378 * 2: FETCH R0.0 VPM
379 * 3: MOV SN.x, R0.x
380 *
381 * Here we can't prpagate the move in 1 to SN.x in 3 */
382 if ((mov_block_id == target_block_id && instr->index() < i->index())) {
383 dest_can_propagate = true;
384 if (dest->parents().size() > 1) {
385 for (auto p : dest->parents()) {
386 if (p->block_id() == i->block_id() && p->index() > instr->index()) {
387 dest_can_propagate = false;
388 break;
389 }
390 }
391 }
392 }
393 }
394 bool move_addr_use = false;
395 bool src_can_propagate = false;
396 if (auto rsrc = src->as_register()) {
397 if (rsrc->has_flag(Register::ssa)) {
398 src_can_propagate = true;
399 } else if (mov_block_id == target_block_id) {
400 if (auto a = rsrc->addr()) {
401 if (a->as_register() &&
402 !a->as_register()->has_flag(Register::addr_or_idx) &&
403 i->block_id() == mov_block_id &&
404 i->index() == instr->index() + 1) {
405 src_can_propagate = true;
406 move_addr_use = true;
407 }
408 } else {
409 src_can_propagate = true;
410 }
411 for (auto p : rsrc->parents()) {
412 if (p->block_id() == mov_block_id &&
413 p->index() > instr->index() &&
414 p->index() < i->index()) {
415 src_can_propagate = false;
416 break;
417 }
418 }
419 }
420 } else {
421 src_can_propagate = true;
422 }
423
424 if (dest_can_propagate && src_can_propagate) {
425 sfn_log << SfnLog::opt << " Try replace in " << i->block_id() << ":"
426 << i->index() << *i << "\n";
427
428 if (i->as_alu() && i->as_alu()->parent_group()) {
429 progress |= i->as_alu()->parent_group()->replace_source(dest, src);
430 } else {
431 bool success = i->replace_source(dest, src);
432 if (success && move_addr_use) {
433 for (auto r : instr->required_instr()){
434 std::cerr << "add " << *r << " to " << *i << "\n";
435 i->add_required_instr(r);
436 }
437 }
438 progress |= success;
439 }
440 }
441 }
442 if (instr->dest()) {
443 sfn_log << SfnLog::opt << "has uses; " << instr->dest()->uses().size();
444 }
445 sfn_log << SfnLog::opt << " done\n";
446 }
447
448 void
visit(AluGroup * instr)449 CopyPropFwdVisitor::visit(AluGroup *instr)
450 {
451 (void)instr;
452 }
453
454 void
visit(TexInstr * instr)455 CopyPropFwdVisitor::visit(TexInstr *instr)
456 {
457 propagate_to(instr->src(), instr);
458 }
459
visit(GDSInstr * instr)460 void CopyPropFwdVisitor::visit(GDSInstr *instr)
461 {
462 propagate_to(instr->src(), instr);
463 }
464
465 void
visit(ExportInstr * instr)466 CopyPropFwdVisitor::visit(ExportInstr *instr)
467 {
468 propagate_to(instr->value(), instr);
469 }
470
register_sel_can_change(Pin pin)471 static bool register_sel_can_change(Pin pin)
472 {
473 return pin == pin_free || pin == pin_none;
474 }
475
register_chan_is_pinned(Pin pin)476 static bool register_chan_is_pinned(Pin pin)
477 {
478 return pin == pin_chan ||
479 pin == pin_fully ||
480 pin == pin_chgr;
481 }
482
483
484 void
propagate_to(RegisterVec4 & value,Instr * instr)485 CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr)
486 {
487 /* Collect parent instructions - only ALU move without modifiers
488 * and without indirect access are allowed. */
489 AluInstr *parents[4] = {nullptr};
490 bool have_candidates = false;
491 for (int i = 0; i < 4; ++i) {
492 if (value[i]->chan() < 4 && value[i]->has_flag(Register::ssa)) {
493 /* We have a pre-define value, so we can't propagate a copy */
494 if (value[i]->parents().empty())
495 return;
496
497 if (value[i]->uses().size() > 1)
498 return;
499
500 assert(value[i]->parents().size() == 1);
501 parents[i] = (*value[i]->parents().begin())->as_alu();
502
503 /* Parent op is not an ALU instruction, so we can't
504 copy-propagate */
505 if (!parents[i])
506 return;
507
508
509 if ((parents[i]->opcode() != op1_mov) ||
510 parents[i]->has_source_mod(0, AluInstr::mod_neg) ||
511 parents[i]->has_source_mod(0, AluInstr::mod_abs) ||
512 parents[i]->has_alu_flag(alu_dst_clamp) ||
513 parents[i]->has_alu_flag(alu_src0_rel))
514 return;
515
516 auto [addr, dummy0, index_reg_dummy] = parents[i]->indirect_addr();
517
518 /* Don't accept moves with indirect reads, because they are not
519 * supported with instructions that use vec4 values */
520 if (addr || index_reg_dummy)
521 return;
522
523 have_candidates = true;
524 }
525 }
526
527 if (!have_candidates)
528 return;
529
530 /* Collect the new source registers. We may have to move all registers
531 * to a new virtual sel index. */
532
533 PRegister new_src[4] = {0};
534 int new_chan[4] = {0,0,0,0};
535
536 uint8_t used_chan_mask = 0;
537 int new_sel = -1;
538 bool all_sel_can_change = true;
539
540 bool is_ssa = true;
541
542 for (int i = 0; i < 4; ++i) {
543
544 /* No parent means we either ignore the channel or insert 0 or 1.*/
545 if (!parents[i])
546 continue;
547
548 unsigned allowed_mask = 0xf & ~used_chan_mask;
549
550 auto src = parents[i]->src(0).as_register();
551 if (!src)
552 return;
553
554 /* Don't accept an array element for now, we would need extra checking
555 * that the value is not overwritten by an indirect access */
556 if (src->pin() == pin_array)
557 return;
558
559 /* Is this check still needed ? */
560 if (!src->has_flag(Register::ssa) &&
561 !assigned_register_direct(src)) {
562 return;
563 }
564
565 /* If the channel chan't switch we have to update the channel mask
566 * TODO: assign channel pinned registers first might give more
567 * opportunities for this optimization */
568 if (register_chan_is_pinned(src->pin()))
569 allowed_mask = 1 << src->chan();
570
571 /* Update the possible channel mask based on the sourcee's parent
572 * instruction(s) */
573 for (auto p : src->parents()) {
574 auto alu = p->as_alu();
575 if (alu)
576 allowed_mask &= alu->allowed_dest_chan_mask();
577 }
578
579 for (auto u : src->uses()) {
580 auto alu = u->as_alu();
581 if (alu)
582 allowed_mask &= alu->allowed_src_chan_mask();
583 }
584
585 if (!allowed_mask)
586 return;
587
588 /* Prefer keeping the channel, but if that's not possible
589 * i.e. if the sel has to change, then pick the next free channel
590 * (see below) */
591 new_chan[i] = src->chan();
592
593 if (new_sel < 0) {
594 new_sel = src->sel();
595 is_ssa = src->has_flag(Register::ssa);
596 } else if (new_sel != src->sel()) {
597 /* If we have to assign a new register sel index do so only
598 * if all already assigned source can get a new register index,
599 * and all registers are either SSA or registers.
600 * TODO: check whether this last restriction is required */
601 if (all_sel_can_change &&
602 register_sel_can_change(src->pin()) &&
603 (is_ssa == src->has_flag(Register::ssa))) {
604 new_sel = value_factory.new_register_index();
605 new_chan[i] = u_bit_scan(&allowed_mask);
606 } else /* Sources can't be combined to a vec4 so bail out */
607 return;
608 }
609
610 new_src[i] = src;
611 used_chan_mask |= 1 << new_chan[i];
612 if (!register_sel_can_change(src->pin()))
613 all_sel_can_change = false;
614 }
615
616 /* Apply the changes to the vec4 source */
617 value.del_use(instr);
618 for (int i = 0; i < 4; ++i) {
619 if (parents[i]) {
620 new_src[i]->set_sel(new_sel);
621 if (is_ssa)
622 new_src[i]->set_flag(Register::ssa);
623 new_src[i]->set_chan(new_chan[i]);
624
625 value.set_value(i, new_src[i]);
626
627 if (new_src[i]->pin() != pin_fully &&
628 new_src[i]->pin() != pin_chgr) {
629 if (new_src[i]->pin() == pin_chan)
630 new_src[i]->set_pin(pin_chgr);
631 else
632 new_src[i]->set_pin(pin_group);
633 }
634 progress |= true;
635 }
636 }
637 value.add_use(instr);
638 if (progress)
639 value.validate();
640 }
641
assigned_register_direct(PRegister reg)642 bool CopyPropFwdVisitor::assigned_register_direct(PRegister reg)
643 {
644 for (auto p: reg->parents()) {
645 if (p->as_alu()) {
646 auto [addr, dummy, index_reg] = p->as_alu()->indirect_addr();
647 if (addr)
648 return false;
649 }
650 }
651 return true;
652 }
653
654 void
visit(FetchInstr * instr)655 CopyPropFwdVisitor::visit(FetchInstr *instr)
656 {
657 (void)instr;
658 }
659
660 void
visit(Block * instr)661 CopyPropFwdVisitor::visit(Block *instr)
662 {
663 for (auto& i : *instr)
664 i->accept(*this);
665 }
666
CopyPropBackVisitor()667 CopyPropBackVisitor::CopyPropBackVisitor():
668 progress(false)
669 {
670 }
671
672 void
visit(AluInstr * instr)673 CopyPropBackVisitor::visit(AluInstr *instr)
674 {
675 bool local_progress = false;
676
677 sfn_log << SfnLog::opt << "CopyPropBackVisitor:[" << instr->block_id() << ":"
678 << instr->index() << "] " << *instr << "\n";
679
680 if (!instr->can_propagate_dest()) {
681 return;
682 }
683
684 auto src_reg = instr->psrc(0)->as_register();
685 if (!src_reg) {
686 return;
687 }
688
689 if (src_reg->uses().size() > 1)
690 return;
691
692 auto dest = instr->dest();
693 if (!dest || !instr->has_alu_flag(alu_write)) {
694 return;
695 }
696
697 if (!dest->has_flag(Register::ssa) && dest->parents().size() > 1)
698 return;
699
700 for (auto& i : src_reg->parents()) {
701 sfn_log << SfnLog::opt << "Try replace dest in " << i->block_id() << ":"
702 << i->index() << *i << "\n";
703
704 if (i->replace_dest(dest, instr)) {
705 dest->del_parent(instr);
706 dest->add_parent(i);
707 for (auto d : instr->dependend_instr()) {
708 d->add_required_instr(i);
709 }
710 local_progress = true;
711 }
712 }
713
714 if (local_progress)
715 instr->set_dead();
716
717 progress |= local_progress;
718 }
719
720 void
visit(AluGroup * instr)721 CopyPropBackVisitor::visit(AluGroup *instr)
722 {
723 for (auto& i : *instr) {
724 if (i)
725 i->accept(*this);
726 }
727 }
728
729 void
visit(TexInstr * instr)730 CopyPropBackVisitor::visit(TexInstr *instr)
731 {
732 (void)instr;
733 }
734
735 void
visit(FetchInstr * instr)736 CopyPropBackVisitor::visit(FetchInstr *instr)
737 {
738 (void)instr;
739 }
740
741 void
visit(Block * instr)742 CopyPropBackVisitor::visit(Block *instr)
743 {
744 for (auto i = instr->rbegin(); i != instr->rend(); ++i)
745 if (!(*i)->is_dead())
746 (*i)->accept(*this);
747 }
748
749 class SimplifySourceVecVisitor : public InstrVisitor {
750 public:
SimplifySourceVecVisitor()751 SimplifySourceVecVisitor():
752 progress(false)
753 {
754 }
755
visit(AluInstr * instr)756 void visit(AluInstr *instr) override { (void)instr; }
visit(AluGroup * instr)757 void visit(AluGroup *instr) override { (void)instr; }
758 void visit(TexInstr *instr) override;
759 void visit(ExportInstr *instr) override;
760 void visit(FetchInstr *instr) override;
761 void visit(Block *instr) override;
762 void visit(ControlFlowInstr *instr) override;
763 void visit(IfInstr *instr) override;
764 void visit(ScratchIOInstr *instr) override;
765 void visit(StreamOutInstr *instr) override;
766 void visit(MemRingOutInstr *instr) override;
visit(EmitVertexInstr * instr)767 void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)768 void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)769 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)770 void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)771 void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)772 void visit(RatInstr *instr) override { (void)instr; };
773
774 void replace_src(Instr *instr, RegisterVec4& reg4);
775
776 bool progress;
777 };
778
779 class HasVecDestVisitor : public ConstInstrVisitor {
780 public:
HasVecDestVisitor()781 HasVecDestVisitor():
782 has_group_dest(false)
783 {
784 }
785
visit(const AluInstr & instr)786 void visit(const AluInstr& instr) override { (void)instr; }
visit(const AluGroup & instr)787 void visit(const AluGroup& instr) override { (void)instr; }
visit(const TexInstr & instr)788 void visit(const TexInstr& instr) override { (void)instr; has_group_dest = true; };
visit(const ExportInstr & instr)789 void visit(const ExportInstr& instr) override { (void)instr; }
visit(const FetchInstr & instr)790 void visit(const FetchInstr& instr) override { (void)instr; has_group_dest = true; };
visit(const Block & instr)791 void visit(const Block& instr) override { (void)instr; };
visit(const ControlFlowInstr & instr)792 void visit(const ControlFlowInstr& instr) override{ (void)instr; }
visit(const IfInstr & instr)793 void visit(const IfInstr& instr) override{ (void)instr; }
visit(const ScratchIOInstr & instr)794 void visit(const ScratchIOInstr& instr) override { (void)instr; };
visit(const StreamOutInstr & instr)795 void visit(const StreamOutInstr& instr) override { (void)instr; }
visit(const MemRingOutInstr & instr)796 void visit(const MemRingOutInstr& instr) override { (void)instr; }
visit(const EmitVertexInstr & instr)797 void visit(const EmitVertexInstr& instr) override { (void)instr; }
visit(const GDSInstr & instr)798 void visit(const GDSInstr& instr) override { (void)instr; }
visit(const WriteTFInstr & instr)799 void visit(const WriteTFInstr& instr) override { (void)instr; };
visit(const LDSAtomicInstr & instr)800 void visit(const LDSAtomicInstr& instr) override { (void)instr; };
visit(const LDSReadInstr & instr)801 void visit(const LDSReadInstr& instr) override { (void)instr; };
visit(const RatInstr & instr)802 void visit(const RatInstr& instr) override { (void)instr; };
803
804 bool has_group_dest;
805 };
806
807 class HasVecSrcVisitor : public ConstInstrVisitor {
808 public:
HasVecSrcVisitor()809 HasVecSrcVisitor():
810 has_group_src(false)
811 {
812 }
813
visit(UNUSED const AluInstr & instr)814 void visit(UNUSED const AluInstr& instr) override { }
visit(UNUSED const AluGroup & instr)815 void visit(UNUSED const AluGroup& instr) override { }
visit(UNUSED const FetchInstr & instr)816 void visit(UNUSED const FetchInstr& instr) override { };
visit(UNUSED const Block & instr)817 void visit(UNUSED const Block& instr) override { };
visit(UNUSED const ControlFlowInstr & instr)818 void visit(UNUSED const ControlFlowInstr& instr) override{ }
visit(UNUSED const IfInstr & instr)819 void visit(UNUSED const IfInstr& instr) override{ }
visit(UNUSED const LDSAtomicInstr & instr)820 void visit(UNUSED const LDSAtomicInstr& instr) override { };
visit(UNUSED const LDSReadInstr & instr)821 void visit(UNUSED const LDSReadInstr& instr) override { };
822
visit(const TexInstr & instr)823 void visit(const TexInstr& instr) override { check(instr.src()); }
visit(const ExportInstr & instr)824 void visit(const ExportInstr& instr) override { check(instr.value()); }
visit(const GDSInstr & instr)825 void visit(const GDSInstr& instr) override { check(instr.src()); }
826
827 // No swizzling supported, so we want to keep the register group
visit(UNUSED const ScratchIOInstr & instr)828 void visit(UNUSED const ScratchIOInstr& instr) override { has_group_src = true; };
visit(UNUSED const StreamOutInstr & instr)829 void visit(UNUSED const StreamOutInstr& instr) override { has_group_src = true; }
visit(UNUSED const MemRingOutInstr & instr)830 void visit(UNUSED const MemRingOutInstr& instr) override { has_group_src = true; }
visit(UNUSED const RatInstr & instr)831 void visit(UNUSED const RatInstr& instr) override { has_group_src = true; };
832
visit(UNUSED const EmitVertexInstr & instr)833 void visit(UNUSED const EmitVertexInstr& instr) override { }
834
835 // We always emit at least two values
visit(UNUSED const WriteTFInstr & instr)836 void visit(UNUSED const WriteTFInstr& instr) override { has_group_src = true; };
837
838
839 void check(const RegisterVec4& value);
840
841 bool has_group_src;
842 };
843
check(const RegisterVec4 & value)844 void HasVecSrcVisitor::check(const RegisterVec4& value)
845 {
846 int nval = 0;
847 for (int i = 0; i < 4 && nval < 2; ++i) {
848 if (value[i]->chan() < 4)
849 ++nval;
850 }
851 has_group_src = nval > 1;
852 }
853
854 bool
simplify_source_vectors(Shader & sh)855 simplify_source_vectors(Shader& sh)
856 {
857 SimplifySourceVecVisitor visitor;
858
859 for (auto b : sh.func())
860 b->accept(visitor);
861
862 return visitor.progress;
863 }
864
865 void
visit(TexInstr * instr)866 SimplifySourceVecVisitor::visit(TexInstr *instr)
867 {
868
869 if (instr->opcode() != TexInstr::get_resinfo) {
870 auto& src = instr->src();
871 replace_src(instr, src);
872 int nvals = 0;
873 for (int i = 0; i < 4; ++i)
874 if (src[i]->chan() < 4)
875 ++nvals;
876 if (nvals == 1) {
877 for (int i = 0; i < 4; ++i)
878 if (src[i]->chan() < 4) {
879 HasVecDestVisitor check_dests;
880 for (auto p : src[i]->parents()) {
881 p->accept(check_dests);
882 if (check_dests.has_group_dest)
883 break;
884 }
885
886 HasVecSrcVisitor check_src;
887 for (auto p : src[i]->uses()) {
888 p->accept(check_src);
889 if (check_src.has_group_src)
890 break;
891 }
892
893 if (check_dests.has_group_dest || check_src.has_group_src)
894 break;
895
896 if (src[i]->pin() == pin_group)
897 src[i]->set_pin(pin_free);
898 else if (src[i]->pin() == pin_chgr)
899 src[i]->set_pin(pin_chan);
900 }
901 }
902 }
903 for (auto& prep : instr->prepare_instr()) {
904 prep->accept(*this);
905 }
906 }
907
908 void
visit(ScratchIOInstr * instr)909 SimplifySourceVecVisitor::visit(ScratchIOInstr *instr)
910 {
911 (void)instr;
912 }
913
914 class ReplaceConstSource : public AluInstrVisitor {
915 public:
ReplaceConstSource(Instr * old_use_,RegisterVec4 & vreg_,int i)916 ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i):
917 old_use(old_use_),
918 vreg(vreg_),
919 index(i),
920 success(false)
921 {
922 }
923
924 using AluInstrVisitor::visit;
925
926 void visit(AluInstr *alu) override;
927
928 Instr *old_use;
929 RegisterVec4& vreg;
930 int index;
931 bool success;
932 };
933
934 void
visit(ExportInstr * instr)935 SimplifySourceVecVisitor::visit(ExportInstr *instr)
936 {
937 replace_src(instr, instr->value());
938 }
939
940 void
replace_src(Instr * instr,RegisterVec4 & reg4)941 SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4)
942 {
943 for (int i = 0; i < 4; ++i) {
944 auto s = reg4[i];
945
946 if (s->chan() > 3)
947 continue;
948
949 if (!s->has_flag(Register::ssa))
950 continue;
951
952 /* Cayman trans ops have more then one parent for
953 * one dest */
954 if (s->parents().size() != 1)
955 continue;
956
957 auto& op = *s->parents().begin();
958
959 ReplaceConstSource visitor(instr, reg4, i);
960
961 op->accept(visitor);
962
963 progress |= visitor.success;
964 }
965 }
966
967 void
visit(StreamOutInstr * instr)968 SimplifySourceVecVisitor::visit(StreamOutInstr *instr)
969 {
970 (void)instr;
971 }
972
973 void
visit(MemRingOutInstr * instr)974 SimplifySourceVecVisitor::visit(MemRingOutInstr *instr)
975 {
976 (void)instr;
977 }
978
979 void
visit(AluInstr * alu)980 ReplaceConstSource::visit(AluInstr *alu)
981 {
982 if (alu->opcode() != op1_mov)
983 return;
984
985 if (alu->has_source_mod(0, AluInstr::mod_abs) ||
986 alu->has_source_mod(0, AluInstr::mod_neg))
987 return;
988
989 auto src = alu->psrc(0);
990 assert(src);
991
992 int override_chan = -1;
993
994 if (value_is_const_uint(*src, 0)) {
995 override_chan = 4;
996 } else if (value_is_const_float(*src, 1.0f)) {
997 override_chan = 5;
998 }
999
1000 if (override_chan >= 0) {
1001 vreg[index]->del_use(old_use);
1002 auto reg = new Register(vreg.sel(), override_chan, vreg[index]->pin());
1003 vreg.set_value(index, reg);
1004 success = true;
1005 }
1006 }
1007
1008 void
visit(FetchInstr * instr)1009 SimplifySourceVecVisitor::visit(FetchInstr *instr)
1010 {
1011 (void)instr;
1012 }
1013
1014 void
visit(Block * instr)1015 SimplifySourceVecVisitor::visit(Block *instr)
1016 {
1017 for (auto i = instr->rbegin(); i != instr->rend(); ++i)
1018 if (!(*i)->is_dead())
1019 (*i)->accept(*this);
1020 }
1021
1022 void
visit(ControlFlowInstr * instr)1023 SimplifySourceVecVisitor::visit(ControlFlowInstr *instr)
1024 {
1025 (void)instr;
1026 }
1027
1028 void
visit(IfInstr * instr)1029 SimplifySourceVecVisitor::visit(IfInstr *instr)
1030 {
1031 (void)instr;
1032 }
1033
1034 } // namespace r600
1035