xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* -*- mesa-c++  -*-
2  * Copyright 2022 Collabora LTD
3  * Author: Gert Wollny <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "sfn_optimizer.h"
8 
9 #include "sfn_debug.h"
10 #include "sfn_instr_alugroup.h"
11 #include "sfn_instr_controlflow.h"
12 #include "sfn_instr_export.h"
13 #include "sfn_instr_fetch.h"
14 #include "sfn_instr_lds.h"
15 #include "sfn_instr_mem.h"
16 #include "sfn_instr_tex.h"
17 #include "sfn_peephole.h"
18 #include "sfn_valuefactory.h"
19 #include "sfn_virtualvalues.h"
20 
21 #include <sstream>
22 
23 namespace r600 {
24 
25 bool
optimize(Shader & shader)26 optimize(Shader& shader)
27 {
28    bool progress;
29 
30    sfn_log << SfnLog::opt << "Shader before optimization\n";
31    if (sfn_log.has_debug_flag(SfnLog::opt)) {
32       std::stringstream ss;
33       shader.print(ss);
34       sfn_log << ss.str() << "\n\n";
35    }
36 
37    do {
38       progress = false;
39       progress |= copy_propagation_fwd(shader);
40       progress |= dead_code_elimination(shader);
41       progress |= copy_propagation_backward(shader);
42       progress |= dead_code_elimination(shader);
43       progress |= simplify_source_vectors(shader);
44       progress |= peephole(shader);
45       progress |= dead_code_elimination(shader);
46    } while (progress);
47 
48    return progress;
49 }
50 
51 class DCEVisitor : public InstrVisitor {
52 public:
53    DCEVisitor();
54 
55    void visit(AluInstr *instr) override;
56    void visit(AluGroup *instr) override;
57    void visit(TexInstr *instr) override;
visit(ExportInstr * instr)58    void visit(ExportInstr *instr) override { (void)instr; };
59    void visit(FetchInstr *instr) override;
60    void visit(Block *instr) override;
61 
visit(ControlFlowInstr * instr)62    void visit(ControlFlowInstr *instr) override { (void)instr; };
visit(IfInstr * instr)63    void visit(IfInstr *instr) override { (void)instr; };
visit(ScratchIOInstr * instr)64    void visit(ScratchIOInstr *instr) override { (void)instr; };
visit(StreamOutInstr * instr)65    void visit(StreamOutInstr *instr) override { (void)instr; };
visit(MemRingOutInstr * instr)66    void visit(MemRingOutInstr *instr) override { (void)instr; };
visit(EmitVertexInstr * instr)67    void visit(EmitVertexInstr *instr) override { (void)instr; };
visit(GDSInstr * instr)68    void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)69    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)70    void visit(LDSAtomicInstr *instr) override { (void)instr; };
71    void visit(LDSReadInstr *instr) override;
visit(RatInstr * instr)72    void visit(RatInstr *instr) override { (void)instr; };
73 
74    bool progress;
75 };
76 
77 bool
dead_code_elimination(Shader & shader)78 dead_code_elimination(Shader& shader)
79 {
80    DCEVisitor dce;
81 
82    do {
83 
84       sfn_log << SfnLog::opt << "start dce run\n";
85 
86       dce.progress = false;
87       for (auto& b : shader.func())
88          b->accept(dce);
89 
90       sfn_log << SfnLog::opt << "finished dce run\n\n";
91 
92    } while (dce.progress);
93 
94    sfn_log << SfnLog::opt << "Shader after DCE\n";
95    if (sfn_log.has_debug_flag(SfnLog::opt)) {
96       std::stringstream ss;
97       shader.print(ss);
98       sfn_log << ss.str() << "\n\n";
99    }
100 
101    return dce.progress;
102 }
103 
DCEVisitor()104 DCEVisitor::DCEVisitor():
105     progress(false)
106 {
107 }
108 
109 void
visit(AluInstr * instr)110 DCEVisitor::visit(AluInstr *instr)
111 {
112    sfn_log << SfnLog::opt << "DCE: visit '" << *instr;
113 
114    if (instr->has_instr_flag(Instr::dead))
115       return;
116 
117    if (instr->dest() && (instr->dest()->has_uses())) {
118       sfn_log << SfnLog::opt << " dest used\n";
119       return;
120    }
121 
122    switch (instr->opcode()) {
123    case op2_kille:
124    case op2_killne:
125    case op2_kille_int:
126    case op2_killne_int:
127    case op2_killge:
128    case op2_killge_int:
129    case op2_killge_uint:
130    case op2_killgt:
131    case op2_killgt_int:
132    case op2_killgt_uint:
133    case op0_group_barrier:
134       sfn_log << SfnLog::opt << " never kill\n";
135       return;
136    default:;
137    }
138 
139    bool dead = instr->set_dead();
140    sfn_log << SfnLog::opt << (dead ? "dead" : "alive") << "\n";
141    progress |= dead;
142 }
143 
144 void
visit(LDSReadInstr * instr)145 DCEVisitor::visit(LDSReadInstr *instr)
146 {
147    sfn_log << SfnLog::opt << "visit " << *instr << "\n";
148    progress |= instr->remove_unused_components();
149 }
150 
151 void
visit(AluGroup * instr)152 DCEVisitor::visit(AluGroup *instr)
153 {
154    /* Groups are created because the instructions are used together
155     * so don't try to eliminate code there */
156    (void)instr;
157 }
158 
159 void
visit(TexInstr * instr)160 DCEVisitor::visit(TexInstr *instr)
161 {
162    auto& dest = instr->dst();
163 
164    bool has_uses = false;
165    RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
166    for (int i = 0; i < 4; ++i) {
167       if (!dest[i]->has_uses())
168          swz[i] = 7;
169       else
170          has_uses |= true;
171    }
172    instr->set_dest_swizzle(swz);
173 
174    if (has_uses)
175       return;
176 
177    progress |= instr->set_dead();
178 }
179 
180 void
visit(FetchInstr * instr)181 DCEVisitor::visit(FetchInstr *instr)
182 {
183    auto& dest = instr->dst();
184 
185    bool has_uses = false;
186    RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
187    for (int i = 0; i < 4; ++i) {
188       if (!dest[i]->has_uses())
189          swz[i] = 7;
190       else
191          has_uses |= true;
192    }
193    instr->set_dest_swizzle(swz);
194 
195    if (has_uses)
196       return;
197 
198    sfn_log << SfnLog::opt << "set dead: " << *instr << "\n";
199 
200    progress |= instr->set_dead();
201 }
202 
203 void
visit(Block * block)204 DCEVisitor::visit(Block *block)
205 {
206    auto i = block->begin();
207    auto e = block->end();
208    while (i != e) {
209       auto n = i++;
210       if (!(*n)->keep()) {
211          (*n)->accept(*this);
212          if ((*n)->is_dead()) {
213             block->erase(n);
214          }
215       }
216    }
217 }
218 
219 class CopyPropFwdVisitor : public InstrVisitor {
220 public:
221    CopyPropFwdVisitor(ValueFactory& vf);
222 
223    void visit(AluInstr *instr) override;
224    void visit(AluGroup *instr) override;
225    void visit(TexInstr *instr) override;
226    void visit(ExportInstr *instr) override;
227    void visit(FetchInstr *instr) override;
228    void visit(Block *instr) override;
visit(ControlFlowInstr * instr)229    void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)230    void visit(IfInstr *instr) override { (void)instr; }
visit(ScratchIOInstr * instr)231    void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)232    void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)233    void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)234    void visit(EmitVertexInstr *instr) override { (void)instr; }
235    void visit(GDSInstr *instr) override;
visit(WriteTFInstr * instr)236    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(RatInstr * instr)237    void visit(RatInstr *instr) override { (void)instr; };
238 
239    // TODO: these two should use copy propagation
visit(LDSAtomicInstr * instr)240    void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)241    void visit(LDSReadInstr *instr) override { (void)instr; };
242 
243    void propagate_to(RegisterVec4& src, Instr *instr);
244    bool assigned_register_direct(PRegister reg);
245 
246    ValueFactory& value_factory;
247    bool progress;
248 };
249 
250 class CopyPropBackVisitor : public InstrVisitor {
251 public:
252    CopyPropBackVisitor();
253 
254    void visit(AluInstr *instr) override;
255    void visit(AluGroup *instr) override;
256    void visit(TexInstr *instr) override;
visit(ExportInstr * instr)257    void visit(ExportInstr *instr) override { (void)instr; }
258    void visit(FetchInstr *instr) override;
259    void visit(Block *instr) override;
visit(ControlFlowInstr * instr)260    void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)261    void visit(IfInstr *instr) override { (void)instr; }
visit(ScratchIOInstr * instr)262    void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)263    void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)264    void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)265    void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)266    void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)267    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)268    void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)269    void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)270    void visit(RatInstr *instr) override { (void)instr; };
271 
272    bool progress;
273 };
274 
275 bool
copy_propagation_fwd(Shader & shader)276 copy_propagation_fwd(Shader& shader)
277 {
278    auto& root = shader.func();
279    CopyPropFwdVisitor copy_prop(shader.value_factory());
280 
281    do {
282       copy_prop.progress = false;
283       for (auto b : root)
284          b->accept(copy_prop);
285    } while (copy_prop.progress);
286 
287    sfn_log << SfnLog::opt << "Shader after Copy Prop forward\n";
288    if (sfn_log.has_debug_flag(SfnLog::opt)) {
289       std::stringstream ss;
290       shader.print(ss);
291       sfn_log << ss.str() << "\n\n";
292    }
293 
294    return copy_prop.progress;
295 }
296 
297 bool
copy_propagation_backward(Shader & shader)298 copy_propagation_backward(Shader& shader)
299 {
300    CopyPropBackVisitor copy_prop;
301 
302    do {
303       copy_prop.progress = false;
304       for (auto b : shader.func())
305          b->accept(copy_prop);
306    } while (copy_prop.progress);
307 
308    sfn_log << SfnLog::opt << "Shader after Copy Prop backwards\n";
309    if (sfn_log.has_debug_flag(SfnLog::opt)) {
310       std::stringstream ss;
311       shader.print(ss);
312       sfn_log << ss.str() << "\n\n";
313    }
314 
315    return copy_prop.progress;
316 }
317 
CopyPropFwdVisitor(ValueFactory & vf)318 CopyPropFwdVisitor::CopyPropFwdVisitor(ValueFactory& vf):
319    value_factory(vf),
320    progress(false)
321 {
322 }
323 
324 void
visit(AluInstr * instr)325 CopyPropFwdVisitor::visit(AluInstr *instr)
326 {
327    sfn_log << SfnLog::opt << "CopyPropFwdVisitor:[" << instr->block_id() << ":"
328            << instr->index() << "] " << *instr << " dset=" << instr->dest() << " ";
329 
330    if (instr->dest()) {
331       sfn_log << SfnLog::opt << "has uses; " << instr->dest()->uses().size();
332    }
333 
334    sfn_log << SfnLog::opt << "\n";
335 
336    if (!instr->can_propagate_src()) {
337       return;
338    }
339 
340    auto src = instr->psrc(0);
341    auto dest = instr->dest();
342 
343    /* Don't propagate an indirect load to more than one
344     * instruction, because we may have to split the address loads
345     * creating more instructions */
346    if (dest->uses().size() > 1) {
347       auto [addr, is_for_dest, index] = instr->indirect_addr();
348       if (addr && !is_for_dest)
349          return;
350    }
351 
352 
353    auto ii = dest->uses().begin();
354    auto ie = dest->uses().end();
355 
356    auto mov_block_id = instr->block_id();
357 
358    /** libc++ seems to invalidate the end iterator too if a std::set is
359     *  made empty by an erase operation,
360     *  https://gitlab.freedesktop.org/mesa/mesa/-/issues/7931
361     */
362    while(ii != ie && !dest->uses().empty()) {
363       auto i = *ii;
364       auto target_block_id = i->block_id();
365 
366       ++ii;
367       /* SSA can always be propagated, registers only in the same block
368        * and only if they are assigned in the same block */
369       bool dest_can_propagate = dest->has_flag(Register::ssa);
370 
371       if (!dest_can_propagate) {
372 
373          /* Register can propagate if the assignment was in the same
374           * block, and we don't have a second assignment coming later
375           * (e.g. helper invocation evaluation does
376           *
377           * 1: MOV R0.x, -1
378           * 2: FETCH R0.0 VPM
379           * 3: MOV SN.x, R0.x
380           *
381           * Here we can't prpagate the move in 1 to SN.x in 3 */
382          if ((mov_block_id == target_block_id && instr->index() < i->index())) {
383             dest_can_propagate = true;
384             if (dest->parents().size() > 1) {
385                for (auto p : dest->parents()) {
386                   if (p->block_id() == i->block_id() && p->index() > instr->index()) {
387                      dest_can_propagate = false;
388                      break;
389                   }
390                }
391             }
392          }
393       }
394       bool move_addr_use = false;
395       bool src_can_propagate = false;
396       if (auto rsrc = src->as_register()) {
397          if (rsrc->has_flag(Register::ssa)) {
398             src_can_propagate = true;
399          } else if (mov_block_id == target_block_id) {
400             if (auto a = rsrc->addr()) {
401                if (a->as_register() &&
402                    !a->as_register()->has_flag(Register::addr_or_idx) &&
403                    i->block_id() == mov_block_id &&
404                    i->index() == instr->index() + 1) {
405                   src_can_propagate = true;
406                   move_addr_use = true;
407                }
408             } else {
409                src_can_propagate = true;
410             }
411             for (auto p : rsrc->parents()) {
412                if (p->block_id() == mov_block_id &&
413                    p->index() > instr->index() &&
414                    p->index() < i->index()) {
415                   src_can_propagate = false;
416                   break;
417                }
418             }
419          }
420       } else {
421          src_can_propagate = true;
422       }
423 
424       if (dest_can_propagate && src_can_propagate) {
425          sfn_log << SfnLog::opt << "   Try replace in " << i->block_id() << ":"
426                  << i->index() << *i << "\n";
427 
428          if (i->as_alu() && i->as_alu()->parent_group()) {
429             progress |= i->as_alu()->parent_group()->replace_source(dest, src);
430          } else {
431             bool success = i->replace_source(dest, src);
432             if (success && move_addr_use) {
433                for (auto r : instr->required_instr()){
434                   std::cerr << "add " << *r << " to " << *i << "\n";
435                   i->add_required_instr(r);
436                }
437             }
438             progress |= success;
439          }
440       }
441    }
442    if (instr->dest()) {
443       sfn_log << SfnLog::opt << "has uses; " << instr->dest()->uses().size();
444    }
445    sfn_log << SfnLog::opt << "  done\n";
446 }
447 
448 void
visit(AluGroup * instr)449 CopyPropFwdVisitor::visit(AluGroup *instr)
450 {
451    (void)instr;
452 }
453 
454 void
visit(TexInstr * instr)455 CopyPropFwdVisitor::visit(TexInstr *instr)
456 {
457    propagate_to(instr->src(), instr);
458 }
459 
visit(GDSInstr * instr)460 void CopyPropFwdVisitor::visit(GDSInstr *instr)
461 {
462    propagate_to(instr->src(), instr);
463 }
464 
465 void
visit(ExportInstr * instr)466 CopyPropFwdVisitor::visit(ExportInstr *instr)
467 {
468    propagate_to(instr->value(), instr);
469 }
470 
register_sel_can_change(Pin pin)471 static bool register_sel_can_change(Pin pin)
472 {
473    return pin == pin_free || pin == pin_none;
474 }
475 
register_chan_is_pinned(Pin pin)476 static bool register_chan_is_pinned(Pin pin)
477 {
478    return pin == pin_chan ||
479          pin == pin_fully ||
480          pin == pin_chgr;
481 }
482 
483 
/* Try to replace the components of the vec4 source "value" of "instr"
 * by the sources of the ALU moves that write these components.
 *
 * The replacement is only done when all SSA components with a channel
 * below 4 are written by exactly one plain move (no source modifiers,
 * no clamp, no indirect access) and have no other use, and when the move
 * sources can be placed into one register index. In all other cases the
 * function returns without changing "value".
 *
 * Sets "progress" when at least one component was replaced. */
void
CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr)
{
   /* Collect parent instructions - only ALU move without modifiers
    * and without indirect access are allowed. */
   AluInstr *parents[4] = {nullptr};
   bool have_candidates = false;
   for (int i = 0; i < 4; ++i) {
      if (value[i]->chan() < 4 && value[i]->has_flag(Register::ssa)) {
         /*  We have a pre-defined value, so we can't propagate a copy */
         if (value[i]->parents().empty())
            return;

         /* The value is also used elsewhere, so leave it alone */
         if (value[i]->uses().size() > 1)
            return;

         assert(value[i]->parents().size() == 1);
         parents[i] = (*value[i]->parents().begin())->as_alu();

         /* Parent op is not an ALU instruction, so we can't
            copy-propagate */
         if (!parents[i])
             return;


         if ((parents[i]->opcode() != op1_mov) ||
             parents[i]->has_source_mod(0, AluInstr::mod_neg) ||
             parents[i]->has_source_mod(0, AluInstr::mod_abs) ||
             parents[i]->has_alu_flag(alu_dst_clamp) ||
             parents[i]->has_alu_flag(alu_src0_rel))
            return;

         auto [addr, dummy0, index_reg_dummy] = parents[i]->indirect_addr();

         /* Don't accept moves with indirect reads, because they are not
          * supported with instructions that use vec4 values */
         if (addr || index_reg_dummy)
             return;

         have_candidates = true;
      }
   }

   if (!have_candidates)
      return;

   /* Collect the new source registers. We may have to move all registers
    * to a new virtual sel index. */

   PRegister new_src[4] = {0};
   int new_chan[4] = {0,0,0,0};

   /* Bit mask of the channels already taken by earlier components */
   uint8_t used_chan_mask = 0;
   int new_sel = -1;
   bool all_sel_can_change = true;

   bool is_ssa = true;

   for (int i = 0; i < 4; ++i) {

      /* No parent means we either ignore the channel or insert 0 or 1.*/
      if (!parents[i])
         continue;

      unsigned allowed_mask = 0xf & ~used_chan_mask;

      auto src = parents[i]->src(0).as_register();
      if (!src)
         return;

      /* Don't accept an array element for now, we would need extra checking
       * that the value is not overwritten by an indirect access */
      if (src->pin() == pin_array)
         return;

      /* Is this check still needed ? */
      if (!src->has_flag(Register::ssa) &&
          !assigned_register_direct(src)) {
         return;
      }

      /* If the channel can't switch we have to update the channel mask
       * TODO: assign channel pinned registers first might give more
       *  opportunities for this optimization */
      if (register_chan_is_pinned(src->pin()))
         allowed_mask = 1 << src->chan();

      /* Update the possible channel mask based on the source's parent
       * instruction(s) */
      for (auto p : src->parents()) {
         auto alu = p->as_alu();
         if (alu)
            allowed_mask &= alu->allowed_dest_chan_mask();
      }

      /* ... and based on the ALU instructions that read the source */
      for (auto u : src->uses()) {
         auto alu = u->as_alu();
         if (alu)
            allowed_mask &= alu->allowed_src_chan_mask();
      }

      if (!allowed_mask)
         return;

      /* Prefer keeping the channel, but if that's not possible
       * i.e. if the sel has to change, then  pick the next free channel
       * (see below) */
      new_chan[i] = src->chan();

      if (new_sel < 0) {
         /* First candidate: take over its sel and SSA state */
         new_sel = src->sel();
         is_ssa = src->has_flag(Register::ssa);
      } else if (new_sel != src->sel()) {
         /* If we have to assign a new register sel index do so only
          * if all already assigned source can get a new register index,
          * and all registers are either SSA or registers.
          * TODO: check whether this last restriction is required */
         if (all_sel_can_change &&
             register_sel_can_change(src->pin()) &&
             (is_ssa == src->has_flag(Register::ssa))) {
            new_sel = value_factory.new_register_index();
            new_chan[i] = u_bit_scan(&allowed_mask);
         } else /* Sources can't be combined to a vec4 so bail out */
            return;
      }

      new_src[i] = src;
      used_chan_mask |= 1 << new_chan[i];
      if (!register_sel_can_change(src->pin()))
         all_sel_can_change = false;
   }

   /* Apply the changes to the vec4 source */
   value.del_use(instr);
   for (int i = 0; i < 4; ++i) {
      if (parents[i]) {
         new_src[i]->set_sel(new_sel);
         if (is_ssa)
            new_src[i]->set_flag(Register::ssa);
         new_src[i]->set_chan(new_chan[i]);

         value.set_value(i, new_src[i]);

         /* Registers that are not already pinned with pin_fully or
          * pin_chgr get pin_chgr (if they were pin_chan) or pin_group */
         if (new_src[i]->pin() != pin_fully &&
             new_src[i]->pin() != pin_chgr) {
            if (new_src[i]->pin() == pin_chan)
               new_src[i]->set_pin(pin_chgr);
            else
               new_src[i]->set_pin(pin_group);
         }
         progress |= true;
      }
   }
   value.add_use(instr);
   /* NOTE(review): "progress" is a member and may already be set by an
    * earlier visit, so validate() can also run when this call replaced
    * nothing - confirm that this is intended. */
   if (progress)
      value.validate();
}
641 
assigned_register_direct(PRegister reg)642 bool CopyPropFwdVisitor::assigned_register_direct(PRegister reg)
643 {
644    for (auto p: reg->parents()) {
645       if (p->as_alu())  {
646           auto [addr, dummy, index_reg] = p->as_alu()->indirect_addr();
647           if (addr)
648              return false;
649       }
650    }
651    return true;
652 }
653 
654 void
visit(FetchInstr * instr)655 CopyPropFwdVisitor::visit(FetchInstr *instr)
656 {
657    (void)instr;
658 }
659 
660 void
visit(Block * instr)661 CopyPropFwdVisitor::visit(Block *instr)
662 {
663    for (auto& i : *instr)
664       i->accept(*this);
665 }
666 
CopyPropBackVisitor()667 CopyPropBackVisitor::CopyPropBackVisitor():
668     progress(false)
669 {
670 }
671 
672 void
visit(AluInstr * instr)673 CopyPropBackVisitor::visit(AluInstr *instr)
674 {
675    bool local_progress = false;
676 
677    sfn_log << SfnLog::opt << "CopyPropBackVisitor:[" << instr->block_id() << ":"
678            << instr->index() << "] " << *instr << "\n";
679 
680    if (!instr->can_propagate_dest()) {
681       return;
682    }
683 
684    auto src_reg = instr->psrc(0)->as_register();
685    if (!src_reg) {
686       return;
687    }
688 
689    if (src_reg->uses().size() > 1)
690       return;
691 
692    auto dest = instr->dest();
693    if (!dest || !instr->has_alu_flag(alu_write)) {
694       return;
695    }
696 
697    if (!dest->has_flag(Register::ssa) && dest->parents().size() > 1)
698       return;
699 
700    for (auto& i : src_reg->parents()) {
701       sfn_log << SfnLog::opt << "Try replace dest in " << i->block_id() << ":"
702               << i->index() << *i << "\n";
703 
704       if (i->replace_dest(dest, instr)) {
705          dest->del_parent(instr);
706          dest->add_parent(i);
707          for (auto d : instr->dependend_instr()) {
708             d->add_required_instr(i);
709          }
710          local_progress = true;
711       }
712    }
713 
714    if (local_progress)
715       instr->set_dead();
716 
717    progress |= local_progress;
718 }
719 
720 void
visit(AluGroup * instr)721 CopyPropBackVisitor::visit(AluGroup *instr)
722 {
723    for (auto& i : *instr) {
724       if (i)
725          i->accept(*this);
726    }
727 }
728 
729 void
visit(TexInstr * instr)730 CopyPropBackVisitor::visit(TexInstr *instr)
731 {
732    (void)instr;
733 }
734 
735 void
visit(FetchInstr * instr)736 CopyPropBackVisitor::visit(FetchInstr *instr)
737 {
738    (void)instr;
739 }
740 
741 void
visit(Block * instr)742 CopyPropBackVisitor::visit(Block *instr)
743 {
744    for (auto i = instr->rbegin(); i != instr->rend(); ++i)
745       if (!(*i)->is_dead())
746          (*i)->accept(*this);
747 }
748 
749 class SimplifySourceVecVisitor : public InstrVisitor {
750 public:
SimplifySourceVecVisitor()751    SimplifySourceVecVisitor():
752        progress(false)
753    {
754    }
755 
visit(AluInstr * instr)756    void visit(AluInstr *instr) override { (void)instr; }
visit(AluGroup * instr)757    void visit(AluGroup *instr) override { (void)instr; }
758    void visit(TexInstr *instr) override;
759    void visit(ExportInstr *instr) override;
760    void visit(FetchInstr *instr) override;
761    void visit(Block *instr) override;
762    void visit(ControlFlowInstr *instr) override;
763    void visit(IfInstr *instr) override;
764    void visit(ScratchIOInstr *instr) override;
765    void visit(StreamOutInstr *instr) override;
766    void visit(MemRingOutInstr *instr) override;
visit(EmitVertexInstr * instr)767    void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)768    void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)769    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)770    void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)771    void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)772    void visit(RatInstr *instr) override { (void)instr; };
773 
774    void replace_src(Instr *instr, RegisterVec4& reg4);
775 
776    bool progress;
777 };
778 
779 class HasVecDestVisitor : public ConstInstrVisitor {
780 public:
HasVecDestVisitor()781    HasVecDestVisitor():
782        has_group_dest(false)
783    {
784    }
785 
visit(const AluInstr & instr)786    void visit(const AluInstr& instr) override { (void)instr; }
visit(const AluGroup & instr)787    void visit(const AluGroup& instr) override { (void)instr; }
visit(const TexInstr & instr)788    void visit(const TexInstr& instr) override  {  (void)instr; has_group_dest = true; };
visit(const ExportInstr & instr)789    void visit(const ExportInstr& instr) override { (void)instr; }
visit(const FetchInstr & instr)790    void visit(const FetchInstr& instr) override  {  (void)instr; has_group_dest = true; };
visit(const Block & instr)791    void visit(const Block& instr) override { (void)instr; };
visit(const ControlFlowInstr & instr)792    void visit(const ControlFlowInstr& instr) override{ (void)instr; }
visit(const IfInstr & instr)793    void visit(const IfInstr& instr) override{ (void)instr; }
visit(const ScratchIOInstr & instr)794    void visit(const ScratchIOInstr& instr) override  { (void)instr; };
visit(const StreamOutInstr & instr)795    void visit(const StreamOutInstr& instr) override { (void)instr; }
visit(const MemRingOutInstr & instr)796    void visit(const MemRingOutInstr& instr) override { (void)instr; }
visit(const EmitVertexInstr & instr)797    void visit(const EmitVertexInstr& instr) override { (void)instr; }
visit(const GDSInstr & instr)798    void visit(const GDSInstr& instr) override { (void)instr; }
visit(const WriteTFInstr & instr)799    void visit(const WriteTFInstr& instr) override { (void)instr; };
visit(const LDSAtomicInstr & instr)800    void visit(const LDSAtomicInstr& instr) override { (void)instr; };
visit(const LDSReadInstr & instr)801    void visit(const LDSReadInstr& instr) override { (void)instr; };
visit(const RatInstr & instr)802    void visit(const RatInstr& instr) override {  (void)instr; };
803 
804    bool has_group_dest;
805 };
806 
807 class HasVecSrcVisitor : public ConstInstrVisitor {
808 public:
HasVecSrcVisitor()809    HasVecSrcVisitor():
810        has_group_src(false)
811    {
812    }
813 
visit(UNUSED const AluInstr & instr)814    void visit(UNUSED const AluInstr& instr) override { }
visit(UNUSED const AluGroup & instr)815    void visit(UNUSED const AluGroup& instr) override { }
visit(UNUSED const FetchInstr & instr)816    void visit(UNUSED const FetchInstr& instr) override  { };
visit(UNUSED const Block & instr)817    void visit(UNUSED const Block& instr) override { };
visit(UNUSED const ControlFlowInstr & instr)818    void visit(UNUSED const ControlFlowInstr& instr) override{ }
visit(UNUSED const IfInstr & instr)819    void visit(UNUSED const IfInstr& instr) override{ }
visit(UNUSED const LDSAtomicInstr & instr)820    void visit(UNUSED const LDSAtomicInstr& instr) override { };
visit(UNUSED const LDSReadInstr & instr)821    void visit(UNUSED const LDSReadInstr& instr) override { };
822 
visit(const TexInstr & instr)823    void visit(const TexInstr& instr) override { check(instr.src()); }
visit(const ExportInstr & instr)824    void visit(const ExportInstr& instr) override { check(instr.value()); }
visit(const GDSInstr & instr)825    void visit(const GDSInstr& instr) override { check(instr.src()); }
826 
827    // No swizzling supported, so we want to keep the register group
visit(UNUSED const ScratchIOInstr & instr)828    void visit(UNUSED const ScratchIOInstr& instr) override  { has_group_src = true; };
visit(UNUSED const StreamOutInstr & instr)829    void visit(UNUSED const StreamOutInstr& instr) override { has_group_src = true; }
visit(UNUSED const MemRingOutInstr & instr)830    void visit(UNUSED const MemRingOutInstr& instr) override { has_group_src = true; }
visit(UNUSED const RatInstr & instr)831    void visit(UNUSED const RatInstr& instr) override { has_group_src = true; };
832 
visit(UNUSED const EmitVertexInstr & instr)833    void visit(UNUSED const EmitVertexInstr& instr) override { }
834 
835    // We always emit at least two values
visit(UNUSED const WriteTFInstr & instr)836    void visit(UNUSED const WriteTFInstr& instr) override { has_group_src = true; };
837 
838 
839    void check(const RegisterVec4& value);
840 
841    bool has_group_src;
842 };
843 
check(const RegisterVec4 & value)844 void HasVecSrcVisitor::check(const RegisterVec4& value)
845 {
846    int nval = 0;
847    for (int i = 0; i < 4 && nval < 2; ++i) {
848       if (value[i]->chan() < 4)
849          ++nval;
850    }
851    has_group_src = nval > 1;
852 }
853 
854 bool
simplify_source_vectors(Shader & sh)855 simplify_source_vectors(Shader& sh)
856 {
857    SimplifySourceVecVisitor visitor;
858 
859    for (auto b : sh.func())
860       b->accept(visitor);
861 
862    return visitor.progress;
863 }
864 
865 void
visit(TexInstr * instr)866 SimplifySourceVecVisitor::visit(TexInstr *instr)
867 {
868 
869    if (instr->opcode() != TexInstr::get_resinfo) {
870       auto& src = instr->src();
871       replace_src(instr, src);
872       int nvals = 0;
873       for (int i = 0; i < 4; ++i)
874          if (src[i]->chan() < 4)
875             ++nvals;
876       if (nvals == 1) {
877          for (int i = 0; i < 4; ++i)
878             if (src[i]->chan() < 4) {
879                HasVecDestVisitor check_dests;
880                for (auto p : src[i]->parents()) {
881                   p->accept(check_dests);
882                   if (check_dests.has_group_dest)
883                      break;
884                }
885 
886                HasVecSrcVisitor check_src;
887                for (auto p : src[i]->uses()) {
888                   p->accept(check_src);
889                   if (check_src.has_group_src)
890                      break;
891                }
892 
893                if (check_dests.has_group_dest || check_src.has_group_src)
894                   break;
895 
896                if (src[i]->pin() == pin_group)
897                   src[i]->set_pin(pin_free);
898                else if (src[i]->pin() == pin_chgr)
899                   src[i]->set_pin(pin_chan);
900             }
901       }
902    }
903    for (auto& prep : instr->prepare_instr()) {
904       prep->accept(*this);
905    }
906 }
907 
908 void
visit(ScratchIOInstr * instr)909 SimplifySourceVecVisitor::visit(ScratchIOInstr *instr)
910 {
911    (void)instr;
912 }
913 
914 class ReplaceConstSource : public AluInstrVisitor {
915 public:
ReplaceConstSource(Instr * old_use_,RegisterVec4 & vreg_,int i)916    ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i):
917        old_use(old_use_),
918        vreg(vreg_),
919        index(i),
920        success(false)
921    {
922    }
923 
924    using AluInstrVisitor::visit;
925 
926    void visit(AluInstr *alu) override;
927 
928    Instr *old_use;
929    RegisterVec4& vreg;
930    int index;
931    bool success;
932 };
933 
934 void
visit(ExportInstr * instr)935 SimplifySourceVecVisitor::visit(ExportInstr *instr)
936 {
937    replace_src(instr, instr->value());
938 }
939 
940 void
replace_src(Instr * instr,RegisterVec4 & reg4)941 SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4)
942 {
943    for (int i = 0; i < 4; ++i) {
944       auto s = reg4[i];
945 
946       if (s->chan() > 3)
947          continue;
948 
949       if (!s->has_flag(Register::ssa))
950          continue;
951 
952       /* Cayman trans ops have more then one parent for
953        * one dest */
954       if (s->parents().size() != 1)
955          continue;
956 
957       auto& op = *s->parents().begin();
958 
959       ReplaceConstSource visitor(instr, reg4, i);
960 
961       op->accept(visitor);
962 
963       progress |= visitor.success;
964    }
965 }
966 
967 void
visit(StreamOutInstr * instr)968 SimplifySourceVecVisitor::visit(StreamOutInstr *instr)
969 {
970    (void)instr;
971 }
972 
973 void
visit(MemRingOutInstr * instr)974 SimplifySourceVecVisitor::visit(MemRingOutInstr *instr)
975 {
976    (void)instr;
977 }
978 
979 void
visit(AluInstr * alu)980 ReplaceConstSource::visit(AluInstr *alu)
981 {
982    if (alu->opcode() != op1_mov)
983       return;
984 
985    if (alu->has_source_mod(0, AluInstr::mod_abs) ||
986        alu->has_source_mod(0, AluInstr::mod_neg))
987       return;
988 
989    auto src = alu->psrc(0);
990    assert(src);
991 
992    int override_chan = -1;
993 
994    if (value_is_const_uint(*src, 0)) {
995       override_chan = 4;
996    } else if (value_is_const_float(*src, 1.0f)) {
997       override_chan = 5;
998    }
999 
1000    if (override_chan >= 0) {
1001       vreg[index]->del_use(old_use);
1002       auto reg = new Register(vreg.sel(), override_chan, vreg[index]->pin());
1003       vreg.set_value(index, reg);
1004       success = true;
1005    }
1006 }
1007 
1008 void
visit(FetchInstr * instr)1009 SimplifySourceVecVisitor::visit(FetchInstr *instr)
1010 {
1011    (void)instr;
1012 }
1013 
1014 void
visit(Block * instr)1015 SimplifySourceVecVisitor::visit(Block *instr)
1016 {
1017    for (auto i = instr->rbegin(); i != instr->rend(); ++i)
1018       if (!(*i)->is_dead())
1019          (*i)->accept(*this);
1020 }
1021 
1022 void
visit(ControlFlowInstr * instr)1023 SimplifySourceVecVisitor::visit(ControlFlowInstr *instr)
1024 {
1025    (void)instr;
1026 }
1027 
1028 void
visit(IfInstr * instr)1029 SimplifySourceVecVisitor::visit(IfInstr *instr)
1030 {
1031    (void)instr;
1032 }
1033 
1034 } // namespace r600
1035