xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* -*- mesa-c++  -*-
2  * Copyright 2022 Collabora LTD
3  * Author: Gert Wollny <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "sfn_instr_alu.h"
8 
9 #include "sfn_alu_defines.h"
10 #include "sfn_debug.h"
11 #include "sfn_instr_alugroup.h"
12 #include "sfn_instr_tex.h"
13 #include "sfn_shader.h"
14 #include "sfn_virtualvalues.h"
15 
16 #include <algorithm>
17 #include <sstream>
18 
19 namespace r600 {
20 
21 using std::istream;
22 using std::string;
23 using std::vector;
24 
AluInstr(EAluOp opcode,PRegister dest,SrcValues src,const std::set<AluModifiers> & flags,int slots)25 AluInstr::AluInstr(EAluOp opcode,
26                    PRegister dest,
27                    SrcValues src,
28                    const std::set<AluModifiers>& flags,
29                    int slots):
30     m_opcode(opcode),
31     m_dest(dest),
32     m_bank_swizzle(alu_vec_unknown),
33     m_cf_type(cf_alu),
34     m_alu_slots(slots)
35 {
36    m_src.swap(src);
37 
38    if (m_src.size() == 3)
39       m_alu_flags.set(alu_op3);
40 
41    for (auto f : flags)
42       m_alu_flags.set(f);
43 
44    ASSERT_OR_THROW(m_src.size() ==
45                       static_cast<size_t>(alu_ops.at(opcode).nsrc * m_alu_slots),
46                    "Unexpected number of source values");
47 
48    if (m_alu_flags.test(alu_write))
49       ASSERT_OR_THROW(dest, "Write flag is set, but no destination register is given");
50 
51    update_uses();
52 
53    if (dest && slots > 1) {
54       switch (m_opcode) {
55       case op2_dot_ieee: m_allowed_dest_mask = (1 << (5 - slots)) - 1;
56          break;
57       default:
58          if (has_alu_flag(alu_is_cayman_trans)) {
59             m_allowed_dest_mask = (1 << slots) - 1;
60          }
61       }
62    }
63    assert(!dest || (m_allowed_dest_mask & (1 << dest->chan())));
64 }
65 
AluInstr(EAluOp opcode)66 AluInstr::AluInstr(EAluOp opcode):
67     AluInstr(opcode, nullptr, SrcValues(alu_ops.at(opcode).nsrc), {}, 1)
68 {
69 }
70 
AluInstr(EAluOp opcode,int chan)71 AluInstr::AluInstr(EAluOp opcode, int chan):
72     AluInstr(opcode, nullptr, SrcValues(), {}, 1)
73 {
74    m_fallback_chan = chan;
75 }
76 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,const std::set<AluModifiers> & m_flags)77 AluInstr::AluInstr(EAluOp opcode,
78                    PRegister dest,
79                    PVirtualValue src0,
80                    const std::set<AluModifiers>& m_flags):
81     AluInstr(opcode, dest, SrcValues{src0}, m_flags, 1)
82 {
83 }
84 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,const std::set<AluModifiers> & m_flags)85 AluInstr::AluInstr(EAluOp opcode,
86                    PRegister dest,
87                    PVirtualValue src0,
88                    PVirtualValue src1,
89                    const std::set<AluModifiers>& m_flags):
90     AluInstr(opcode, dest, SrcValues{src0, src1}, m_flags, 1)
91 {
92 }
93 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,PVirtualValue src2,const std::set<AluModifiers> & m_flags)94 AluInstr::AluInstr(EAluOp opcode,
95                    PRegister dest,
96                    PVirtualValue src0,
97                    PVirtualValue src1,
98                    PVirtualValue src2,
99                    const std::set<AluModifiers>& m_flags):
100     AluInstr(opcode, dest, SrcValues{src0, src1, src2}, m_flags, 1)
101 {
102 }
103 
AluInstr(ESDOp op,PVirtualValue src0,PVirtualValue src1,PVirtualValue address)104 AluInstr::AluInstr(ESDOp op,
105                    PVirtualValue src0,
106                    PVirtualValue src1,
107                    PVirtualValue address):
108     m_lds_opcode(op)
109 {
110    set_alu_flag(alu_is_lds);
111 
112    m_src.push_back(address);
113    if (src0) {
114       m_src.push_back(src0);
115       if (src1)
116          m_src.push_back(src1);
117    }
118    update_uses();
119 }
120 
AluInstr(ESDOp op,const SrcValues & src,const std::set<AluModifiers> & flags)121 AluInstr::AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags):
122     m_lds_opcode(op),
123     m_src(src)
124 {
125    for (auto f : flags)
126       set_alu_flag(f);
127 
128    set_alu_flag(alu_is_lds);
129    update_uses();
130 }
131 
132 void
update_uses()133 AluInstr::update_uses()
134 {
135    for (auto& s : m_src) {
136       auto r = s->as_register();
137       if (r) {
138          r->add_use(this);
139          // move this to add_use
140          if (r->pin() == pin_array) {
141             auto array_elm = static_cast<LocalArrayValue *>(r);
142             auto addr = array_elm->addr();
143             if (addr && addr->as_register())
144                addr->as_register()->add_use(this);
145          }
146       }
147       auto u = s->as_uniform();
148       if (u && u->buf_addr() && u->buf_addr()->as_register())
149          u->buf_addr()->as_register()->add_use(this);
150    }
151 
152    if (m_dest &&
153        (has_alu_flag(alu_write) ||
154         m_opcode == op1_mova_int ||
155         m_opcode == op1_set_cf_idx0 ||
156         m_opcode == op1_set_cf_idx1)) {
157       m_dest->add_parent(this);
158 
159       if (m_dest->pin() == pin_array) {
160          // move this to add_parent
161          auto array_elm = static_cast<LocalArrayValue *>(m_dest);
162          auto addr = array_elm->addr();
163          if (addr && addr->as_register())
164             addr->as_register()->add_use(this);
165       }
166    }
167 }
168 
169 void
accept(ConstInstrVisitor & visitor) const170 AluInstr::accept(ConstInstrVisitor& visitor) const
171 {
172    visitor.visit(*this);
173 }
174 
175 void
accept(InstrVisitor & visitor)176 AluInstr::accept(InstrVisitor& visitor)
177 {
178    visitor.visit(this);
179 }
180 
181 const std::map<ECFAluOpCode, std::string> AluInstr::cf_map = {
182    {cf_alu_break,       "BREAK"      },
183    {cf_alu_continue,    "CONT"       },
184    {cf_alu_else_after,  "ELSE_AFTER" },
185    {cf_alu_extended,    "EXTENDED"   },
186    {cf_alu_pop_after,   "POP_AFTER"  },
187    {cf_alu_pop2_after,  "POP2_AFTER" },
188    {cf_alu_push_before, "PUSH_BEFORE"}
189 };
190 
191 const std::map<AluBankSwizzle, std::string> AluInstr::bank_swizzle_map = {
192    {alu_vec_012, "VEC_012"},
193    {alu_vec_021, "VEC_021"},
194    {alu_vec_102, "VEC_102"},
195    {alu_vec_120, "VEC_120"},
196    {alu_vec_201, "VEC_201"},
197    {alu_vec_210, "VEC_210"}
198 };
199 
200 const AluModifiers AluInstr::src_rel_flags[3] = {
201    alu_src0_rel, alu_src1_rel, alu_src2_rel};
202 
/* Bundle of an index mode and a bit mask that describes how a value is to
 * be printed. The static members are the bits that may be combined in
 * `flags`. */
struct ValuePrintFlags {
   static constexpr int is_rel = 1 << 0;
   static constexpr int has_abs = 1 << 1;
   static constexpr int has_neg = 1 << 2;
   static constexpr int literal_is_float = 1 << 3;
   static constexpr int index_ar = 1 << 4;
   static constexpr int index_loopidx = 1 << 5;

   ValuePrintFlags(int im, int f):
       index_mode(im),
       flags(f)
   {
   }

   int index_mode = 0;
   int flags = 0;
};
218 
219 void
do_print(std::ostream & os) const220 AluInstr::do_print(std::ostream& os) const
221 {
222    const char swzchar[] = "xyzw01?_";
223 
224    unsigned i = 0;
225 
226    os << "ALU ";
227 
228    if (has_alu_flag(alu_is_lds)) {
229       os << "LDS " << lds_ops.at(m_lds_opcode).name;
230       os << " __.x : ";
231    } else {
232 
233       os << alu_ops.at(m_opcode).name;
234       if (has_alu_flag(alu_dst_clamp))
235          os << " CLAMP";
236 
237       if (m_dest) {
238          if (has_alu_flag(alu_write) || m_dest->has_flag(Register::addr_or_idx)) {
239             os << " " << *m_dest;
240          } else {
241             os << " __"
242                << "." << swzchar[m_dest->chan()];
243             if (m_dest->pin() != pin_none)
244                os << "@" << m_dest->pin();
245          }
246          os << " : ";
247       } else {
248          os << " __." << swzchar[dest_chan()] << " : ";
249       }
250    }
251 
252    const int n_source_per_slot =
253       has_alu_flag(alu_is_lds) ? m_src.size() : alu_ops.at(m_opcode).nsrc;
254 
255 
256    for (int s = 0; s < m_alu_slots; ++s) {
257 
258       if (s > 0)
259          os << " +";
260 
261       for (int k = 0; k < n_source_per_slot; ++k) {
262          int pflags = 0;
263          if (i)
264             os << ' ';
265          if (has_source_mod(i, mod_neg))
266             pflags |= ValuePrintFlags::has_neg;
267          if (has_alu_flag(src_rel_flags[k]))
268             pflags |= ValuePrintFlags::is_rel;
269          if (n_source_per_slot <= 2)
270             if (has_source_mod(i, mod_abs))
271                pflags |= ValuePrintFlags::has_abs;
272 
273          if (pflags & ValuePrintFlags::has_neg)
274             os << '-';
275          if (pflags & ValuePrintFlags::has_abs)
276             os << '|';
277          os << *m_src[i];
278          if (pflags & ValuePrintFlags::has_abs)
279             os << '|';
280          ++i;
281       }
282    }
283 
284    os << " {";
285    if (has_alu_flag(alu_write))
286       os << 'W';
287    if (has_alu_flag(alu_last_instr))
288       os << 'L';
289    if (has_alu_flag(alu_update_exec))
290       os << 'E';
291    if (has_alu_flag(alu_update_pred))
292       os << 'P';
293    os << "}";
294 
295    auto bs_name = bank_swizzle_map.find(m_bank_swizzle);
296    if (bs_name != bank_swizzle_map.end())
297       os << ' ' << bs_name->second;
298 
299    auto cf_name = cf_map.find(m_cf_type);
300    if (cf_name != cf_map.end())
301       os << ' ' << cf_name->second;
302 }
303 
304 bool
can_propagate_src() const305 AluInstr::can_propagate_src() const
306 {
307    /* We can use the source in the next instruction */
308    if (!can_copy_propagate())
309       return false;
310 
311    auto src_reg = m_src[0]->as_register();
312    if (!src_reg)
313       return true;
314 
315    assert(m_dest);
316 
317    if (!m_dest->has_flag(Register::ssa)) {
318       return false;
319    }
320 
321    if (m_dest->pin() == pin_fully)
322       return m_dest->equal_to(*src_reg);
323 
324    if (m_dest->pin() == pin_chan)
325       return src_reg->pin() == pin_none ||
326              src_reg->pin() == pin_free ||
327              (src_reg->pin() == pin_chan && src_reg->chan() == m_dest->chan());
328 
329    return m_dest->pin() == pin_none || m_dest->pin() == pin_free;
330 }
331 
332 class ReplaceIndirectArrayAddr : public RegisterVisitor {
333 public:
visit(Register & value)334    void visit(Register& value) override { (void)value; }
visit(LocalArray & value)335    void visit(LocalArray& value) override
336    {
337       (void)value;
338       unreachable("An array can't be used as address");
339    }
340    void visit(LocalArrayValue& value) override;
341    void visit(UniformValue& value) override;
visit(LiteralConstant & value)342    void visit(LiteralConstant& value) override { (void)value; }
visit(InlineConstant & value)343    void visit(InlineConstant& value) override { (void)value; }
344 
345    PRegister new_addr;
346 };
347 
visit(LocalArrayValue & value)348 void ReplaceIndirectArrayAddr::visit(LocalArrayValue& value)
349 {
350    if (new_addr->sel() == 0 && value.addr()
351        && value.addr()->as_register())
352       value.set_addr(new_addr);
353 }
354 
visit(UniformValue & value)355 void ReplaceIndirectArrayAddr::visit(UniformValue& value)
356 {
357    if (value.buf_addr() && value.buf_addr()->as_register() &&
358        (new_addr->sel() == 1 || new_addr->sel() == 2)) {
359       value.set_buf_addr(new_addr);
360    }
361 }
362 
update_indirect_addr(UNUSED PRegister old_reg,PRegister reg)363 void AluInstr::update_indirect_addr(UNUSED PRegister old_reg, PRegister reg)
364 {
365    ReplaceIndirectArrayAddr visitor;
366 
367    visitor.new_addr = reg;
368    assert(reg->has_flag(Register::addr_or_idx));
369 
370    if (m_dest)
371       m_dest->accept(visitor);
372 
373    for (auto src : m_src)
374       src->accept(visitor);
375 
376    reg->add_use(this);
377 }
378 
379 bool
can_propagate_dest() const380 AluInstr::can_propagate_dest() const
381 {
382    if (!can_copy_propagate()) {
383       return false;
384    }
385 
386    auto src_reg = m_src[0]->as_register();
387    if (!src_reg) {
388       return false;
389    }
390 
391    assert(m_dest);
392 
393    if (src_reg->pin() == pin_fully) {
394       return false;
395    }
396 
397    if (!src_reg->has_flag(Register::ssa))
398       return false;
399 
400    if (!m_dest->has_flag(Register::ssa))
401       return false;
402 
403    if (src_reg->pin() == pin_chan)
404       return m_dest->pin() == pin_none || m_dest->pin() == pin_free ||
405              ((m_dest->pin() == pin_chan || m_dest->pin() == pin_group) &&
406               src_reg->chan() == m_dest->chan());
407 
408    return (src_reg->pin() == pin_none || src_reg->pin() == pin_free);
409 }
410 
411 bool
can_copy_propagate() const412 AluInstr::can_copy_propagate() const
413 {
414    if (m_opcode != op1_mov)
415       return false;
416 
417    if (has_source_mod(0, mod_abs) || has_source_mod(0, mod_neg) ||
418        has_alu_flag(alu_dst_clamp))
419       return false;
420 
421    return has_alu_flag(alu_write);
422 }
423 
424 bool
replace_source(PRegister old_src,PVirtualValue new_src)425 AluInstr::replace_source(PRegister old_src, PVirtualValue new_src)
426 {
427    if (!can_replace_source(old_src, new_src))
428       return false;
429 
430    return do_replace_source(old_src, new_src);
431 }
432 
do_replace_source(PRegister old_src,PVirtualValue new_src)433 bool AluInstr::do_replace_source(PRegister old_src, PVirtualValue new_src)
434 {
435    bool process = false;
436 
437    for (unsigned i = 0; i < m_src.size(); ++i) {
438       if (old_src->equal_to(*m_src[i])) {
439          m_src[i] = new_src;
440          process = true;
441       }
442    }
443    if (process) {
444       auto r = new_src->as_register();
445       if (r)
446          r->add_use(this);
447       old_src->del_use(this);
448    }
449 
450    return process;
451 }
452 
replace_src(int i,PVirtualValue new_src,uint32_t to_set,SourceMod to_clear)453 bool AluInstr::replace_src(int i, PVirtualValue new_src, uint32_t to_set,
454                            SourceMod to_clear)
455 {
456    auto old_src = m_src[i]->as_register();
457    assert(old_src);
458 
459    if (!can_replace_source(old_src, new_src))
460       return false;
461 
462    assert(old_src);
463    old_src->del_use(this);
464 
465    m_src[i] = new_src;
466 
467    auto r = new_src->as_register();
468    if (r)
469       r->add_use(this);
470 
471    m_source_modifiers |= to_set << (2 * i);
472    m_source_modifiers &= ~(to_clear  << (2 * i));
473 
474    return true;
475 }
476 
477 
can_replace_source(PRegister old_src,PVirtualValue new_src)478 bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src)
479 {
480    if (!check_readport_validation(old_src, new_src))
481       return false;
482 
483    /* If the old or new source is an array element, we assume that there
484     * might have been an (untracked) indirect access, so don't replace
485     * this source */
486    if (old_src->pin() == pin_array && new_src->pin() == pin_array)
487       return false;
488 
489    auto [addr, dummy, index] = indirect_addr();
490    auto addr_reg = addr ?  addr->as_register() : nullptr;
491    auto index_reg = index ? index->as_register() : nullptr;
492 
493    if (auto u = new_src->as_uniform()) {
494       if (u && u->buf_addr()) {
495 
496          /* Don't mix indirect buffer and indirect registers, because the
497           * scheduler can't handle it yet. */
498          if (addr_reg)
499             return false;
500 
501          /* Don't allow two different index registers, can't deal with that yet */
502          if (index_reg && !index_reg->equal_to(*u->buf_addr()))
503             return false;
504       }
505    }
506 
507    if (auto new_addr = new_src->get_addr()) {
508       auto new_addr_reg = new_addr->as_register();
509       bool new_addr_lowered = new_addr_reg &&
510                               new_addr_reg->has_flag(Register::addr_or_idx);
511 
512       if (addr_reg) {
513          if (!addr_reg->equal_to(*new_addr) || new_addr_lowered ||
514              addr_reg->has_flag(Register::addr_or_idx))
515             return false;
516       }
517       if (m_dest->has_flag(Register::addr_or_idx)) {
518          if (new_src->pin() == pin_array) {
519             auto s = static_cast<const LocalArrayValue *>(new_src)->addr();
520             if (!s->as_inline_const() || !s->as_literal())
521                return false;
522          }
523       }
524    }
525    return true;
526 }
527 
528 void
set_sources(SrcValues src)529 AluInstr::set_sources(SrcValues src)
530 {
531    for (auto& s : m_src) {
532       auto r = s->as_register();
533       if (r)
534          r->del_use(this);
535    }
536    m_src.swap(src);
537    for (auto& s : m_src) {
538       auto r = s->as_register();
539       if (r)
540          r->add_use(this);
541    }
542 }
543 
allowed_src_chan_mask() const544 uint8_t AluInstr::allowed_src_chan_mask() const
545 {
546    if (m_alu_slots < 2)
547       return 0xf;
548 
549    int chan_use_count[4] = {0};
550 
551    for (auto s : m_src) {
552        auto r = s->as_register();
553        if (r)
554            ++chan_use_count[r->chan()];
555    }
556    /* Each channel can only be loaded in one of three cycles,
557     * so if a channel is already used three times, we can't
558     * add another source withthis channel.
559     * Since we want to move away from one channel to another, it
560     * is not important to know which is the old channel that will
561     * be freed by the channel switch.*/
562    int mask = 0;
563 
564    /* Be conservative about channel use when using more than two
565     * slots. Currently a constellatioon of
566     *
567     *  ALU d.x = f(r0.x, r1.y)
568     *  ALU _.y = f(r2.y, r3.x)
569     *  ALU _.z = f(r4.x, r5.y)
570     *
571     * will fail to be split. To get constellations like this to be scheduled
572     * properly will need some work on the bank swizzle check.
573     */
574    int maxuse = m_alu_slots > 2 ? 2 : 3;
575    for (int i = 0; i < 4; ++i) {
576        if (chan_use_count[i] < maxuse)
577            mask |= 1 << i;
578    }
579    return mask;
580 }
581 
582 bool
replace_dest(PRegister new_dest,AluInstr * move_instr)583 AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
584 {
585    if (m_dest->equal_to(*new_dest))
586       return false;
587 
588    if (m_dest->uses().size() > 1)
589       return false;
590 
591    if (new_dest->pin() == pin_array)
592       return false;
593 
594    /* Currently we bail out when an array write should be moved, because
595     * declaring an array write is currently not well defined. The
596     * Whole "backwards" copy propagation should dprobably be replaced by some
597     * forward peep holew optimization */
598    /*
599    if (new_dest->pin() == pin_array) {
600       auto dav = static_cast<const LocalArrayValue *>(new_dest)->addr();
601       for (auto s: m_src) {
602          if (s->pin() == pin_array) {
603             auto sav = static_cast<const LocalArrayValue *>(s)->addr();
604             if (dav && sav && dav->as_register() &&  !dav->equal_to(*sav))
605                return false;
606          }
607       }
608    }
609    */
610 
611    if (m_dest->pin() == pin_chan && new_dest->chan() != m_dest->chan())
612       return false;
613 
614    if (m_dest->pin() == pin_chan) {
615       if (new_dest->pin() == pin_group)
616          new_dest->set_pin(pin_chgr);
617       else if (new_dest->pin() != pin_chgr)
618          new_dest->set_pin(pin_chan);
619    }
620 
621    m_dest = new_dest;
622    if (!move_instr->has_alu_flag(alu_last_instr))
623       reset_alu_flag(alu_last_instr);
624 
625    if (has_alu_flag(alu_is_cayman_trans)) {
626       /* Copy propagation puts an instruction into the w channel, but we
627        * don't have the slots for a w channel */
628       if (m_dest->chan() == 3 && m_alu_slots < 4) {
629          m_alu_slots = 4;
630          assert(m_src.size() == 3);
631          m_src.push_back(m_src[0]);
632       }
633    }
634 
635    return true;
636 }
637 
638 void
pin_sources_to_chan()639 AluInstr::pin_sources_to_chan()
640 {
641    for (auto s : m_src) {
642       auto r = s->as_register();
643       if (r) {
644          if (r->pin() == pin_free)
645             r->set_pin(pin_chan);
646          else if (r->pin() == pin_group)
647             r->set_pin(pin_chgr);
648       }
649    }
650 }
651 
652 bool
check_readport_validation(PRegister old_src,PVirtualValue new_src) const653 AluInstr::check_readport_validation(PRegister old_src, PVirtualValue new_src) const
654 {
655    if (m_src.size() < 3)
656       return true;
657 
658    bool success = true;
659    AluReadportReservation rpr_sum;
660 
661    unsigned nsrc = alu_ops.at(m_opcode).nsrc;
662    assert(nsrc * m_alu_slots == m_src.size());
663 
664    for (int s = 0; s < m_alu_slots && success; ++s) {
665       PVirtualValue src[3];
666       auto ireg = m_src.begin() + s * nsrc;
667 
668       for (unsigned i = 0; i < nsrc; ++i, ++ireg)
669          src[i] = old_src->equal_to(**ireg) ? new_src : *ireg;
670 
671       AluBankSwizzle bs = alu_vec_012;
672       while (bs != alu_vec_unknown) {
673          AluReadportReservation rpr = rpr_sum;
674          if (rpr.schedule_vec_src(src, nsrc, bs)) {
675             rpr_sum = rpr;
676             break;
677          }
678          ++bs;
679       }
680 
681       if (bs == alu_vec_unknown)
682          success = false;
683    }
684    return success;
685 }
686 
687 void
add_extra_dependency(PVirtualValue value)688 AluInstr::add_extra_dependency(PVirtualValue value)
689 {
690    auto reg = value->as_register();
691    if (reg)
692       m_extra_dependencies.insert(reg);
693 }
694 
695 bool
is_equal_to(const AluInstr & lhs) const696 AluInstr::is_equal_to(const AluInstr& lhs) const
697 {
698    if (lhs.m_opcode != m_opcode || lhs.m_bank_swizzle != m_bank_swizzle ||
699        lhs.m_cf_type != m_cf_type || lhs.m_alu_flags != m_alu_flags) {
700       return false;
701    }
702 
703    if (m_dest) {
704       if (!lhs.m_dest) {
705          return false;
706       } else {
707          if (has_alu_flag(alu_write)) {
708             if (!m_dest->equal_to(*lhs.m_dest))
709                return false;
710          } else {
711             if (m_dest->chan() != lhs.m_dest->chan())
712                return false;
713          }
714       }
715    } else {
716       if (lhs.m_dest)
717          return false;
718    }
719 
720    if (m_src.size() != lhs.m_src.size())
721       return false;
722 
723    for (unsigned i = 0; i < m_src.size(); ++i) {
724       if (!m_src[i]->equal_to(*lhs.m_src[i]))
725          return false;
726    }
727 
728    return true;
729 }
730 
731 class ResolveIndirectArrayAddr : public ConstRegisterVisitor {
732 public:
visit(const Register & value)733    void visit(const Register& value) { (void)value; }
visit(const LocalArray & value)734    void visit(const LocalArray& value)
735    {
736       (void)value;
737       unreachable("An array can't be used as address");
738    }
739    void visit(const LocalArrayValue& value);
740    void visit(const UniformValue& value);
visit(const LiteralConstant & value)741    void visit(const LiteralConstant& value) { (void)value; }
visit(const InlineConstant & value)742    void visit(const InlineConstant& value) { (void)value; }
743 
744    PRegister addr{nullptr};
745    PRegister index{nullptr};
746    bool addr_is_for_dest{false};
747 };
748 
749 void
visit(const LocalArrayValue & value)750 ResolveIndirectArrayAddr::visit(const LocalArrayValue& value)
751 {
752    auto a = value.addr();
753    if (a) {
754       addr = a->as_register();
755       assert(!addr_is_for_dest);
756    }
757 }
758 
759 void
visit(const UniformValue & value)760 ResolveIndirectArrayAddr::visit(const UniformValue& value)
761 {
762    auto a = value.buf_addr();
763    if (a) {
764       index = a->as_register();
765    }
766 }
767 
768 std::tuple<PRegister, bool, PRegister>
indirect_addr() const769 AluInstr::indirect_addr() const
770 {
771    ResolveIndirectArrayAddr visitor;
772 
773    if (m_dest) {
774       m_dest->accept(visitor);
775       if (visitor.addr)
776           visitor.addr_is_for_dest = true;
777    }
778 
779    for (auto s : m_src) {
780       s->accept(visitor);
781    }
782    return {visitor.addr, visitor.addr_is_for_dest, visitor.index};
783 }
784 
785 AluGroup *
split(ValueFactory & vf)786 AluInstr::split(ValueFactory& vf)
787 {
788    if (m_alu_slots == 1)
789       return nullptr;
790 
791    sfn_log << SfnLog::instr << "Split " << *this << "\n";
792 
793    auto group = new AluGroup();
794 
795    m_dest->del_parent(this);
796 
797    int start_slot = 0;
798    bool is_dot = m_opcode == op2_dot_ieee;
799    auto last_opcode = m_opcode;
800 
801    if (is_dot) {
802       start_slot = m_dest->chan();
803       last_opcode = op2_mul_ieee;
804    }
805 
806 
807    for (int k = 0; k < m_alu_slots; ++k) {
808       int s = k + start_slot;
809 
810       PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
811       if (dst->pin() != pin_chgr) {
812          auto pin = pin_chan;
813          if (dst->pin() == pin_group && m_dest->chan() == s)
814             pin = pin_chgr;
815          dst->set_pin(pin);
816       }
817 
818       SrcValues src;
819       int nsrc = alu_ops.at(m_opcode).nsrc;
820       for (int i = 0; i < nsrc; ++i) {
821          auto old_src = m_src[k * nsrc + i];
822          // Make it easy for the scheduler and pin the register to the
823          // channel, otherwise scheduler would have to check whether a
824          // channel switch is possible
825          auto r = old_src->as_register();
826          if (r) {
827             if (r->pin() == pin_free || r->pin() == pin_none)
828                r->set_pin(pin_chan);
829             else if (r->pin() == pin_group)
830                r->set_pin(pin_chgr);
831          }
832          src.push_back(old_src);
833       }
834 
835       auto opcode = k < m_alu_slots -1 ? m_opcode : last_opcode;
836 
837 
838       auto instr = new AluInstr(opcode, dst, src, {}, 1);
839       instr->set_blockid(block_id(), index());
840 
841       if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
842          if (has_source_mod(nsrc * k + 0, mod_neg))
843             instr->set_source_mod(0, mod_neg);
844          if (has_source_mod(nsrc * k + 1, mod_neg))
845             instr->set_source_mod(1, mod_neg);
846          if (has_source_mod(nsrc * k + 2, mod_neg))
847             instr->set_source_mod(2, mod_neg);
848          if (has_source_mod(nsrc * k + 0, mod_abs))
849             instr->set_source_mod(0, mod_abs);
850          if (has_source_mod(nsrc * k + 1, mod_abs))
851             instr->set_source_mod(1, mod_abs);
852       }
853       if (has_alu_flag(alu_dst_clamp))
854          instr->set_alu_flag(alu_dst_clamp);
855 
856       if (s == m_dest->chan())
857          instr->set_alu_flag(alu_write);
858 
859       m_dest->add_parent(instr);
860       sfn_log << SfnLog::instr << "   " << *instr << "\n";
861 
862       if (!group->add_instruction(instr)) {
863          std::cerr << "Unable to schedule '" << *instr << "' into\n" << *group << "\n";
864 
865          unreachable("Invalid group instruction");
866       }
867    }
868    group->set_blockid(block_id(), index());
869 
870    for (auto s : m_src) {
871       auto r = s->as_register();
872       if (r) {
873          r->del_use(this);
874       }
875    }
876    group->set_origin(this);
877 
878    return group;
879 }
880 
881 /* Alu instructions that have SSA dest registers increase the  regietsr
882  * pressure Alu instructions that read from SSA registers may decresase the
883  * register pressure hency evaluate a priorityx values based on register
884  * pressure change */
885 int
register_priority() const886 AluInstr::register_priority() const
887 {
888    int priority = 0;
889    if (!has_alu_flag(alu_no_schedule_bias)) {
890 
891       if (m_dest) {
892          if (m_dest->has_flag(Register::ssa) && has_alu_flag(alu_write)) {
893             if (m_dest->pin() != pin_group && m_dest->pin() != pin_chgr &&
894                 !m_dest->addr())
895                priority--;
896          } else {
897             // Arrays and registers are pre-allocated, hence scheduling
898             // assignments early is unlikely to increase register pressure
899             priority++;
900          }
901       }
902 
903       for (const auto s : m_src) {
904          auto r = s->as_register();
905          if (r) {
906             if (r->has_flag(Register::ssa)) {
907                int pending = 0;
908                for (auto b : r->uses()) {
909                   if (!b->is_scheduled())
910                      ++pending;
911                }
912                if (pending == 1)
913                   ++priority;
914             }
915             if (r->addr() && r->addr()->as_register())
916                priority += 2;
917          }
918          if (s->as_uniform())
919             ++priority;
920       }
921    }
922    return priority;
923 }
924 
925 bool
propagate_death()926 AluInstr::propagate_death()
927 {
928    if (!m_dest)
929       return true;
930 
931    if (m_dest->pin() == pin_group || m_dest->pin() == pin_chan) {
932       switch (m_opcode) {
933       case op2_interp_x:
934       case op2_interp_xy:
935       case op2_interp_z:
936       case op2_interp_zw:
937          reset_alu_flag(alu_write);
938          return false;
939       default:;
940       }
941    }
942 
943    if (m_dest->pin() == pin_array)
944       return false;
945 
946    /* We assume that nir does a good job in eliminating all ALU results that
947     * are not needed, and we don't let copy propagation doesn't make the
948     * instruction obsolete, so just keep all */
949    if (has_alu_flag(alu_is_cayman_trans))
950       return false;
951 
952    for (auto& src : m_src) {
953       auto reg = src->as_register();
954       if (reg)
955          reg->del_use(this);
956    }
957    return true;
958 }
959 
960 bool
has_lds_access() const961 AluInstr::has_lds_access() const
962 {
963    return has_alu_flag(alu_is_lds) || has_lds_queue_read();
964 }
965 
966 bool
has_lds_queue_read() const967 AluInstr::has_lds_queue_read() const
968 {
969    for (auto& s : m_src) {
970       auto ic = s->as_inline_const();
971       if (!ic)
972          continue;
973 
974       if (ic->sel() == ALU_SRC_LDS_OQ_A_POP || ic->sel() == ALU_SRC_LDS_OQ_B_POP)
975          return true;
976    }
977    return false;
978 }
979 
980 struct OpDescr {
981    union {
982       EAluOp alu_opcode;
983       ESDOp lds_opcode;
984    };
985    int nsrc;
986 };
987 
988 static std::map<std::string, OpDescr> s_alu_map_by_name;
989 static std::map<std::string, OpDescr> s_lds_map_by_name;
990 
991 Instr::Pointer
from_string(istream & is,ValueFactory & value_factory,AluGroup * group,bool is_cayman)992 AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group, bool is_cayman)
993 {
994    vector<string> tokens;
995 
996    while (is.good() && !is.eof()) {
997       string t;
998       is >> t;
999       if (t.length() > 0) {
1000          tokens.push_back(t);
1001       }
1002    }
1003 
1004    std::set<AluModifiers> flags;
1005    auto t = tokens.begin();
1006 
1007    bool is_lds = false;
1008 
1009    if (*t == "LDS") {
1010       is_lds = true;
1011       t++;
1012    }
1013 
1014    string opstr = *t++;
1015    string deststr = *t++;
1016 
1017    if (deststr == "CLAMP") {
1018       flags.insert(alu_dst_clamp);
1019       deststr = *t++;
1020    }
1021 
1022    assert(*t == ":");
1023    OpDescr op_descr = {{op_invalid}, -1};
1024 
1025    if (is_lds) {
1026       auto op = s_lds_map_by_name.find(opstr);
1027       if (op == s_lds_map_by_name.end()) {
1028          for (auto [opcode, opdescr] : lds_ops) {
1029             if (opstr == opdescr.name) {
1030                op_descr.lds_opcode = opcode;
1031                op_descr.nsrc = opdescr.nsrc;
1032                s_alu_map_by_name[opstr] = op_descr;
1033                break;
1034             }
1035          }
1036 
1037          if (op_descr.nsrc == -1) {
1038             std::cerr << "'" << opstr << "'";
1039             unreachable("Unknown opcode");
1040             return nullptr;
1041          }
1042       } else {
1043          op_descr = op->second;
1044       }
1045    } else {
1046       auto op = s_alu_map_by_name.find(opstr);
1047       if (op == s_alu_map_by_name.end()) {
1048          for (auto [opcode, opdescr] : alu_ops) {
1049             if (opstr == opdescr.name) {
1050                op_descr = {{opcode}, opdescr.nsrc};
1051                s_alu_map_by_name[opstr] = op_descr;
1052                break;
1053             }
1054          }
1055 
1056          if (op_descr.nsrc == -1) {
1057             std::cerr << "'" << opstr << "'";
1058             unreachable("Unknown opcode");
1059             return nullptr;
1060          }
1061       } else {
1062          op_descr = op->second;
1063       }
1064       if (is_cayman) {
1065          switch (op_descr.alu_opcode) {
1066          case op1_cos:
1067          case op1_exp_ieee:
1068          case op1_log_clamped:
1069          case op1_recip_ieee:
1070          case op1_recipsqrt_ieee1:
1071          case op1_sqrt_ieee:
1072          case op1_sin:
1073          case op2_mullo_int:
1074          case op2_mulhi_int:
1075          case op2_mulhi_uint:
1076             flags.insert(alu_is_cayman_trans);
1077          default:
1078          ;
1079          }
1080       }
1081    }
1082 
1083    int slots = 0;
1084 
1085    uint32_t src_mods = 0;
1086    SrcValues sources;
1087    do {
1088       ++t;
1089       for (int i = 0; i < op_descr.nsrc; ++i) {
1090          string srcstr = *t++;
1091 
1092          if (srcstr[0] == '-') {
1093             src_mods |= AluInstr::mod_neg << (2 * sources.size());
1094             srcstr = srcstr.substr(1);
1095          }
1096 
1097          if (srcstr[0] == '|') {
1098             assert(srcstr[srcstr.length() - 1] == '|');
1099             src_mods |= AluInstr::mod_abs << (2 * sources.size());
1100             srcstr = srcstr.substr(1, srcstr.length() - 2);
1101          }
1102 
1103          auto src = value_factory.src_from_string(srcstr);
1104          if (!src) {
1105             std::cerr << "Unable to create src[" << i << "] from " << srcstr << "\n";
1106             assert(src);
1107          }
1108          sources.push_back(src);
1109       }
1110       ++slots;
1111    } while (t != tokens.end() && *t == "+");
1112 
1113    AluBankSwizzle bank_swizzle = alu_vec_unknown;
1114    ECFAluOpCode cf = cf_alu;
1115 
1116    while (t != tokens.end()) {
1117 
1118       switch ((*t)[0]) {
1119       case '{': {
1120          auto iflag = t->begin() + 1;
1121          while (iflag != t->end()) {
1122             if (*iflag == '}')
1123                break;
1124 
1125             switch (*iflag) {
1126             case 'L':
1127                flags.insert(alu_last_instr);
1128                break;
1129             case 'W':
1130                flags.insert(alu_write);
1131                break;
1132             case 'E':
1133                flags.insert(alu_update_exec);
1134                break;
1135             case 'P':
1136                flags.insert(alu_update_pred);
1137                break;
1138             }
1139             ++iflag;
1140          }
1141       } break;
1142 
1143       case 'V': {
1144          string bs = *t;
1145          if (bs == "VEC_012")
1146             bank_swizzle = alu_vec_012;
1147          else if (bs == "VEC_021")
1148             bank_swizzle = alu_vec_021;
1149          else if (bs == "VEC_102")
1150             bank_swizzle = alu_vec_102;
1151          else if (bs == "VEC_120")
1152             bank_swizzle = alu_vec_120;
1153          else if (bs == "VEC_201")
1154             bank_swizzle = alu_vec_201;
1155          else if (bs == "VEC_210")
1156             bank_swizzle = alu_vec_210;
1157          else {
1158             std::cerr << "'" << bs << "': ";
1159             unreachable("Unknowe bankswizzle given");
1160          }
1161       } break;
1162 
1163       default: {
1164          string cf_str = *t;
1165          if (cf_str == "PUSH_BEFORE")
1166             cf = cf_alu_push_before;
1167          else if (cf_str == "POP_AFTER")
1168             cf = cf_alu_pop_after;
1169          else if (cf_str == "POP2_AFTER")
1170             cf = cf_alu_pop2_after;
1171          else if (cf_str == "EXTENDED")
1172             cf = cf_alu_extended;
1173          else if (cf_str == "BREAK")
1174             cf = cf_alu_break;
1175          else if (cf_str == "CONT")
1176             cf = cf_alu_continue;
1177          else if (cf_str == "ELSE_AFTER")
1178             cf = cf_alu_else_after;
1179          else {
1180             std::cerr << " '" << cf_str << "' ";
1181             unreachable("Unknown tocken in ALU instruction");
1182          }
1183       }
1184       }
1185       ++t;
1186    }
1187 
1188    PRegister dest = nullptr;
1189    // construct instruction
1190    if (deststr != "(null)")
1191       dest = value_factory.dest_from_string(deststr);
1192 
1193    AluInstr *retval = nullptr;
1194    if (is_lds)
1195       retval = new AluInstr(op_descr.lds_opcode, sources, flags);
1196    else
1197       retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots);
1198 
1199    retval->m_source_modifiers = src_mods;
1200    retval->set_bank_swizzle(bank_swizzle);
1201    retval->set_cf_type(cf);
1202    if (group) {
1203       group->add_instruction(retval);
1204       retval = nullptr;
1205    }
1206    return retval;
1207 }
1208 
1209 bool
do_ready() const1210 AluInstr::do_ready() const
1211 {
1212    /* Alu instructions are shuffled by the scheduler, so
1213     * we have to make sure that required ops are already
1214     * scheduled before marking this one ready */
1215    for (auto i : required_instr()) {
1216       if (i->is_dead())
1217          continue;
1218 
1219       bool is_older_instr = i->block_id() <= block_id() &&
1220                             i->index() < index();
1221       bool is_lds = i->as_alu() && i->as_alu()->has_lds_access();
1222       if (!i->is_scheduled() && (is_older_instr || is_lds))
1223          return false;
1224    }
1225 
1226    for (auto s : m_src) {
1227       auto r = s->as_register();
1228       if (r) {
1229          if (!r->ready(block_id(), index()))
1230             return false;
1231       }
1232       auto u = s->as_uniform();
1233       if (u && u->buf_addr() && u->buf_addr()->as_register()) {
1234          if (!u->buf_addr()->as_register()->ready(block_id(), index()))
1235             return false;
1236       }
1237    }
1238 
1239    if (m_dest && !m_dest->has_flag(Register::ssa)) {
1240       if (m_dest->pin() == pin_array) {
1241          auto av = static_cast<const LocalArrayValue *>(m_dest);
1242          auto addr = av->addr();
1243          /* For true indiect dest access we have to make sure that all
1244           * instructions that write the value before are schedukled */
1245          if (addr && (!addr->ready(block_id(), index()) ||
1246                       !m_dest->ready(block_id(), index() - 1)))
1247             return false;
1248       }
1249 
1250       /* If a register is updates, we have to make sure that uses before that
1251        * update are scheduled, otherwise we may use the updated value when we
1252        * shouldn't */
1253       for (auto u : m_dest->uses()) {
1254          /* TODO: This is working around some sloppy use updates, dead instrzuctions
1255           * should remove themselves from uses. */
1256          if (u->is_dead())
1257             continue;
1258          if (!u->is_scheduled() &&
1259              u->block_id() <= block_id() &&
1260              u->index() < index()) {
1261             return false;
1262          }
1263       }
1264    }
1265 
1266    for (auto& r : m_extra_dependencies) {
1267       if (!r->ready(block_id(), index()))
1268          return false;
1269    }
1270 
1271    return true;
1272 }
1273 
1274 void
visit(AluGroup * instr)1275 AluInstrVisitor::visit(AluGroup *instr)
1276 {
1277    for (auto& i : *instr) {
1278       if (i)
1279          i->accept(*this);
1280    }
1281 }
1282 
1283 void
visit(Block * instr)1284 AluInstrVisitor::visit(Block *instr)
1285 {
1286    for (auto& i : *instr)
1287       i->accept(*this);
1288 }
1289 
1290 void
visit(IfInstr * instr)1291 AluInstrVisitor::visit(IfInstr *instr)
1292 {
1293    instr->predicate()->accept(*this);
1294 }
1295 
is_kill() const1296 bool AluInstr::is_kill() const
1297 {
1298    if (has_alu_flag(alu_is_lds))
1299       return false;
1300 
1301    switch (m_opcode) {
1302    case op2_kille:
1303    case op2_kille_int:
1304    case op2_killne:
1305    case op2_killne_int:
1306    case op2_killge:
1307    case op2_killge_int:
1308    case op2_killge_uint:
1309    case op2_killgt:
1310    case op2_killgt_int:
1311    case op2_killgt_uint:
1312       return true;
1313    default:
1314       return false;
1315    }
1316 }
1317 
/* Modifier selection passed to emit_alu_op1 (AluInstr::from_nir uses it to
 * lower fabs, fneg and fsat to a MOV with a modifier). */
enum AluMods {
   mod_none = 0,       /* plain operation */
   mod_src0_abs = 1,   /* requested for nir_op_fabs */
   mod_src0_neg = 2,   /* requested for nir_op_fneg */
   mod_dest_clamp = 3, /* requested for nir_op_fsat */
};
1324 
1325 static bool
1326 emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);
1327 
1328 
1329 
1330 static bool
1331 emit_alu_op1(const nir_alu_instr& alu,
1332              EAluOp opcode,
1333              Shader& shader,
1334              AluMods mod = mod_none);
1335 static bool
1336 emit_alu_op1_64bit(const nir_alu_instr& alu,
1337                    EAluOp opcode,
1338                    Shader& shader,
1339                    bool switch_chan);
1340 static bool
1341 emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader);
1342 static bool
1343 emit_alu_neg(const nir_alu_instr& alu, Shader& shader);
1344 static bool
1345 emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1346 static bool
1347 emit_alu_op2_64bit(const nir_alu_instr& alu,
1348                    EAluOp opcode,
1349                    Shader& shader,
1350                    bool switch_order);
1351 static bool
1352 emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
1353                            EAluOp opcode,
1354                            Shader& shader,
1355                            bool switch_order);
1356 static bool
1357 emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1358 static bool
1359 emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader);
1360 static bool
1361 emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
1362 static bool
1363 emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
1364 static bool
1365 emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
1366 static bool
1367 emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
1368 static bool
1369 emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader);
1370 
1371 static bool
1372 emit_alu_op2(const nir_alu_instr& alu,
1373              EAluOp opcode,
1374              Shader& shader,
1375              AluInstr::Op2Options opts = AluInstr::op2_opt_none);
1376 static bool
1377 emit_alu_op2_int(const nir_alu_instr& alu,
1378                  EAluOp opcode,
1379                  Shader& shader,
1380                  AluInstr::Op2Options opts = AluInstr::op2_opt_none);
1381 static bool
1382 emit_alu_op3(const nir_alu_instr& alu,
1383              EAluOp opcode,
1384              Shader& shader,
1385              const std::array<int, 3>& src_shuffle = {0, 1, 2});
1386 static bool
1387 emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1388 static bool
1389 emit_any_all_fcomp(
1390    const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
1391 static bool
1392 emit_any_all_icomp(
1393    const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
1394 
1395 static bool
1396 emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1397 static bool
1398 emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader);
1399 static bool
1400 emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader);
1401 static bool
1402 emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader);
1403 static bool
1404 emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader);
1405 static bool
1406 emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader);
1407 static bool
1408 emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader);
1409 
1410 static bool
1411 emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader);
1412 static bool
1413 emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader);
1414 
1415 static bool
1416 emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
1417 static bool
1418 emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader);
1419 static bool
1420 emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);
1421 
1422 static bool
1423 emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1424 static bool
1425 emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1426 
1427 static bool
1428 emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1429 static bool
1430 emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1431 
1432 static bool
1433 emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1434 
1435 static bool
1436 emit_alu_cube(const nir_alu_instr& alu, Shader& shader);
1437 
1438 static bool
1439 emit_fdph(const nir_alu_instr& alu, Shader& shader);
1440 
1441 static bool
check_64_bit_op_src(nir_src * src,void * state)1442 check_64_bit_op_src(nir_src *src, void *state)
1443 {
1444    if (nir_src_bit_size(*src) == 64) {
1445       *(bool *)state = true;
1446       return false;
1447    }
1448    return true;
1449 }
1450 
1451 static bool
check_64_bit_op_def(nir_def * def,void * state)1452 check_64_bit_op_def(nir_def *def, void *state)
1453 {
1454    if (def->bit_size == 64) {
1455       *(bool *)state = true;
1456       return false;
1457    }
1458    return true;
1459 }
1460 
1461 bool
from_nir(nir_alu_instr * alu,Shader & shader)1462 AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
1463 {
1464    bool is_64bit_op = false;
1465    nir_foreach_src(&alu->instr, check_64_bit_op_src, &is_64bit_op);
1466    if (!is_64bit_op)
1467       nir_foreach_def(&alu->instr, check_64_bit_op_def, &is_64bit_op);
1468 
1469    if (is_64bit_op) {
1470       switch (alu->op) {
1471       case nir_op_pack_64_2x32:
1472       case nir_op_unpack_64_2x32:
1473       case nir_op_pack_64_2x32_split:
1474       case nir_op_pack_half_2x16_split:
1475       case nir_op_unpack_64_2x32_split_x:
1476       case nir_op_unpack_64_2x32_split_y:
1477          break;
1478       case nir_op_mov:
1479          return emit_alu_mov_64bit(*alu, shader);
1480       case nir_op_fneg:
1481          return emit_alu_neg(*alu, shader);
1482       case nir_op_fsat:
1483          return emit_alu_fsat64(*alu, shader);
1484       case nir_op_ffract:
1485          return emit_alu_op1_64bit(*alu, op1_fract_64, shader, true);
1486       case nir_op_feq32:
1487          return emit_alu_op2_64bit_one_dst(*alu, op2_sete_64, shader, false);
1488       case nir_op_fge32:
1489          return emit_alu_op2_64bit_one_dst(*alu, op2_setge_64, shader, false);
1490       case nir_op_flt32:
1491          return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
1492       case nir_op_fneu32:
1493          return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
1494       case nir_op_ffma:
1495          return emit_alu_fma_64bit(*alu, op3_fma_64, shader);
1496 
1497       case nir_op_fadd:
1498          return emit_alu_op2_64bit(*alu, op2_add_64, shader, false);
1499       case nir_op_fmul:
1500          return emit_alu_op2_64bit(*alu, op2_mul_64, shader, false);
1501       case nir_op_fmax:
1502          return emit_alu_op2_64bit(*alu, op2_max_64, shader, false);
1503       case nir_op_fmin:
1504          return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
1505       case nir_op_b2f64:
1506          return emit_alu_b2f64(*alu, shader);
1507       case nir_op_f2f64:
1508          return emit_alu_f2f64(*alu, shader);
1509       case nir_op_i2f64:
1510          return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
1511       case nir_op_u2f64:
1512          return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
1513       case nir_op_f2f32:
1514          return emit_alu_f2f32(*alu, shader);
1515       case nir_op_fabs:
1516          return emit_alu_abs64(*alu, shader);
1517       case nir_op_fsqrt:
1518          return emit_alu_op1_64bit_trans(*alu, op1_sqrt_64, shader);
1519       case nir_op_frcp:
1520          return emit_alu_op1_64bit_trans(*alu, op1_recip_64, shader);
1521       case nir_op_frsq:
1522          return emit_alu_op1_64bit_trans(*alu, op1_recipsqrt_64, shader);
1523       case nir_op_vec2:
1524          return emit_alu_vec2_64(*alu, shader);
1525       default:
1526          return false;
1527          ;
1528       }
1529    }
1530 
1531    if (shader.chip_class() == ISA_CC_CAYMAN) {
1532       switch (alu->op) {
1533       case nir_op_fcos_amd:
1534          return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
1535       case nir_op_fexp2:
1536          return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
1537       case nir_op_flog2:
1538          return emit_alu_trans_op1_cayman(*alu, op1_log_clamped, shader);
1539       case nir_op_frcp:
1540          return emit_alu_trans_op1_cayman(*alu, op1_recip_ieee, shader);
1541       case nir_op_frsq:
1542          return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader);
1543       case nir_op_fsqrt:
1544          return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
1545       case nir_op_fsin_amd:
1546          return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
1547       case nir_op_i2f32:
1548          return emit_alu_op1(*alu, op1_int_to_flt, shader);
1549       case nir_op_u2f32:
1550          return emit_alu_op1(*alu, op1_uint_to_flt, shader);
1551       case nir_op_imul:
1552          return emit_alu_trans_op2_cayman(*alu, op2_mullo_int, shader);
1553       case nir_op_imul_high:
1554          return emit_alu_trans_op2_cayman(*alu, op2_mulhi_int, shader);
1555       case nir_op_umul_high:
1556          return emit_alu_trans_op2_cayman(*alu, op2_mulhi_uint, shader);
1557       case nir_op_f2u32:
1558          return emit_alu_op1(*alu, op1_flt_to_uint, shader);
1559       case nir_op_f2i32:
1560          return emit_alu_op1(*alu, op1_flt_to_int, shader);
1561       case nir_op_ishl:
1562          return emit_alu_op2_int(*alu, op2_lshl_int, shader);
1563       case nir_op_ishr:
1564          return emit_alu_op2_int(*alu, op2_ashr_int, shader);
1565       case nir_op_ushr:
1566          return emit_alu_op2_int(*alu, op2_lshr_int, shader);
1567       default:;
1568       }
1569    } else {
1570       if (shader.chip_class() == ISA_CC_EVERGREEN) {
1571          switch (alu->op) {
1572          case nir_op_f2i32:
1573             return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_int, shader);
1574          case nir_op_f2u32:
1575             return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_uint, shader);
1576          default:;
1577          }
1578       }
1579 
1580       if (shader.chip_class() >= ISA_CC_R700) {
1581          switch (alu->op) {
1582          case nir_op_ishl:
1583             return emit_alu_op2_int(*alu, op2_lshl_int, shader);
1584          case nir_op_ishr:
1585             return emit_alu_op2_int(*alu, op2_ashr_int, shader);
1586          case nir_op_ushr:
1587             return emit_alu_op2_int(*alu, op2_lshr_int, shader);
1588          default:;
1589          }
1590       } else {
1591          switch (alu->op) {
1592          case nir_op_ishl:
1593             return emit_alu_trans_op2_eg(*alu, op2_lshl_int, shader);
1594          case nir_op_ishr:
1595             return emit_alu_trans_op2_eg(*alu, op2_ashr_int, shader);
1596          case nir_op_ushr:
1597             return emit_alu_trans_op2_eg(*alu, op2_lshr_int, shader);
1598          default:;
1599          }
1600       }
1601 
1602       switch (alu->op) {
1603       case nir_op_f2i32:
1604          return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
1605       case nir_op_f2u32:
1606          return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
1607       case nir_op_fcos_amd:
1608          return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
1609       case nir_op_fexp2:
1610          return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
1611       case nir_op_flog2:
1612          return emit_alu_trans_op1_eg(*alu, op1_log_clamped, shader);
1613       case nir_op_frcp:
1614          return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
1615       case nir_op_frsq:
1616          return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
1617       case nir_op_fsin_amd:
1618          return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
1619       case nir_op_fsqrt:
1620          return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
1621       case nir_op_i2f32:
1622          return emit_alu_trans_op1_eg(*alu, op1_int_to_flt, shader);
1623       case nir_op_u2f32:
1624          return emit_alu_trans_op1_eg(*alu, op1_uint_to_flt, shader);
1625       case nir_op_imul:
1626          return emit_alu_trans_op2_eg(*alu, op2_mullo_int, shader);
1627       case nir_op_imul_high:
1628          return emit_alu_trans_op2_eg(*alu, op2_mulhi_int, shader);
1629       case nir_op_umul_high:
1630          return emit_alu_trans_op2_eg(*alu, op2_mulhi_uint, shader);
1631       default:;
1632       }
1633    }
1634 
1635    switch (alu->op) {
1636    case nir_op_b2b1:
1637       return emit_alu_op1(*alu, op1_mov, shader);
1638    case nir_op_b2b32:
1639       return emit_alu_op1(*alu, op1_mov, shader);
1640    case nir_op_b2f32:
1641       return emit_alu_b2x(*alu, ALU_SRC_1, shader);
1642    case nir_op_b2i32:
1643       return emit_alu_b2x(*alu, ALU_SRC_1_INT, shader);
1644 
1645    case nir_op_bfm:
1646       return emit_alu_op2_int(*alu, op2_bfm_int, shader, op2_opt_none);
1647    case nir_op_bit_count:
1648       return emit_alu_op1(*alu, op1_bcnt_int, shader);
1649 
1650    case nir_op_bitfield_reverse:
1651       return emit_alu_op1(*alu, op1_bfrev_int, shader);
1652    case nir_op_bitfield_select:
1653       return emit_alu_op3(*alu, op3_bfi_int, shader);
1654 
1655    case nir_op_b32all_fequal2:
1656       return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader);
1657    case nir_op_b32all_fequal3:
1658       return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader);
1659    case nir_op_b32all_fequal4:
1660       return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader);
1661    case nir_op_b32all_iequal2:
1662       return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader);
1663    case nir_op_b32all_iequal3:
1664       return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader);
1665    case nir_op_b32all_iequal4:
1666       return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader);
1667    case nir_op_b32any_fnequal2:
1668       return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader);
1669    case nir_op_b32any_fnequal3:
1670       return emit_any_all_fcomp(*alu, op2_setne, 3, false, shader);
1671    case nir_op_b32any_fnequal4:
1672       return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader);
1673    case nir_op_b32any_inequal2:
1674       return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader);
1675    case nir_op_b32any_inequal3:
1676       return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader);
1677    case nir_op_b32any_inequal4:
1678       return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader);
1679    case nir_op_b32csel:
1680       return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});
1681 
1682    case nir_op_fabs:
1683       return emit_alu_op1(*alu, op1_mov, shader, mod_src0_abs);
1684    case nir_op_fadd:
1685       return emit_alu_op2(*alu, op2_add, shader);
1686    case nir_op_fceil:
1687       return emit_alu_op1(*alu, op1_ceil, shader);
1688    case nir_op_fcsel:
1689       return emit_alu_op3(*alu, op3_cnde, shader, {0, 2, 1});
1690    case nir_op_fcsel_ge:
1691       return emit_alu_op3(*alu, op3_cndge, shader, {0, 1, 2});
1692    case nir_op_fcsel_gt:
1693       return emit_alu_op3(*alu, op3_cndgt, shader, {0, 1, 2});
1694 
1695    case nir_op_fdph:
1696       return emit_fdph(*alu, shader);
1697    case nir_op_fdot2:
1698       if (shader.chip_class() >= ISA_CC_EVERGREEN)
1699          return emit_dot(*alu, 2, shader);
1700       else
1701          return emit_dot4(*alu, 2, shader);
1702    case nir_op_fdot3:
1703       if (shader.chip_class() >= ISA_CC_EVERGREEN)
1704          return emit_dot(*alu, 3, shader);
1705       else
1706          return emit_dot4(*alu, 3, shader);
1707    case nir_op_fdot4:
1708       return emit_dot4(*alu, 4, shader);
1709 
1710    case nir_op_feq32:
1711    case nir_op_feq:
1712       return emit_alu_op2(*alu, op2_sete_dx10, shader);
1713    case nir_op_ffloor:
1714       return emit_alu_op1(*alu, op1_floor, shader);
1715    case nir_op_ffract:
1716       return emit_alu_op1(*alu, op1_fract, shader);
1717    case nir_op_fge32:
1718       return emit_alu_op2(*alu, op2_setge_dx10, shader);
1719    case nir_op_fge:
1720       return emit_alu_op2(*alu, op2_setge_dx10, shader);
1721    case nir_op_find_lsb:
1722       return emit_alu_op1(*alu, op1_ffbl_int, shader);
1723 
1724    case nir_op_flt32:
1725       return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
1726    case nir_op_flt:
1727       return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
1728    case nir_op_fmax:
1729       return emit_alu_op2(*alu, op2_max_dx10, shader);
1730    case nir_op_fmin:
1731       return emit_alu_op2(*alu, op2_min_dx10, shader);
1732 
1733    case nir_op_fmul:
1734       if (!shader.has_flag(Shader::sh_legacy_math_rules))
1735          return emit_alu_op2(*alu, op2_mul_ieee, shader);
1736       FALLTHROUGH;
1737    case nir_op_fmulz:
1738       return emit_alu_op2(*alu, op2_mul, shader);
1739 
1740    case nir_op_fneg:
1741       return emit_alu_op1(*alu, op1_mov, shader, mod_src0_neg);
1742    case nir_op_fneu32:
1743       return emit_alu_op2(*alu, op2_setne_dx10, shader);
1744    case nir_op_fneu:
1745       return emit_alu_op2(*alu, op2_setne_dx10, shader);
1746 
1747    case nir_op_fround_even:
1748       return emit_alu_op1(*alu, op1_rndne, shader);
1749    case nir_op_fsat:
1750       return emit_alu_op1(*alu, op1_mov, shader, mod_dest_clamp);
1751    case nir_op_fsub:
1752       return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
1753    case nir_op_ftrunc:
1754       return emit_alu_op1(*alu, op1_trunc, shader);
1755    case nir_op_iadd:
1756       return emit_alu_op2_int(*alu, op2_add_int, shader);
1757    case nir_op_iand:
1758       return emit_alu_op2_int(*alu, op2_and_int, shader);
1759    case nir_op_ibfe:
1760       return emit_alu_op3(*alu, op3_bfe_int, shader);
1761    case nir_op_i32csel_ge:
1762       return emit_alu_op3(*alu, op3_cndge_int, shader, {0, 1, 2});
1763    case nir_op_i32csel_gt:
1764       return emit_alu_op3(*alu, op3_cndgt_int, shader, {0, 1, 2});
1765    case nir_op_ieq32:
1766       return emit_alu_op2_int(*alu, op2_sete_int, shader);
1767    case nir_op_ieq:
1768       return emit_alu_op2_int(*alu, op2_sete_int, shader);
1769    case nir_op_ifind_msb_rev:
1770       return emit_alu_op1(*alu, op1_ffbh_int, shader);
1771    case nir_op_ige32:
1772       return emit_alu_op2_int(*alu, op2_setge_int, shader);
1773    case nir_op_ige:
1774       return emit_alu_op2_int(*alu, op2_setge_int, shader);
1775    case nir_op_ilt32:
1776       return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
1777    case nir_op_ilt:
1778       return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
1779    case nir_op_imax:
1780       return emit_alu_op2_int(*alu, op2_max_int, shader);
1781    case nir_op_imin:
1782       return emit_alu_op2_int(*alu, op2_min_int, shader);
1783    case nir_op_ine32:
1784       return emit_alu_op2_int(*alu, op2_setne_int, shader);
1785    case nir_op_ine:
1786       return emit_alu_op2_int(*alu, op2_setne_int, shader);
1787    case nir_op_ineg:
1788       return emit_alu_comb_with_zero(*alu, op2_sub_int, shader);
1789    case nir_op_inot:
1790       return emit_alu_op1(*alu, op1_not_int, shader);
1791    case nir_op_ior:
1792       return emit_alu_op2_int(*alu, op2_or_int, shader);
1793    case nir_op_isub:
1794       return emit_alu_op2_int(*alu, op2_sub_int, shader);
1795    case nir_op_ixor:
1796       return emit_alu_op2_int(*alu, op2_xor_int, shader);
1797    case nir_op_pack_64_2x32:
1798       return emit_pack_64_2x32(*alu, shader);
1799    case nir_op_unpack_64_2x32:
1800       return emit_unpack_64_2x32(*alu, shader);
1801    case nir_op_pack_64_2x32_split:
1802       return emit_pack_64_2x32_split(*alu, shader);
1803    case nir_op_pack_half_2x16_split:
1804       return emit_pack_32_2x16_split(*alu, shader);
1805    case nir_op_slt:
1806       return emit_alu_op2(*alu, op2_setgt, shader, op2_opt_reverse);
1807    case nir_op_sge:
1808       return emit_alu_op2(*alu, op2_setge, shader);
1809    case nir_op_seq:
1810       return emit_alu_op2(*alu, op2_sete, shader);
1811    case nir_op_sne:
1812       return emit_alu_op2(*alu, op2_setne, shader);
1813    case nir_op_ubfe:
1814       return emit_alu_op3(*alu, op3_bfe_uint, shader);
1815    case nir_op_ufind_msb_rev:
1816       return emit_alu_op1(*alu, op1_ffbh_uint, shader);
1817    case nir_op_uge32:
1818       return emit_alu_op2_int(*alu, op2_setge_uint, shader);
1819    case nir_op_uge:
1820       return emit_alu_op2_int(*alu, op2_setge_uint, shader);
1821    case nir_op_ult32:
1822       return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
1823    case nir_op_ult:
1824       return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
1825    case nir_op_umad24:
1826       return emit_alu_op3(*alu, op3_muladd_uint24, shader, {0, 1, 2});
1827    case nir_op_umax:
1828       return emit_alu_op2_int(*alu, op2_max_uint, shader);
1829    case nir_op_umin:
1830       return emit_alu_op2_int(*alu, op2_min_uint, shader);
1831    case nir_op_umul24:
1832       return emit_alu_op2(*alu, op2_mul_uint24, shader);
1833    case nir_op_unpack_64_2x32_split_x:
1834       return emit_unpack_64_2x32_split(*alu, 0, shader);
1835    case nir_op_unpack_64_2x32_split_y:
1836       return emit_unpack_64_2x32_split(*alu, 1, shader);
1837    case nir_op_unpack_half_2x16_split_x:
1838       return emit_unpack_32_2x16_split_x(*alu, shader);
1839    case nir_op_unpack_half_2x16_split_y:
1840       return emit_unpack_32_2x16_split_y(*alu, shader);
1841 
1842    case nir_op_ffma:
1843       if (!shader.has_flag(Shader::sh_legacy_math_rules))
1844          return emit_alu_op3(*alu, op3_muladd_ieee, shader);
1845       FALLTHROUGH;
1846    case nir_op_ffmaz:
1847       return emit_alu_op3(*alu, op3_muladd, shader);
1848 
1849    case nir_op_mov:
1850       return emit_alu_op1(*alu, op1_mov, shader);
1851    case nir_op_f2i32:
1852       return emit_alu_op1(*alu, op1_flt_to_int, shader);
1853    case nir_op_vec2:
1854       return emit_create_vec(*alu, 2, shader);
1855    case nir_op_vec3:
1856       return emit_create_vec(*alu, 3, shader);
1857    case nir_op_vec4:
1858       return emit_create_vec(*alu, 4, shader);
1859 
1860   case nir_op_cube_amd:
1861       return emit_alu_cube(*alu, shader);
1862    default:
1863       fprintf(stderr, "Unknown instruction '");
1864       nir_print_instr(&alu->instr, stderr);
1865       fprintf(stderr, "'\n");
1866       assert(0);
1867       return false;
1868    }
1869 }
1870 
1871 static Pin
pin_for_components(const nir_alu_instr & alu)1872 pin_for_components(const nir_alu_instr& alu)
1873 {
1874    return alu.def.num_components == 1 ? pin_free : pin_none;
1875 }
1876 
1877 static bool
emit_alu_op1_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_chan)1878 emit_alu_op1_64bit(const nir_alu_instr& alu,
1879                    EAluOp opcode,
1880                    Shader& shader,
1881                    bool switch_chan)
1882 {
1883    auto& value_factory = shader.value_factory();
1884 
1885    auto group = new AluGroup();
1886 
1887    AluInstr *ir = nullptr;
1888 
1889    int swz[2] = {0, 1};
1890    if (switch_chan) {
1891       swz[0] = 1;
1892       swz[1] = 0;
1893    }
1894 
1895    for (unsigned i = 0; i < alu.def.num_components; ++i) {
1896       ir = new AluInstr(opcode,
1897                         value_factory.dest(alu.def, 2 * i, pin_chan),
1898                         value_factory.src64(alu.src[0], i, swz[0]),
1899                         {alu_write});
1900       group->add_instruction(ir);
1901 
1902       ir = new AluInstr(opcode,
1903                         value_factory.dest(alu.def, 2 * i + 1, pin_chan),
1904                         value_factory.src64(alu.src[0], i, swz[1]),
1905                         {alu_write});
1906       group->add_instruction(ir);
1907    }
1908    if (ir)
1909       ir->set_alu_flag(alu_last_instr);
1910    shader.emit_instruction(group);
1911    return true;
1912 }
1913 
1914 static bool
emit_alu_mov_64bit(const nir_alu_instr & alu,Shader & shader)1915 emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader)
1916 {
1917    auto& value_factory = shader.value_factory();
1918 
1919    AluInstr *ir = nullptr;
1920 
1921    for (unsigned i = 0; i < alu.def.num_components; ++i) {
1922       for (unsigned c = 0; c < 2; ++c) {
1923          ir = new AluInstr(op1_mov,
1924                            value_factory.dest(alu.def, 2 * i + c, pin_free),
1925                            value_factory.src64(alu.src[0], i, c),
1926                            {alu_write});
1927          shader.emit_instruction(ir);
1928       }
1929    }
1930    if (ir)
1931       ir->set_alu_flag(alu_last_instr);
1932    return true;
1933 }
1934 
1935 static bool
emit_alu_neg(const nir_alu_instr & alu,Shader & shader)1936 emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
1937 {
1938    auto& value_factory = shader.value_factory();
1939 
1940    AluInstr *ir = nullptr;
1941 
1942    for (unsigned i = 0; i < alu.def.num_components; ++i) {
1943       for (unsigned c = 0; c < 2; ++c) {
1944          ir = new AluInstr(op1_mov,
1945                            value_factory.dest(alu.def, 2 * i + c, pin_chan),
1946                            value_factory.src64(alu.src[0], i, c),
1947                            {alu_write});
1948          shader.emit_instruction(ir);
1949       }
1950       ir->set_source_mod(0, AluInstr::mod_neg);
1951    }
1952    if (ir)
1953       ir->set_alu_flag(alu_last_instr);
1954 
1955    return true;
1956 }
1957 
1958 static bool
emit_alu_abs64(const nir_alu_instr & alu,Shader & shader)1959 emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
1960 {
1961    auto& value_factory = shader.value_factory();
1962 
1963    assert(alu.def.num_components == 1);
1964 
1965    shader.emit_instruction(new AluInstr(op1_mov,
1966                                         value_factory.dest(alu.def, 0, pin_chan),
1967                                         value_factory.src64(alu.src[0], 0, 0),
1968                                         AluInstr::write));
1969 
1970    auto ir = new AluInstr(op1_mov,
1971                           value_factory.dest(alu.def, 1, pin_chan),
1972                           value_factory.src64(alu.src[0], 0, 1),
1973                           AluInstr::last_write);
1974    ir->set_source_mod(0, AluInstr::mod_abs);
1975    shader.emit_instruction(ir);
1976    return true;
1977 }
1978 
1979 static bool
try_propagat_fsat64(const nir_alu_instr & alu,Shader & shader)1980 try_propagat_fsat64(const nir_alu_instr& alu, Shader& shader)
1981 {
1982    auto& value_factory = shader.value_factory();
1983    auto src0 = value_factory.src64(alu.src[0], 0, 0);
1984    auto reg0 = src0->as_register();
1985    if (!reg0)
1986       return false;
1987 
1988    if (!reg0->has_flag(Register::ssa))
1989       return false;
1990 
1991    if (reg0->parents().size() != 1)
1992       return false;
1993 
1994    if (!reg0->uses().empty())
1995       return false;
1996 
1997    auto parent = (*reg0->parents().begin())->as_alu();
1998    if (!parent)
1999       return false;
2000 
2001    auto opinfo = alu_ops.at(parent->opcode());
2002    if (!opinfo.can_clamp)
2003       return false;
2004 
2005    parent->set_alu_flag(alu_dst_clamp);
2006    return true;
2007 }
2008 
2009 
2010 static bool
emit_alu_fsat64(const nir_alu_instr & alu,Shader & shader)2011 emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader)
2012 {
2013    auto& value_factory = shader.value_factory();
2014 
2015    assert(alu.def.num_components == 1);
2016 
2017    if (try_propagat_fsat64(alu, shader)) {
2018       auto ir = new AluInstr(op1_mov,
2019                              value_factory.dest(alu.def, 0, pin_chan),
2020                              value_factory.src64(alu.src[0], 0, 0),
2021                              AluInstr::write);
2022       shader.emit_instruction(ir);
2023 
2024       shader.emit_instruction(new AluInstr(op1_mov,
2025                                            value_factory.dest(alu.def, 1, pin_chan),
2026                                            value_factory.src64(alu.src[0], 0, 1),
2027                               AluInstr::last_write));
2028    } else {
2029 
2030       /* dest clamp doesn't work on plain 64 bit move, so add a zero
2031        * to apply the modifier */
2032 
2033       auto group = new AluGroup();
2034       auto ir = new AluInstr(op2_add_64,
2035                              value_factory.dest(alu.def, 0, pin_chan),
2036                              value_factory.src64(alu.src[0], 0, 1),
2037                              value_factory.literal(0),
2038                              AluInstr::write);
2039       ir->set_alu_flag(alu_dst_clamp);
2040       group->add_instruction(ir);
2041 
2042       group->add_instruction(new AluInstr(op2_add_64,
2043                                           value_factory.dest(alu.def, 1, pin_chan),
2044                                           value_factory.src64(alu.src[0], 0, 0),
2045                                           value_factory.literal(0),
2046                                           AluInstr::last_write));
2047       shader.emit_instruction(group);
2048 
2049    }
2050    return true;
2051 }
2052 
2053 
2054 static bool
emit_alu_op2_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_src)2055 emit_alu_op2_64bit(const nir_alu_instr& alu,
2056                    EAluOp opcode,
2057                    Shader& shader,
2058                    bool switch_src)
2059 {
2060    auto& value_factory = shader.value_factory();
2061    auto group = new AluGroup();
2062    AluInstr *ir = nullptr;
2063    int order[2] = {0, 1};
2064    if (switch_src) {
2065       order[0] = 1;
2066       order[1] = 0;
2067    }
2068 
2069    int num_emit0 = opcode == op2_mul_64 ? 3 : 1;
2070 
2071    std::array<std::array<PRegister, 4>,2> tmp;
2072    for (unsigned k = 0; k < alu.def.num_components; ++k) {
2073       tmp[k][0] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 1), 0);
2074       tmp[k][1] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 1), 1);
2075       tmp[k][2] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 0), 2);
2076       tmp[k][3] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 0), 3);
2077    }
2078 
2079    assert(num_emit0 == 1 || alu.def.num_components == 1);
2080 
2081    for (unsigned k = 0; k < alu.def.num_components; ++k) {
2082       int i = 0;
2083       for (; i < num_emit0; ++i) {
2084          auto dest = i < 2 ? value_factory.dest(alu.def, i, pin_chan)
2085                            : value_factory.dummy_dest(i);
2086 
2087          ir = new AluInstr(opcode,
2088                            dest,
2089                            tmp[k][0],
2090                            tmp[k][1],
2091                            i < 2 ? AluInstr::write : AluInstr::empty);
2092          group->add_instruction(ir);
2093       }
2094 
2095       auto dest =
2096          i == 1 ? value_factory.dest(alu.def, i, pin_chan) : value_factory.dummy_dest(i);
2097 
2098       ir = new AluInstr(opcode,
2099                         dest,
2100                         tmp[k][2],
2101                         tmp[k][3],
2102                         i == 1 ? AluInstr::write : AluInstr::empty);
2103       group->add_instruction(ir);
2104    }
2105    if (ir)
2106       ir->set_alu_flag(alu_last_instr);
2107 
2108    shader.emit_instruction(group);
2109    return true;
2110 }
2111 
2112 static bool
emit_alu_op2_64bit_one_dst(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_order)2113 emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
2114                            EAluOp opcode,
2115                            Shader& shader,
2116                            bool switch_order)
2117 {
2118    auto& value_factory = shader.value_factory();
2119    AluInstr *ir = nullptr;
2120    int order[2] = {0, 1};
2121    if (switch_order) {
2122       order[0] = 1;
2123       order[1] = 0;
2124    }
2125 
2126    AluInstr::SrcValues src(4);
2127 
2128    for (unsigned k = 0; k < alu.def.num_components; ++k) {
2129       auto dest = value_factory.dest(alu.def, 2 * k, pin_chan);
2130       src[0] = value_factory.src64(alu.src[order[0]], k, 1);
2131       src[1] = value_factory.src64(alu.src[order[1]], k, 1);
2132       src[2] = value_factory.src64(alu.src[order[0]], k, 0);
2133       src[3] = value_factory.src64(alu.src[order[1]], k, 0);
2134 
2135       ir = new AluInstr(opcode, dest, src, AluInstr::write, 2);
2136       ir->set_alu_flag(alu_64bit_op);
2137 
2138       shader.emit_instruction(ir);
2139    }
2140    if (ir)
2141       ir->set_alu_flag(alu_last_instr);
2142 
2143    return true;
2144 }
2145 
2146 static bool
emit_alu_op1_64bit_trans(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2147 emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2148 {
2149    auto& value_factory = shader.value_factory();
2150    auto group = new AluGroup();
2151    AluInstr *ir = nullptr;
2152    for (unsigned i = 0; i < 3; ++i) {
2153       ir = new AluInstr(opcode,
2154                         i < 2 ? value_factory.dest(alu.def, i, pin_chan)
2155                               : value_factory.dummy_dest(i),
2156                         value_factory.src64(alu.src[0], 0, 1),
2157                         value_factory.src64(alu.src[0], 0, 0),
2158                         i < 2 ? AluInstr::write : AluInstr::empty);
2159 
2160       if (opcode == op1_sqrt_64)
2161          ir->set_source_mod(0, AluInstr::mod_abs);
2162       group->add_instruction(ir);
2163    }
2164    if (ir)
2165       ir->set_alu_flag(alu_last_instr);
2166    shader.emit_instruction(group);
2167    return true;
2168 }
2169 
2170 static bool
emit_alu_fma_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2171 emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2172 {
2173    auto& value_factory = shader.value_factory();
2174    auto group = new AluGroup();
2175    AluInstr *ir = nullptr;
2176    for (unsigned i = 0; i < 4; ++i) {
2177 
2178       int chan = i < 3 ? 1 : 0;
2179       auto dest =
2180          i < 2 ? value_factory.dest(alu.def, i, pin_chan) : value_factory.dummy_dest(i);
2181 
2182       ir = new AluInstr(opcode,
2183                         dest,
2184                         value_factory.src64(alu.src[0], 0, chan),
2185                         value_factory.src64(alu.src[1], 0, chan),
2186                         value_factory.src64(alu.src[2], 0, chan),
2187                         i < 2 ? AluInstr::write : AluInstr::empty);
2188       group->add_instruction(ir);
2189    }
2190    if (ir)
2191       ir->set_alu_flag(alu_last_instr);
2192    shader.emit_instruction(group);
2193    return true;
2194 }
2195 
2196 static bool
emit_alu_b2f64(const nir_alu_instr & alu,Shader & shader)2197 emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader)
2198 {
2199    auto& value_factory = shader.value_factory();
2200 
2201    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2202       auto ir = new AluInstr(op2_and_int,
2203                         value_factory.dest(alu.def, 2 * i, pin_group),
2204                         value_factory.src(alu.src[0], i),
2205                         value_factory.zero(),
2206                         {alu_write});
2207       shader.emit_instruction(ir);
2208 
2209       ir = new AluInstr(op2_and_int,
2210                         value_factory.dest(alu.def, 2 * i + 1, pin_group),
2211                         value_factory.src(alu.src[0], i),
2212                         value_factory.literal(0x3ff00000),
2213                         {alu_write});
2214       shader.emit_instruction(ir);
2215    }
2216    return true;
2217 }
2218 
2219 static bool
emit_alu_i2f64(const nir_alu_instr & alu,EAluOp op,Shader & shader)2220 emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
2221 {
2222    /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
2223    auto& value_factory = shader.value_factory();
2224    auto group = new AluGroup();
2225    AluInstr *ir = nullptr;
2226 
2227    assert(alu.def.num_components == 1);
2228 
2229    auto tmpx = value_factory.temp_register();
2230    shader.emit_instruction(new AluInstr(op2_and_int,
2231                                         tmpx,
2232                                         value_factory.src(alu.src[0], 0),
2233                                         value_factory.literal(0xffffff00),
2234                                         AluInstr::write));
2235    auto tmpy = value_factory.temp_register();
2236    shader.emit_instruction(new AluInstr(op2_and_int,
2237                                         tmpy,
2238                                         value_factory.src(alu.src[0], 0),
2239                                         value_factory.literal(0xff),
2240                                         AluInstr::last_write));
2241 
2242    auto tmpx2 = value_factory.temp_register();
2243    auto tmpy2 = value_factory.temp_register();
2244    shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, AluInstr::last_write));
2245    shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, AluInstr::last_write));
2246 
2247    auto tmpx3 = value_factory.temp_register(0);
2248    auto tmpy3 = value_factory.temp_register(1);
2249    auto tmpz3 = value_factory.temp_register(2);
2250    auto tmpw3 = value_factory.temp_register(3);
2251 
2252    ir = new AluInstr(op1_flt32_to_flt64, tmpx3, tmpx2, AluInstr::write);
2253    group->add_instruction(ir);
2254    ir = new AluInstr(op1_flt32_to_flt64, tmpy3, value_factory.zero(), AluInstr::write);
2255    group->add_instruction(ir);
2256    ir = new AluInstr(op1_flt32_to_flt64, tmpz3, tmpy2, AluInstr::write);
2257    group->add_instruction(ir);
2258    ir =
2259       new AluInstr(op1_flt32_to_flt64, tmpw3, value_factory.zero(), AluInstr::last_write);
2260    group->add_instruction(ir);
2261    shader.emit_instruction(group);
2262 
2263    group = new AluGroup();
2264 
2265    ir = new AluInstr(op2_add_64,
2266                      value_factory.dest(alu.def, 0, pin_chan),
2267                      tmpy3,
2268                      tmpw3,
2269                      AluInstr::write);
2270    group->add_instruction(ir);
2271    ir = new AluInstr(op2_add_64,
2272                      value_factory.dest(alu.def, 1, pin_chan),
2273                      tmpx3,
2274                      tmpz3,
2275                      AluInstr::write);
2276    group->add_instruction(ir);
2277    shader.emit_instruction(group);
2278 
2279    return true;
2280 }
2281 
2282 static bool
emit_alu_f2f64(const nir_alu_instr & alu,Shader & shader)2283 emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
2284 {
2285    auto& value_factory = shader.value_factory();
2286    auto group = new AluGroup();
2287    AluInstr *ir = nullptr;
2288 
2289    assert(alu.def.num_components == 1);
2290 
2291    ir = new AluInstr(op1_flt32_to_flt64,
2292                      value_factory.dest(alu.def, 0, pin_chan),
2293                      value_factory.src(alu.src[0], 0),
2294                      AluInstr::write);
2295    group->add_instruction(ir);
2296    ir = new AluInstr(op1_flt32_to_flt64,
2297                      value_factory.dest(alu.def, 1, pin_chan),
2298                      value_factory.zero(),
2299                      AluInstr::last_write);
2300    group->add_instruction(ir);
2301    shader.emit_instruction(group);
2302    return true;
2303 }
2304 
2305 static bool
emit_alu_f2f32(const nir_alu_instr & alu,Shader & shader)2306 emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader)
2307 {
2308    auto& value_factory = shader.value_factory();
2309    auto group = new AluGroup();
2310    AluInstr *ir = nullptr;
2311 
2312    ir = new AluInstr(op1v_flt64_to_flt32,
2313                      value_factory.dest(alu.def, 0, pin_chan),
2314                      value_factory.src64(alu.src[0], 0, 1),
2315                      {alu_write});
2316    group->add_instruction(ir);
2317    ir = new AluInstr(op1v_flt64_to_flt32,
2318                      value_factory.dummy_dest(1),
2319                      value_factory.src64(alu.src[0], 0, 0),
2320                      AluInstr::last);
2321    group->add_instruction(ir);
2322    shader.emit_instruction(group);
2323    return true;
2324 }
2325 
2326 static bool
emit_alu_b2x(const nir_alu_instr & alu,AluInlineConstants mask,Shader & shader)2327 emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader)
2328 {
2329    auto& value_factory = shader.value_factory();
2330    AluInstr *ir = nullptr;
2331    auto pin = pin_for_components(alu);
2332 
2333    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2334       auto src = value_factory.src(alu.src[0], i);
2335       ir = new AluInstr(op2_and_int,
2336                         value_factory.dest(alu.def, i, pin),
2337                         src,
2338                         value_factory.inline_const(mask, 0),
2339                         {alu_write});
2340       shader.emit_instruction(ir);
2341    }
2342    if (ir)
2343       ir->set_alu_flag(alu_last_instr);
2344    return true;
2345 }
2346 
2347 static bool
emit_alu_op1(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluMods mod)2348 emit_alu_op1(const nir_alu_instr& alu,
2349              EAluOp opcode,
2350              Shader& shader,
2351              AluMods mod)
2352 {
2353    auto& value_factory = shader.value_factory();
2354 
2355    AluInstr *ir = nullptr;
2356    auto pin = pin_for_components(alu);
2357 
2358    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2359       ir = new AluInstr(opcode,
2360                         value_factory.dest(alu.def, i, pin),
2361                         value_factory.src(alu.src[0], i),
2362                         {alu_write});
2363       switch (mod) {
2364       case mod_src0_abs:
2365          ir->set_source_mod(0, AluInstr::mod_abs); break;
2366       case mod_src0_neg:
2367          ir->set_source_mod(0, AluInstr::mod_neg); break;
2368       case mod_dest_clamp:
2369          ir->set_alu_flag(alu_dst_clamp);
2370          default:;
2371       }
2372       shader.emit_instruction(ir);
2373    }
2374    if (ir)
2375       ir->set_alu_flag(alu_last_instr);
2376    return true;
2377 }
2378 
2379 static bool
emit_alu_op2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)2380 emit_alu_op2(const nir_alu_instr& alu,
2381              EAluOp opcode,
2382              Shader& shader,
2383              AluInstr::Op2Options opts)
2384 {
2385    auto& value_factory = shader.value_factory();
2386    const nir_alu_src *src0 = &alu.src[0];
2387    const nir_alu_src *src1 = &alu.src[1];
2388 
2389    int idx0 = 0;
2390    int idx1 = 1;
2391    if (opts & AluInstr::op2_opt_reverse) {
2392       std::swap(src0, src1);
2393       std::swap(idx0, idx1);
2394    }
2395 
2396    bool src1_negate = (opts & AluInstr::op2_opt_neg_src1);
2397 
2398    auto pin = pin_for_components(alu);
2399    AluInstr *ir = nullptr;
2400    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2401       ir = new AluInstr(opcode,
2402                         value_factory.dest(alu.def, i, pin),
2403                         value_factory.src(*src0, i),
2404                         value_factory.src(*src1, i),
2405                         {alu_write});
2406       if (src1_negate)
2407          ir->set_source_mod(1, AluInstr::mod_neg);
2408       shader.emit_instruction(ir);
2409    }
2410    if (ir)
2411       ir->set_alu_flag(alu_last_instr);
2412    return true;
2413 }
2414 
2415 static bool
emit_alu_op2_int(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)2416 emit_alu_op2_int(const nir_alu_instr& alu,
2417                  EAluOp opcode,
2418                  Shader& shader,
2419                  AluInstr::Op2Options opts)
2420 {
2421    return emit_alu_op2(alu, opcode, shader, opts);
2422 }
2423 
2424 static bool
emit_alu_op3(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,const std::array<int,3> & src_shuffle)2425 emit_alu_op3(const nir_alu_instr& alu,
2426              EAluOp opcode,
2427              Shader& shader,
2428              const std::array<int, 3>& src_shuffle)
2429 {
2430    auto& value_factory = shader.value_factory();
2431    const nir_alu_src *src[3];
2432    src[0] = &alu.src[src_shuffle[0]];
2433    src[1] = &alu.src[src_shuffle[1]];
2434    src[2] = &alu.src[src_shuffle[2]];
2435 
2436    auto pin = pin_for_components(alu);
2437    AluInstr *ir = nullptr;
2438    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2439       ir = new AluInstr(opcode,
2440                         value_factory.dest(alu.def, i, pin),
2441                         value_factory.src(*src[0], i),
2442                         value_factory.src(*src[1], i),
2443                         value_factory.src(*src[2], i),
2444                         {alu_write});
2445       ir->set_alu_flag(alu_write);
2446       shader.emit_instruction(ir);
2447    }
2448    if (ir)
2449       ir->set_alu_flag(alu_last_instr);
2450    return true;
2451 }
2452 
2453 static bool
emit_any_all_fcomp2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2454 emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2455 {
2456    AluInstr *ir = nullptr;
2457    auto& value_factory = shader.value_factory();
2458 
2459    PRegister tmp[2];
2460    tmp[0] = value_factory.temp_register();
2461    tmp[1] = value_factory.temp_register();
2462 
2463    for (unsigned i = 0; i < 2; ++i) {
2464       ir = new AluInstr(opcode,
2465                         tmp[i],
2466                         value_factory.src(alu.src[0], i),
2467                         value_factory.src(alu.src[1], i),
2468                         {alu_write});
2469       shader.emit_instruction(ir);
2470    }
2471    ir->set_alu_flag(alu_last_instr);
2472 
2473    opcode = (opcode == op2_setne_dx10) ? op2_or_int : op2_and_int;
2474    ir = new AluInstr(opcode,
2475                      value_factory.dest(alu.def, 0, pin_free),
2476                      tmp[0],
2477                      tmp[1],
2478                      AluInstr::last_write);
2479    shader.emit_instruction(ir);
2480    return true;
2481 }
2482 
2483 static bool
emit_any_all_fcomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2484 emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2485 {
2486    /* This should probabyl be lowered in nir */
2487    auto& value_factory = shader.value_factory();
2488 
2489    AluInstr *ir = nullptr;
2490    RegisterVec4 v = value_factory.temp_vec4(pin_group);
2491    AluInstr::SrcValues s;
2492 
2493    for (int i = 0; i < nc; ++i) {
2494       s.push_back(v[i]);
2495    }
2496 
2497    for (int i = nc; i < 4; ++i)
2498       s.push_back(value_factory.inline_const(all ? ALU_SRC_1 : ALU_SRC_0, 0));
2499 
2500    for (int i = 0; i < nc; ++i) {
2501       ir = new AluInstr(op,
2502                         v[i],
2503                         value_factory.src(alu.src[0], i),
2504                         value_factory.src(alu.src[1], i),
2505                         {alu_write});
2506       shader.emit_instruction(ir);
2507    }
2508    if (ir)
2509       ir->set_alu_flag(alu_last_instr);
2510 
2511    auto max_val = value_factory.temp_register();
2512 
2513    ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
2514 
2515    if (all) {
2516       ir->set_source_mod(0, AluInstr::mod_neg);
2517       ir->set_source_mod(1, AluInstr::mod_neg);
2518       ir->set_source_mod(2, AluInstr::mod_neg);
2519       ir->set_source_mod(3, AluInstr::mod_neg);
2520    }
2521 
2522    shader.emit_instruction(ir);
2523 
2524    if (all)
2525       op = (op == op2_sete) ? op2_sete_dx10 : op2_setne_dx10;
2526    else
2527       op = (op == op2_sete) ? op2_setne_dx10 : op2_sete_dx10;
2528 
2529    ir = new AluInstr(op,
2530                      value_factory.dest(alu.def, 0, pin_free),
2531                      max_val,
2532                      value_factory.inline_const(ALU_SRC_1, 0),
2533                      AluInstr::last_write);
2534    if (all)
2535       ir->set_source_mod(1, AluInstr::mod_neg);
2536    shader.emit_instruction(ir);
2537 
2538    return true;
2539 }
2540 
2541 static bool
emit_any_all_icomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2542 emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2543 {
2544    /* This should probabyl be lowered in nir */
2545    auto& value_factory = shader.value_factory();
2546 
2547    AluInstr *ir = nullptr;
2548    PRegister v[6];
2549 
2550    auto dest = value_factory.dest(alu.def, 0, pin_free);
2551 
2552    for (int i = 0; i < nc + nc / 2; ++i)
2553       v[i] = value_factory.temp_register();
2554 
2555    EAluOp combine = all ? op2_and_int : op2_or_int;
2556 
2557    for (int i = 0; i < nc; ++i) {
2558       ir = new AluInstr(op,
2559                         v[i],
2560                         value_factory.src(alu.src[0], i),
2561                         value_factory.src(alu.src[1], i),
2562                         AluInstr::write);
2563       shader.emit_instruction(ir);
2564    }
2565    if (ir)
2566       ir->set_alu_flag(alu_last_instr);
2567 
2568    if (nc == 2) {
2569       ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write);
2570       shader.emit_instruction(ir);
2571       return true;
2572    }
2573 
2574    if (nc == 3) {
2575       ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write);
2576       shader.emit_instruction(ir);
2577       ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write);
2578       shader.emit_instruction(ir);
2579       return true;
2580    }
2581 
2582    if (nc == 4) {
2583       ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write);
2584       shader.emit_instruction(ir);
2585       ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write);
2586       shader.emit_instruction(ir);
2587       ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write);
2588       shader.emit_instruction(ir);
2589       return true;
2590    }
2591 
2592    return false;
2593 }
2594 
2595 static bool
emit_dot(const nir_alu_instr & alu,int n,Shader & shader)2596 emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
2597 {
2598    auto& value_factory = shader.value_factory();
2599    const nir_alu_src& src0 = alu.src[0];
2600    const nir_alu_src& src1 = alu.src[1];
2601 
2602    auto dest = value_factory.dest(alu.def, 0, pin_chan);
2603 
2604    AluInstr::SrcValues srcs(2 * n);
2605 
2606    for (int i = 0; i < n; ++i) {
2607       srcs[2 * i] = value_factory.src(src0, i);
2608       srcs[2 * i + 1] = value_factory.src(src1, i);
2609    }
2610 
2611    AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n);
2612 
2613    shader.emit_instruction(ir);
2614    shader.set_flag(Shader::sh_disble_sb);
2615 
2616    return true;
2617 }
2618 
2619 static bool
emit_dot4(const nir_alu_instr & alu,int nelm,Shader & shader)2620 emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader)
2621 {
2622    auto& value_factory = shader.value_factory();
2623    const nir_alu_src& src0 = alu.src[0];
2624    const nir_alu_src& src1 = alu.src[1];
2625 
2626    auto dest = value_factory.dest(alu.def, 0, pin_free);
2627 
2628    AluInstr::SrcValues srcs(8);
2629 
2630    for (int i = 0; i < nelm; ++i) {
2631       srcs[2 * i] = value_factory.src(src0, i);
2632       srcs[2 * i + 1] = value_factory.src(src1, i);
2633    }
2634 
2635    for (int i = nelm; i < 4; ++i) {
2636        srcs[2 * i] = value_factory.zero();
2637        srcs[2 * i + 1] = value_factory.zero();
2638    }
2639 
2640    AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2641 
2642    shader.emit_instruction(ir);
2643    return true;
2644 }
2645 
2646 static bool
emit_fdph(const nir_alu_instr & alu,Shader & shader)2647 emit_fdph(const nir_alu_instr& alu, Shader& shader)
2648 {
2649    auto& value_factory = shader.value_factory();
2650    const nir_alu_src& src0 = alu.src[0];
2651    const nir_alu_src& src1 = alu.src[1];
2652 
2653    auto dest = value_factory.dest(alu.def, 0, pin_free);
2654 
2655    AluInstr::SrcValues srcs(8);
2656 
2657    for (int i = 0; i < 3; ++i) {
2658       srcs[2 * i] = value_factory.src(src0, i);
2659       srcs[2 * i + 1] = value_factory.src(src1, i);
2660    }
2661 
2662    srcs[6] = value_factory.one();
2663    srcs[7] = value_factory.src(src1, 3);
2664 
2665    AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2666    shader.emit_instruction(ir);
2667    return true;
2668 }
2669 
2670 static bool
emit_create_vec(const nir_alu_instr & instr,unsigned nc,Shader & shader)2671 emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
2672 {
2673    auto& value_factory = shader.value_factory();
2674    AluInstr *ir = nullptr;
2675 
2676    for (unsigned i = 0; i < nc; ++i) {
2677       auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
2678       auto dst = value_factory.dest(instr.def, i, pin_none);
2679       shader.emit_instruction(new AluInstr(op1_mov, dst, src, {alu_write}));
2680    }
2681 
2682    if (ir)
2683       ir->set_alu_flag(alu_last_instr);
2684    return true;
2685 }
2686 
2687 static bool
emit_alu_comb_with_zero(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2688 emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2689 {
2690    auto& value_factory = shader.value_factory();
2691    AluInstr *ir = nullptr;
2692    auto pin = pin_for_components(alu);
2693    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2694       ir = new AluInstr(opcode,
2695                         value_factory.dest(alu.def, i, pin),
2696                         value_factory.zero(),
2697                         value_factory.src(alu.src[0], i),
2698                         AluInstr::write);
2699       shader.emit_instruction(ir);
2700    }
2701    if (ir)
2702       ir->set_alu_flag(alu_last_instr);
2703 
2704    return true;
2705 }
2706 
2707 static bool
emit_pack_64_2x32_split(const nir_alu_instr & alu,Shader & shader)2708 emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader)
2709 {
2710    auto& value_factory = shader.value_factory();
2711    AluInstr *ir = nullptr;
2712    for (unsigned i = 0; i < 2; ++i) {
2713       ir = new AluInstr(op1_mov,
2714                         value_factory.dest(alu.def, i, pin_none),
2715                         value_factory.src(alu.src[i], 0),
2716                         AluInstr::write);
2717       shader.emit_instruction(ir);
2718    }
2719    ir->set_alu_flag(alu_last_instr);
2720    return true;
2721 }
2722 
2723 static bool
emit_pack_64_2x32(const nir_alu_instr & alu,Shader & shader)2724 emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2725 {
2726    auto& value_factory = shader.value_factory();
2727    AluInstr *ir = nullptr;
2728    for (unsigned i = 0; i < 2; ++i) {
2729       ir = new AluInstr(op1_mov,
2730                         value_factory.dest(alu.def, i, pin_none),
2731                         value_factory.src(alu.src[0], i),
2732                         AluInstr::write);
2733       shader.emit_instruction(ir);
2734    }
2735    ir->set_alu_flag(alu_last_instr);
2736    return true;
2737 }
2738 
2739 static bool
emit_unpack_64_2x32(const nir_alu_instr & alu,Shader & shader)2740 emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2741 {
2742    auto& value_factory = shader.value_factory();
2743    AluInstr *ir = nullptr;
2744    for (unsigned i = 0; i < 2; ++i) {
2745       ir = new AluInstr(op1_mov,
2746                         value_factory.dest(alu.def, i, pin_none),
2747                         value_factory.src64(alu.src[0], 0, i),
2748                         AluInstr::write);
2749       shader.emit_instruction(ir);
2750    }
2751    ir->set_alu_flag(alu_last_instr);
2752    return true;
2753 }
2754 
2755 bool
emit_alu_vec2_64(const nir_alu_instr & alu,Shader & shader)2756 emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader)
2757 {
2758    auto& value_factory = shader.value_factory();
2759    AluInstr *ir = nullptr;
2760    for (unsigned i = 0; i < 2; ++i) {
2761       ir = new AluInstr(op1_mov,
2762                         value_factory.dest(alu.def, i, pin_chan),
2763                         value_factory.src64(alu.src[0], 0, i),
2764                         AluInstr::write);
2765       shader.emit_instruction(ir);
2766    }
2767    for (unsigned i = 0; i < 2; ++i) {
2768       ir = new AluInstr(op1_mov,
2769                         value_factory.dest(alu.def, i + 2, pin_chan),
2770                         value_factory.src64(alu.src[1], 1, i),
2771                         AluInstr::write);
2772       shader.emit_instruction(ir);
2773    }
2774    ir->set_alu_flag(alu_last_instr);
2775    return true;
2776 }
2777 
2778 static bool
emit_pack_32_2x16_split(const nir_alu_instr & alu,Shader & shader)2779 emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader)
2780 {
2781    auto& value_factory = shader.value_factory();
2782 
2783    auto x = value_factory.temp_register();
2784    auto y = value_factory.temp_register();
2785    auto yy = value_factory.temp_register();
2786 
2787    shader.emit_instruction(new AluInstr(
2788       op1_flt32_to_flt16, x, value_factory.src(alu.src[0], 0), AluInstr::last_write));
2789 
2790    shader.emit_instruction(new AluInstr(
2791       op1_flt32_to_flt16, y, value_factory.src(alu.src[1], 0), AluInstr::last_write));
2792 
2793    shader.emit_instruction(
2794       new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write));
2795 
2796    shader.emit_instruction(new AluInstr(op2_or_int,
2797                                         value_factory.dest(alu.def, 0, pin_free),
2798                                         x,
2799                                         yy,
2800                                         AluInstr::last_write));
2801    return true;
2802 }
2803 
2804 static bool
emit_unpack_64_2x32_split(const nir_alu_instr & alu,int comp,Shader & shader)2805 emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader)
2806 {
2807    auto& value_factory = shader.value_factory();
2808    shader.emit_instruction(new AluInstr(op1_mov,
2809                                         value_factory.dest(alu.def, 0, pin_free),
2810                                         value_factory.src64(alu.src[0], 0, comp),
2811                                         AluInstr::last_write));
2812    return true;
2813 }
2814 
2815 static bool
emit_unpack_32_2x16_split_x(const nir_alu_instr & alu,Shader & shader)2816 emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader)
2817 {
2818    auto& value_factory = shader.value_factory();
2819    shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2820                                         value_factory.dest(alu.def, 0, pin_free),
2821                                         value_factory.src(alu.src[0], 0),
2822                                         AluInstr::last_write));
2823    return true;
2824 }
2825 static bool
emit_unpack_32_2x16_split_y(const nir_alu_instr & alu,Shader & shader)2826 emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader)
2827 {
2828    auto& value_factory = shader.value_factory();
2829    auto tmp = value_factory.temp_register();
2830    shader.emit_instruction(new AluInstr(op2_lshr_int,
2831                                         tmp,
2832                                         value_factory.src(alu.src[0], 0),
2833                                         value_factory.literal(16),
2834                                         AluInstr::last_write));
2835 
2836    shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2837                                         value_factory.dest(alu.def, 0, pin_free),
2838                                         tmp,
2839                                         AluInstr::last_write));
2840    return true;
2841 }
2842 
2843 static bool
emit_alu_trans_op1_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2844 emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2845 {
2846    auto& value_factory = shader.value_factory();
2847    const nir_alu_src& src0 = alu.src[0];
2848 
2849    AluInstr *ir = nullptr;
2850    auto pin = pin_for_components(alu);
2851 
2852    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2853       ir = new AluInstr(opcode,
2854                         value_factory.dest(alu.def, i, pin),
2855                         value_factory.src(src0, i),
2856                         AluInstr::last_write);
2857       ir->set_alu_flag(alu_is_trans);
2858       shader.emit_instruction(ir);
2859    }
2860 
2861    return true;
2862 }
2863 
2864 static bool
emit_alu_f2i32_or_u32_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2865 emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2866 {
2867    auto& value_factory = shader.value_factory();
2868    AluInstr *ir = nullptr;
2869 
2870    PRegister reg[4];
2871 
2872    int num_comp = alu.def.num_components;
2873 
2874    for (int i = 0; i < num_comp; ++i) {
2875       reg[i] = value_factory.temp_register();
2876       ir = new AluInstr(op1_trunc,
2877                         reg[i],
2878                         value_factory.src(alu.src[0], i),
2879                         AluInstr::last_write);
2880       shader.emit_instruction(ir);
2881    }
2882 
2883    auto pin = pin_for_components(alu);
2884    for (int i = 0; i < num_comp; ++i) {
2885       ir = new AluInstr(opcode,
2886                         value_factory.dest(alu.def, i, pin),
2887                         reg[i],
2888                         AluInstr::write);
2889       if (opcode == op1_flt_to_uint) {
2890          ir->set_alu_flag(alu_is_trans);
2891          ir->set_alu_flag(alu_last_instr);
2892       }
2893       shader.emit_instruction(ir);
2894    }
2895    ir->set_alu_flag(alu_last_instr);
2896    return true;
2897 }
2898 
2899 static bool
emit_alu_trans_op1_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2900 emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2901 {
2902    auto& value_factory = shader.value_factory();
2903    const nir_alu_src& src0 = alu.src[0];
2904 
2905    auto pin = pin_for_components(alu);
2906 
2907    const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
2908 
2909    for (unsigned j = 0; j < alu.def.num_components; ++j) {
2910       unsigned ncomp =  j == 3 ? 4 : 3;
2911 
2912       AluInstr::SrcValues srcs(ncomp);
2913       PRegister dest = value_factory.dest(alu.def, j, pin, (1 << ncomp) - 1);
2914 
2915       for (unsigned i = 0; i < ncomp; ++i)
2916          srcs[i] = value_factory.src(src0, j);
2917 
2918       auto ir = new AluInstr(opcode, dest, srcs, flags, ncomp);
2919       shader.emit_instruction(ir);
2920    }
2921    return true;
2922 }
2923 
2924 static bool
emit_alu_trans_op2_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2925 emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2926 {
2927    auto& value_factory = shader.value_factory();
2928 
2929    const nir_alu_src& src0 = alu.src[0];
2930    const nir_alu_src& src1 = alu.src[1];
2931 
2932    AluInstr *ir = nullptr;
2933 
2934    auto pin = pin_for_components(alu);
2935    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2936       ir = new AluInstr(opcode,
2937                         value_factory.dest(alu.def, i, pin),
2938                         value_factory.src(src0, i),
2939                         value_factory.src(src1, i),
2940                         AluInstr::last_write);
2941       ir->set_alu_flag(alu_is_trans);
2942       shader.emit_instruction(ir);
2943    }
2944    return true;
2945 }
2946 
2947 static bool
emit_alu_trans_op2_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2948 emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2949 {
2950    auto& value_factory = shader.value_factory();
2951 
2952    const nir_alu_src& src0 = alu.src[0];
2953    const nir_alu_src& src1 = alu.src[1];
2954 
2955    unsigned last_slot = 4;
2956 
2957    const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
2958 
2959    for (unsigned k = 0; k < alu.def.num_components; ++k) {
2960       AluInstr::SrcValues srcs(2 * last_slot);
2961       PRegister dest = value_factory.dest(alu.def, k, pin_free);
2962 
2963       for (unsigned i = 0; i < last_slot; ++i) {
2964          srcs[2 * i] = value_factory.src(src0, k);
2965          srcs[2 * i + 1] = value_factory.src(src1, k);
2966       }
2967 
2968       auto ir = new AluInstr(opcode, dest, srcs, flags, last_slot);
2969       ir->set_alu_flag(alu_is_cayman_trans);
2970       shader.emit_instruction(ir);
2971    }
2972    return true;
2973 }
2974 
2975 static bool
emit_alu_cube(const nir_alu_instr & alu,Shader & shader)2976 emit_alu_cube(const nir_alu_instr& alu, Shader& shader)
2977 {
2978    auto& value_factory = shader.value_factory();
2979    AluInstr *ir = nullptr;
2980 
2981    const uint16_t src0_chan[4] = {2, 2, 0, 1};
2982    const uint16_t src1_chan[4] = {1, 0, 2, 2};
2983 
2984    auto group = new AluGroup();
2985 
2986    for (int i = 0; i < 4; ++i) {
2987 
2988       ir = new AluInstr(op2_cube,
2989                         value_factory.dest(alu.def, i, pin_chan),
2990                         value_factory.src(alu.src[0], src0_chan[i]),
2991                         value_factory.src(alu.src[0], src1_chan[i]),
2992                         AluInstr::write);
2993       group->add_instruction(ir);
2994    }
2995    ir->set_alu_flag(alu_last_instr);
2996    shader.emit_instruction(group);
2997    return true;
2998 }
2999 
3000 const std::set<AluModifiers> AluInstr::empty;
3001 const std::set<AluModifiers> AluInstr::write({alu_write});
3002 const std::set<AluModifiers> AluInstr::last({alu_last_instr});
3003 const std::set<AluModifiers> AluInstr::last_write({alu_write, alu_last_instr});
3004 
3005 } // namespace r600
3006