1 /* -*- mesa-c++ -*-
2 * Copyright 2022 Collabora LTD
3 * Author: Gert Wollny <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "sfn_instr_alu.h"
8
9 #include "sfn_alu_defines.h"
10 #include "sfn_debug.h"
11 #include "sfn_instr_alugroup.h"
12 #include "sfn_instr_tex.h"
13 #include "sfn_shader.h"
14 #include "sfn_virtualvalues.h"
15
16 #include <algorithm>
17 #include <sstream>
18
19 namespace r600 {
20
21 using std::istream;
22 using std::string;
23 using std::vector;
24
AluInstr(EAluOp opcode,PRegister dest,SrcValues src,const std::set<AluModifiers> & flags,int slots)25 AluInstr::AluInstr(EAluOp opcode,
26 PRegister dest,
27 SrcValues src,
28 const std::set<AluModifiers>& flags,
29 int slots):
30 m_opcode(opcode),
31 m_dest(dest),
32 m_bank_swizzle(alu_vec_unknown),
33 m_cf_type(cf_alu),
34 m_alu_slots(slots)
35 {
36 m_src.swap(src);
37
38 if (m_src.size() == 3)
39 m_alu_flags.set(alu_op3);
40
41 for (auto f : flags)
42 m_alu_flags.set(f);
43
44 ASSERT_OR_THROW(m_src.size() ==
45 static_cast<size_t>(alu_ops.at(opcode).nsrc * m_alu_slots),
46 "Unexpected number of source values");
47
48 if (m_alu_flags.test(alu_write))
49 ASSERT_OR_THROW(dest, "Write flag is set, but no destination register is given");
50
51 update_uses();
52
53 if (dest && slots > 1) {
54 switch (m_opcode) {
55 case op2_dot_ieee: m_allowed_dest_mask = (1 << (5 - slots)) - 1;
56 break;
57 default:
58 if (has_alu_flag(alu_is_cayman_trans)) {
59 m_allowed_dest_mask = (1 << slots) - 1;
60 }
61 }
62 }
63 assert(!dest || (m_allowed_dest_mask & (1 << dest->chan())));
64 }
65
AluInstr(EAluOp opcode)66 AluInstr::AluInstr(EAluOp opcode):
67 AluInstr(opcode, nullptr, SrcValues(alu_ops.at(opcode).nsrc), {}, 1)
68 {
69 }
70
AluInstr(EAluOp opcode,int chan)71 AluInstr::AluInstr(EAluOp opcode, int chan):
72 AluInstr(opcode, nullptr, SrcValues(), {}, 1)
73 {
74 m_fallback_chan = chan;
75 }
76
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,const std::set<AluModifiers> & m_flags)77 AluInstr::AluInstr(EAluOp opcode,
78 PRegister dest,
79 PVirtualValue src0,
80 const std::set<AluModifiers>& m_flags):
81 AluInstr(opcode, dest, SrcValues{src0}, m_flags, 1)
82 {
83 }
84
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,const std::set<AluModifiers> & m_flags)85 AluInstr::AluInstr(EAluOp opcode,
86 PRegister dest,
87 PVirtualValue src0,
88 PVirtualValue src1,
89 const std::set<AluModifiers>& m_flags):
90 AluInstr(opcode, dest, SrcValues{src0, src1}, m_flags, 1)
91 {
92 }
93
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,PVirtualValue src2,const std::set<AluModifiers> & m_flags)94 AluInstr::AluInstr(EAluOp opcode,
95 PRegister dest,
96 PVirtualValue src0,
97 PVirtualValue src1,
98 PVirtualValue src2,
99 const std::set<AluModifiers>& m_flags):
100 AluInstr(opcode, dest, SrcValues{src0, src1, src2}, m_flags, 1)
101 {
102 }
103
AluInstr(ESDOp op,PVirtualValue src0,PVirtualValue src1,PVirtualValue address)104 AluInstr::AluInstr(ESDOp op,
105 PVirtualValue src0,
106 PVirtualValue src1,
107 PVirtualValue address):
108 m_lds_opcode(op)
109 {
110 set_alu_flag(alu_is_lds);
111
112 m_src.push_back(address);
113 if (src0) {
114 m_src.push_back(src0);
115 if (src1)
116 m_src.push_back(src1);
117 }
118 update_uses();
119 }
120
AluInstr(ESDOp op,const SrcValues & src,const std::set<AluModifiers> & flags)121 AluInstr::AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags):
122 m_lds_opcode(op),
123 m_src(src)
124 {
125 for (auto f : flags)
126 set_alu_flag(f);
127
128 set_alu_flag(alu_is_lds);
129 update_uses();
130 }
131
132 void
update_uses()133 AluInstr::update_uses()
134 {
135 for (auto& s : m_src) {
136 auto r = s->as_register();
137 if (r) {
138 r->add_use(this);
139 // move this to add_use
140 if (r->pin() == pin_array) {
141 auto array_elm = static_cast<LocalArrayValue *>(r);
142 auto addr = array_elm->addr();
143 if (addr && addr->as_register())
144 addr->as_register()->add_use(this);
145 }
146 }
147 auto u = s->as_uniform();
148 if (u && u->buf_addr() && u->buf_addr()->as_register())
149 u->buf_addr()->as_register()->add_use(this);
150 }
151
152 if (m_dest &&
153 (has_alu_flag(alu_write) ||
154 m_opcode == op1_mova_int ||
155 m_opcode == op1_set_cf_idx0 ||
156 m_opcode == op1_set_cf_idx1)) {
157 m_dest->add_parent(this);
158
159 if (m_dest->pin() == pin_array) {
160 // move this to add_parent
161 auto array_elm = static_cast<LocalArrayValue *>(m_dest);
162 auto addr = array_elm->addr();
163 if (addr && addr->as_register())
164 addr->as_register()->add_use(this);
165 }
166 }
167 }
168
169 void
accept(ConstInstrVisitor & visitor) const170 AluInstr::accept(ConstInstrVisitor& visitor) const
171 {
172 visitor.visit(*this);
173 }
174
175 void
accept(InstrVisitor & visitor)176 AluInstr::accept(InstrVisitor& visitor)
177 {
178 visitor.visit(this);
179 }
180
181 const std::map<ECFAluOpCode, std::string> AluInstr::cf_map = {
182 {cf_alu_break, "BREAK" },
183 {cf_alu_continue, "CONT" },
184 {cf_alu_else_after, "ELSE_AFTER" },
185 {cf_alu_extended, "EXTENDED" },
186 {cf_alu_pop_after, "POP_AFTER" },
187 {cf_alu_pop2_after, "POP2_AFTER" },
188 {cf_alu_push_before, "PUSH_BEFORE"}
189 };
190
191 const std::map<AluBankSwizzle, std::string> AluInstr::bank_swizzle_map = {
192 {alu_vec_012, "VEC_012"},
193 {alu_vec_021, "VEC_021"},
194 {alu_vec_102, "VEC_102"},
195 {alu_vec_120, "VEC_120"},
196 {alu_vec_201, "VEC_201"},
197 {alu_vec_210, "VEC_210"}
198 };
199
200 const AluModifiers AluInstr::src_rel_flags[3] = {
201 alu_src0_rel, alu_src1_rel, alu_src2_rel};
202
/* Bundle of an index mode and a bit field of print options for a value. */
struct ValuePrintFlags {
   ValuePrintFlags(int im, int f):
       index_mode(im),
       flags(f)
   {
   }

   int index_mode = 0;
   int flags = 0;

   /* Bits that can be combined in `flags` */
   static const int is_rel = 1 << 0;
   static const int has_abs = 1 << 1;
   static const int has_neg = 1 << 2;
   static const int literal_is_float = 1 << 3;
   static const int index_ar = 1 << 4;
   static const int index_loopidx = 1 << 5;
};
218
219 void
do_print(std::ostream & os) const220 AluInstr::do_print(std::ostream& os) const
221 {
222 const char swzchar[] = "xyzw01?_";
223
224 unsigned i = 0;
225
226 os << "ALU ";
227
228 if (has_alu_flag(alu_is_lds)) {
229 os << "LDS " << lds_ops.at(m_lds_opcode).name;
230 os << " __.x : ";
231 } else {
232
233 os << alu_ops.at(m_opcode).name;
234 if (has_alu_flag(alu_dst_clamp))
235 os << " CLAMP";
236
237 if (m_dest) {
238 if (has_alu_flag(alu_write) || m_dest->has_flag(Register::addr_or_idx)) {
239 os << " " << *m_dest;
240 } else {
241 os << " __"
242 << "." << swzchar[m_dest->chan()];
243 if (m_dest->pin() != pin_none)
244 os << "@" << m_dest->pin();
245 }
246 os << " : ";
247 } else {
248 os << " __." << swzchar[dest_chan()] << " : ";
249 }
250 }
251
252 const int n_source_per_slot =
253 has_alu_flag(alu_is_lds) ? m_src.size() : alu_ops.at(m_opcode).nsrc;
254
255
256 for (int s = 0; s < m_alu_slots; ++s) {
257
258 if (s > 0)
259 os << " +";
260
261 for (int k = 0; k < n_source_per_slot; ++k) {
262 int pflags = 0;
263 if (i)
264 os << ' ';
265 if (has_source_mod(i, mod_neg))
266 pflags |= ValuePrintFlags::has_neg;
267 if (has_alu_flag(src_rel_flags[k]))
268 pflags |= ValuePrintFlags::is_rel;
269 if (n_source_per_slot <= 2)
270 if (has_source_mod(i, mod_abs))
271 pflags |= ValuePrintFlags::has_abs;
272
273 if (pflags & ValuePrintFlags::has_neg)
274 os << '-';
275 if (pflags & ValuePrintFlags::has_abs)
276 os << '|';
277 os << *m_src[i];
278 if (pflags & ValuePrintFlags::has_abs)
279 os << '|';
280 ++i;
281 }
282 }
283
284 os << " {";
285 if (has_alu_flag(alu_write))
286 os << 'W';
287 if (has_alu_flag(alu_last_instr))
288 os << 'L';
289 if (has_alu_flag(alu_update_exec))
290 os << 'E';
291 if (has_alu_flag(alu_update_pred))
292 os << 'P';
293 os << "}";
294
295 auto bs_name = bank_swizzle_map.find(m_bank_swizzle);
296 if (bs_name != bank_swizzle_map.end())
297 os << ' ' << bs_name->second;
298
299 auto cf_name = cf_map.find(m_cf_type);
300 if (cf_name != cf_map.end())
301 os << ' ' << cf_name->second;
302 }
303
304 bool
can_propagate_src() const305 AluInstr::can_propagate_src() const
306 {
307 /* We can use the source in the next instruction */
308 if (!can_copy_propagate())
309 return false;
310
311 auto src_reg = m_src[0]->as_register();
312 if (!src_reg)
313 return true;
314
315 assert(m_dest);
316
317 if (!m_dest->has_flag(Register::ssa)) {
318 return false;
319 }
320
321 if (m_dest->pin() == pin_fully)
322 return m_dest->equal_to(*src_reg);
323
324 if (m_dest->pin() == pin_chan)
325 return src_reg->pin() == pin_none ||
326 src_reg->pin() == pin_free ||
327 (src_reg->pin() == pin_chan && src_reg->chan() == m_dest->chan());
328
329 return m_dest->pin() == pin_none || m_dest->pin() == pin_free;
330 }
331
332 class ReplaceIndirectArrayAddr : public RegisterVisitor {
333 public:
visit(Register & value)334 void visit(Register& value) override { (void)value; }
visit(LocalArray & value)335 void visit(LocalArray& value) override
336 {
337 (void)value;
338 unreachable("An array can't be used as address");
339 }
340 void visit(LocalArrayValue& value) override;
341 void visit(UniformValue& value) override;
visit(LiteralConstant & value)342 void visit(LiteralConstant& value) override { (void)value; }
visit(InlineConstant & value)343 void visit(InlineConstant& value) override { (void)value; }
344
345 PRegister new_addr;
346 };
347
visit(LocalArrayValue & value)348 void ReplaceIndirectArrayAddr::visit(LocalArrayValue& value)
349 {
350 if (new_addr->sel() == 0 && value.addr()
351 && value.addr()->as_register())
352 value.set_addr(new_addr);
353 }
354
visit(UniformValue & value)355 void ReplaceIndirectArrayAddr::visit(UniformValue& value)
356 {
357 if (value.buf_addr() && value.buf_addr()->as_register() &&
358 (new_addr->sel() == 1 || new_addr->sel() == 2)) {
359 value.set_buf_addr(new_addr);
360 }
361 }
362
update_indirect_addr(UNUSED PRegister old_reg,PRegister reg)363 void AluInstr::update_indirect_addr(UNUSED PRegister old_reg, PRegister reg)
364 {
365 ReplaceIndirectArrayAddr visitor;
366
367 visitor.new_addr = reg;
368 assert(reg->has_flag(Register::addr_or_idx));
369
370 if (m_dest)
371 m_dest->accept(visitor);
372
373 for (auto src : m_src)
374 src->accept(visitor);
375
376 reg->add_use(this);
377 }
378
379 bool
can_propagate_dest() const380 AluInstr::can_propagate_dest() const
381 {
382 if (!can_copy_propagate()) {
383 return false;
384 }
385
386 auto src_reg = m_src[0]->as_register();
387 if (!src_reg) {
388 return false;
389 }
390
391 assert(m_dest);
392
393 if (src_reg->pin() == pin_fully) {
394 return false;
395 }
396
397 if (!src_reg->has_flag(Register::ssa))
398 return false;
399
400 if (!m_dest->has_flag(Register::ssa))
401 return false;
402
403 if (src_reg->pin() == pin_chan)
404 return m_dest->pin() == pin_none || m_dest->pin() == pin_free ||
405 ((m_dest->pin() == pin_chan || m_dest->pin() == pin_group) &&
406 src_reg->chan() == m_dest->chan());
407
408 return (src_reg->pin() == pin_none || src_reg->pin() == pin_free);
409 }
410
411 bool
can_copy_propagate() const412 AluInstr::can_copy_propagate() const
413 {
414 if (m_opcode != op1_mov)
415 return false;
416
417 if (has_source_mod(0, mod_abs) || has_source_mod(0, mod_neg) ||
418 has_alu_flag(alu_dst_clamp))
419 return false;
420
421 return has_alu_flag(alu_write);
422 }
423
424 bool
replace_source(PRegister old_src,PVirtualValue new_src)425 AluInstr::replace_source(PRegister old_src, PVirtualValue new_src)
426 {
427 if (!can_replace_source(old_src, new_src))
428 return false;
429
430 return do_replace_source(old_src, new_src);
431 }
432
do_replace_source(PRegister old_src,PVirtualValue new_src)433 bool AluInstr::do_replace_source(PRegister old_src, PVirtualValue new_src)
434 {
435 bool process = false;
436
437 for (unsigned i = 0; i < m_src.size(); ++i) {
438 if (old_src->equal_to(*m_src[i])) {
439 m_src[i] = new_src;
440 process = true;
441 }
442 }
443 if (process) {
444 auto r = new_src->as_register();
445 if (r)
446 r->add_use(this);
447 old_src->del_use(this);
448 }
449
450 return process;
451 }
452
replace_src(int i,PVirtualValue new_src,uint32_t to_set,SourceMod to_clear)453 bool AluInstr::replace_src(int i, PVirtualValue new_src, uint32_t to_set,
454 SourceMod to_clear)
455 {
456 auto old_src = m_src[i]->as_register();
457 assert(old_src);
458
459 if (!can_replace_source(old_src, new_src))
460 return false;
461
462 assert(old_src);
463 old_src->del_use(this);
464
465 m_src[i] = new_src;
466
467 auto r = new_src->as_register();
468 if (r)
469 r->add_use(this);
470
471 m_source_modifiers |= to_set << (2 * i);
472 m_source_modifiers &= ~(to_clear << (2 * i));
473
474 return true;
475 }
476
477
can_replace_source(PRegister old_src,PVirtualValue new_src)478 bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src)
479 {
480 if (!check_readport_validation(old_src, new_src))
481 return false;
482
483 /* If the old or new source is an array element, we assume that there
484 * might have been an (untracked) indirect access, so don't replace
485 * this source */
486 if (old_src->pin() == pin_array && new_src->pin() == pin_array)
487 return false;
488
489 auto [addr, dummy, index] = indirect_addr();
490 auto addr_reg = addr ? addr->as_register() : nullptr;
491 auto index_reg = index ? index->as_register() : nullptr;
492
493 if (auto u = new_src->as_uniform()) {
494 if (u && u->buf_addr()) {
495
496 /* Don't mix indirect buffer and indirect registers, because the
497 * scheduler can't handle it yet. */
498 if (addr_reg)
499 return false;
500
501 /* Don't allow two different index registers, can't deal with that yet */
502 if (index_reg && !index_reg->equal_to(*u->buf_addr()))
503 return false;
504 }
505 }
506
507 if (auto new_addr = new_src->get_addr()) {
508 auto new_addr_reg = new_addr->as_register();
509 bool new_addr_lowered = new_addr_reg &&
510 new_addr_reg->has_flag(Register::addr_or_idx);
511
512 if (addr_reg) {
513 if (!addr_reg->equal_to(*new_addr) || new_addr_lowered ||
514 addr_reg->has_flag(Register::addr_or_idx))
515 return false;
516 }
517 if (m_dest->has_flag(Register::addr_or_idx)) {
518 if (new_src->pin() == pin_array) {
519 auto s = static_cast<const LocalArrayValue *>(new_src)->addr();
520 if (!s->as_inline_const() || !s->as_literal())
521 return false;
522 }
523 }
524 }
525 return true;
526 }
527
528 void
set_sources(SrcValues src)529 AluInstr::set_sources(SrcValues src)
530 {
531 for (auto& s : m_src) {
532 auto r = s->as_register();
533 if (r)
534 r->del_use(this);
535 }
536 m_src.swap(src);
537 for (auto& s : m_src) {
538 auto r = s->as_register();
539 if (r)
540 r->add_use(this);
541 }
542 }
543
allowed_src_chan_mask() const544 uint8_t AluInstr::allowed_src_chan_mask() const
545 {
546 if (m_alu_slots < 2)
547 return 0xf;
548
549 int chan_use_count[4] = {0};
550
551 for (auto s : m_src) {
552 auto r = s->as_register();
553 if (r)
554 ++chan_use_count[r->chan()];
555 }
556 /* Each channel can only be loaded in one of three cycles,
557 * so if a channel is already used three times, we can't
558 * add another source withthis channel.
559 * Since we want to move away from one channel to another, it
560 * is not important to know which is the old channel that will
561 * be freed by the channel switch.*/
562 int mask = 0;
563
564 /* Be conservative about channel use when using more than two
565 * slots. Currently a constellatioon of
566 *
567 * ALU d.x = f(r0.x, r1.y)
568 * ALU _.y = f(r2.y, r3.x)
569 * ALU _.z = f(r4.x, r5.y)
570 *
571 * will fail to be split. To get constellations like this to be scheduled
572 * properly will need some work on the bank swizzle check.
573 */
574 int maxuse = m_alu_slots > 2 ? 2 : 3;
575 for (int i = 0; i < 4; ++i) {
576 if (chan_use_count[i] < maxuse)
577 mask |= 1 << i;
578 }
579 return mask;
580 }
581
582 bool
replace_dest(PRegister new_dest,AluInstr * move_instr)583 AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
584 {
585 if (m_dest->equal_to(*new_dest))
586 return false;
587
588 if (m_dest->uses().size() > 1)
589 return false;
590
591 if (new_dest->pin() == pin_array)
592 return false;
593
594 /* Currently we bail out when an array write should be moved, because
595 * declaring an array write is currently not well defined. The
596 * Whole "backwards" copy propagation should dprobably be replaced by some
597 * forward peep holew optimization */
598 /*
599 if (new_dest->pin() == pin_array) {
600 auto dav = static_cast<const LocalArrayValue *>(new_dest)->addr();
601 for (auto s: m_src) {
602 if (s->pin() == pin_array) {
603 auto sav = static_cast<const LocalArrayValue *>(s)->addr();
604 if (dav && sav && dav->as_register() && !dav->equal_to(*sav))
605 return false;
606 }
607 }
608 }
609 */
610
611 if (m_dest->pin() == pin_chan && new_dest->chan() != m_dest->chan())
612 return false;
613
614 if (m_dest->pin() == pin_chan) {
615 if (new_dest->pin() == pin_group)
616 new_dest->set_pin(pin_chgr);
617 else if (new_dest->pin() != pin_chgr)
618 new_dest->set_pin(pin_chan);
619 }
620
621 m_dest = new_dest;
622 if (!move_instr->has_alu_flag(alu_last_instr))
623 reset_alu_flag(alu_last_instr);
624
625 if (has_alu_flag(alu_is_cayman_trans)) {
626 /* Copy propagation puts an instruction into the w channel, but we
627 * don't have the slots for a w channel */
628 if (m_dest->chan() == 3 && m_alu_slots < 4) {
629 m_alu_slots = 4;
630 assert(m_src.size() == 3);
631 m_src.push_back(m_src[0]);
632 }
633 }
634
635 return true;
636 }
637
638 void
pin_sources_to_chan()639 AluInstr::pin_sources_to_chan()
640 {
641 for (auto s : m_src) {
642 auto r = s->as_register();
643 if (r) {
644 if (r->pin() == pin_free)
645 r->set_pin(pin_chan);
646 else if (r->pin() == pin_group)
647 r->set_pin(pin_chgr);
648 }
649 }
650 }
651
652 bool
check_readport_validation(PRegister old_src,PVirtualValue new_src) const653 AluInstr::check_readport_validation(PRegister old_src, PVirtualValue new_src) const
654 {
655 if (m_src.size() < 3)
656 return true;
657
658 bool success = true;
659 AluReadportReservation rpr_sum;
660
661 unsigned nsrc = alu_ops.at(m_opcode).nsrc;
662 assert(nsrc * m_alu_slots == m_src.size());
663
664 for (int s = 0; s < m_alu_slots && success; ++s) {
665 PVirtualValue src[3];
666 auto ireg = m_src.begin() + s * nsrc;
667
668 for (unsigned i = 0; i < nsrc; ++i, ++ireg)
669 src[i] = old_src->equal_to(**ireg) ? new_src : *ireg;
670
671 AluBankSwizzle bs = alu_vec_012;
672 while (bs != alu_vec_unknown) {
673 AluReadportReservation rpr = rpr_sum;
674 if (rpr.schedule_vec_src(src, nsrc, bs)) {
675 rpr_sum = rpr;
676 break;
677 }
678 ++bs;
679 }
680
681 if (bs == alu_vec_unknown)
682 success = false;
683 }
684 return success;
685 }
686
687 void
add_extra_dependency(PVirtualValue value)688 AluInstr::add_extra_dependency(PVirtualValue value)
689 {
690 auto reg = value->as_register();
691 if (reg)
692 m_extra_dependencies.insert(reg);
693 }
694
695 bool
is_equal_to(const AluInstr & lhs) const696 AluInstr::is_equal_to(const AluInstr& lhs) const
697 {
698 if (lhs.m_opcode != m_opcode || lhs.m_bank_swizzle != m_bank_swizzle ||
699 lhs.m_cf_type != m_cf_type || lhs.m_alu_flags != m_alu_flags) {
700 return false;
701 }
702
703 if (m_dest) {
704 if (!lhs.m_dest) {
705 return false;
706 } else {
707 if (has_alu_flag(alu_write)) {
708 if (!m_dest->equal_to(*lhs.m_dest))
709 return false;
710 } else {
711 if (m_dest->chan() != lhs.m_dest->chan())
712 return false;
713 }
714 }
715 } else {
716 if (lhs.m_dest)
717 return false;
718 }
719
720 if (m_src.size() != lhs.m_src.size())
721 return false;
722
723 for (unsigned i = 0; i < m_src.size(); ++i) {
724 if (!m_src[i]->equal_to(*lhs.m_src[i]))
725 return false;
726 }
727
728 return true;
729 }
730
731 class ResolveIndirectArrayAddr : public ConstRegisterVisitor {
732 public:
visit(const Register & value)733 void visit(const Register& value) { (void)value; }
visit(const LocalArray & value)734 void visit(const LocalArray& value)
735 {
736 (void)value;
737 unreachable("An array can't be used as address");
738 }
739 void visit(const LocalArrayValue& value);
740 void visit(const UniformValue& value);
visit(const LiteralConstant & value)741 void visit(const LiteralConstant& value) { (void)value; }
visit(const InlineConstant & value)742 void visit(const InlineConstant& value) { (void)value; }
743
744 PRegister addr{nullptr};
745 PRegister index{nullptr};
746 bool addr_is_for_dest{false};
747 };
748
749 void
visit(const LocalArrayValue & value)750 ResolveIndirectArrayAddr::visit(const LocalArrayValue& value)
751 {
752 auto a = value.addr();
753 if (a) {
754 addr = a->as_register();
755 assert(!addr_is_for_dest);
756 }
757 }
758
759 void
visit(const UniformValue & value)760 ResolveIndirectArrayAddr::visit(const UniformValue& value)
761 {
762 auto a = value.buf_addr();
763 if (a) {
764 index = a->as_register();
765 }
766 }
767
768 std::tuple<PRegister, bool, PRegister>
indirect_addr() const769 AluInstr::indirect_addr() const
770 {
771 ResolveIndirectArrayAddr visitor;
772
773 if (m_dest) {
774 m_dest->accept(visitor);
775 if (visitor.addr)
776 visitor.addr_is_for_dest = true;
777 }
778
779 for (auto s : m_src) {
780 s->accept(visitor);
781 }
782 return {visitor.addr, visitor.addr_is_for_dest, visitor.index};
783 }
784
785 AluGroup *
split(ValueFactory & vf)786 AluInstr::split(ValueFactory& vf)
787 {
788 if (m_alu_slots == 1)
789 return nullptr;
790
791 sfn_log << SfnLog::instr << "Split " << *this << "\n";
792
793 auto group = new AluGroup();
794
795 m_dest->del_parent(this);
796
797 int start_slot = 0;
798 bool is_dot = m_opcode == op2_dot_ieee;
799 auto last_opcode = m_opcode;
800
801 if (is_dot) {
802 start_slot = m_dest->chan();
803 last_opcode = op2_mul_ieee;
804 }
805
806
807 for (int k = 0; k < m_alu_slots; ++k) {
808 int s = k + start_slot;
809
810 PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
811 if (dst->pin() != pin_chgr) {
812 auto pin = pin_chan;
813 if (dst->pin() == pin_group && m_dest->chan() == s)
814 pin = pin_chgr;
815 dst->set_pin(pin);
816 }
817
818 SrcValues src;
819 int nsrc = alu_ops.at(m_opcode).nsrc;
820 for (int i = 0; i < nsrc; ++i) {
821 auto old_src = m_src[k * nsrc + i];
822 // Make it easy for the scheduler and pin the register to the
823 // channel, otherwise scheduler would have to check whether a
824 // channel switch is possible
825 auto r = old_src->as_register();
826 if (r) {
827 if (r->pin() == pin_free || r->pin() == pin_none)
828 r->set_pin(pin_chan);
829 else if (r->pin() == pin_group)
830 r->set_pin(pin_chgr);
831 }
832 src.push_back(old_src);
833 }
834
835 auto opcode = k < m_alu_slots -1 ? m_opcode : last_opcode;
836
837
838 auto instr = new AluInstr(opcode, dst, src, {}, 1);
839 instr->set_blockid(block_id(), index());
840
841 if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
842 if (has_source_mod(nsrc * k + 0, mod_neg))
843 instr->set_source_mod(0, mod_neg);
844 if (has_source_mod(nsrc * k + 1, mod_neg))
845 instr->set_source_mod(1, mod_neg);
846 if (has_source_mod(nsrc * k + 2, mod_neg))
847 instr->set_source_mod(2, mod_neg);
848 if (has_source_mod(nsrc * k + 0, mod_abs))
849 instr->set_source_mod(0, mod_abs);
850 if (has_source_mod(nsrc * k + 1, mod_abs))
851 instr->set_source_mod(1, mod_abs);
852 }
853 if (has_alu_flag(alu_dst_clamp))
854 instr->set_alu_flag(alu_dst_clamp);
855
856 if (s == m_dest->chan())
857 instr->set_alu_flag(alu_write);
858
859 m_dest->add_parent(instr);
860 sfn_log << SfnLog::instr << " " << *instr << "\n";
861
862 if (!group->add_instruction(instr)) {
863 std::cerr << "Unable to schedule '" << *instr << "' into\n" << *group << "\n";
864
865 unreachable("Invalid group instruction");
866 }
867 }
868 group->set_blockid(block_id(), index());
869
870 for (auto s : m_src) {
871 auto r = s->as_register();
872 if (r) {
873 r->del_use(this);
874 }
875 }
876 group->set_origin(this);
877
878 return group;
879 }
880
881 /* Alu instructions that have SSA dest registers increase the regietsr
882 * pressure Alu instructions that read from SSA registers may decresase the
883 * register pressure hency evaluate a priorityx values based on register
884 * pressure change */
885 int
register_priority() const886 AluInstr::register_priority() const
887 {
888 int priority = 0;
889 if (!has_alu_flag(alu_no_schedule_bias)) {
890
891 if (m_dest) {
892 if (m_dest->has_flag(Register::ssa) && has_alu_flag(alu_write)) {
893 if (m_dest->pin() != pin_group && m_dest->pin() != pin_chgr &&
894 !m_dest->addr())
895 priority--;
896 } else {
897 // Arrays and registers are pre-allocated, hence scheduling
898 // assignments early is unlikely to increase register pressure
899 priority++;
900 }
901 }
902
903 for (const auto s : m_src) {
904 auto r = s->as_register();
905 if (r) {
906 if (r->has_flag(Register::ssa)) {
907 int pending = 0;
908 for (auto b : r->uses()) {
909 if (!b->is_scheduled())
910 ++pending;
911 }
912 if (pending == 1)
913 ++priority;
914 }
915 if (r->addr() && r->addr()->as_register())
916 priority += 2;
917 }
918 if (s->as_uniform())
919 ++priority;
920 }
921 }
922 return priority;
923 }
924
925 bool
propagate_death()926 AluInstr::propagate_death()
927 {
928 if (!m_dest)
929 return true;
930
931 if (m_dest->pin() == pin_group || m_dest->pin() == pin_chan) {
932 switch (m_opcode) {
933 case op2_interp_x:
934 case op2_interp_xy:
935 case op2_interp_z:
936 case op2_interp_zw:
937 reset_alu_flag(alu_write);
938 return false;
939 default:;
940 }
941 }
942
943 if (m_dest->pin() == pin_array)
944 return false;
945
946 /* We assume that nir does a good job in eliminating all ALU results that
947 * are not needed, and we don't let copy propagation doesn't make the
948 * instruction obsolete, so just keep all */
949 if (has_alu_flag(alu_is_cayman_trans))
950 return false;
951
952 for (auto& src : m_src) {
953 auto reg = src->as_register();
954 if (reg)
955 reg->del_use(this);
956 }
957 return true;
958 }
959
960 bool
has_lds_access() const961 AluInstr::has_lds_access() const
962 {
963 return has_alu_flag(alu_is_lds) || has_lds_queue_read();
964 }
965
966 bool
has_lds_queue_read() const967 AluInstr::has_lds_queue_read() const
968 {
969 for (auto& s : m_src) {
970 auto ic = s->as_inline_const();
971 if (!ic)
972 continue;
973
974 if (ic->sel() == ALU_SRC_LDS_OQ_A_POP || ic->sel() == ALU_SRC_LDS_OQ_B_POP)
975 return true;
976 }
977 return false;
978 }
979
980 struct OpDescr {
981 union {
982 EAluOp alu_opcode;
983 ESDOp lds_opcode;
984 };
985 int nsrc;
986 };
987
988 static std::map<std::string, OpDescr> s_alu_map_by_name;
989 static std::map<std::string, OpDescr> s_lds_map_by_name;
990
991 Instr::Pointer
from_string(istream & is,ValueFactory & value_factory,AluGroup * group,bool is_cayman)992 AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group, bool is_cayman)
993 {
994 vector<string> tokens;
995
996 while (is.good() && !is.eof()) {
997 string t;
998 is >> t;
999 if (t.length() > 0) {
1000 tokens.push_back(t);
1001 }
1002 }
1003
1004 std::set<AluModifiers> flags;
1005 auto t = tokens.begin();
1006
1007 bool is_lds = false;
1008
1009 if (*t == "LDS") {
1010 is_lds = true;
1011 t++;
1012 }
1013
1014 string opstr = *t++;
1015 string deststr = *t++;
1016
1017 if (deststr == "CLAMP") {
1018 flags.insert(alu_dst_clamp);
1019 deststr = *t++;
1020 }
1021
1022 assert(*t == ":");
1023 OpDescr op_descr = {{op_invalid}, -1};
1024
1025 if (is_lds) {
1026 auto op = s_lds_map_by_name.find(opstr);
1027 if (op == s_lds_map_by_name.end()) {
1028 for (auto [opcode, opdescr] : lds_ops) {
1029 if (opstr == opdescr.name) {
1030 op_descr.lds_opcode = opcode;
1031 op_descr.nsrc = opdescr.nsrc;
1032 s_alu_map_by_name[opstr] = op_descr;
1033 break;
1034 }
1035 }
1036
1037 if (op_descr.nsrc == -1) {
1038 std::cerr << "'" << opstr << "'";
1039 unreachable("Unknown opcode");
1040 return nullptr;
1041 }
1042 } else {
1043 op_descr = op->second;
1044 }
1045 } else {
1046 auto op = s_alu_map_by_name.find(opstr);
1047 if (op == s_alu_map_by_name.end()) {
1048 for (auto [opcode, opdescr] : alu_ops) {
1049 if (opstr == opdescr.name) {
1050 op_descr = {{opcode}, opdescr.nsrc};
1051 s_alu_map_by_name[opstr] = op_descr;
1052 break;
1053 }
1054 }
1055
1056 if (op_descr.nsrc == -1) {
1057 std::cerr << "'" << opstr << "'";
1058 unreachable("Unknown opcode");
1059 return nullptr;
1060 }
1061 } else {
1062 op_descr = op->second;
1063 }
1064 if (is_cayman) {
1065 switch (op_descr.alu_opcode) {
1066 case op1_cos:
1067 case op1_exp_ieee:
1068 case op1_log_clamped:
1069 case op1_recip_ieee:
1070 case op1_recipsqrt_ieee1:
1071 case op1_sqrt_ieee:
1072 case op1_sin:
1073 case op2_mullo_int:
1074 case op2_mulhi_int:
1075 case op2_mulhi_uint:
1076 flags.insert(alu_is_cayman_trans);
1077 default:
1078 ;
1079 }
1080 }
1081 }
1082
1083 int slots = 0;
1084
1085 uint32_t src_mods = 0;
1086 SrcValues sources;
1087 do {
1088 ++t;
1089 for (int i = 0; i < op_descr.nsrc; ++i) {
1090 string srcstr = *t++;
1091
1092 if (srcstr[0] == '-') {
1093 src_mods |= AluInstr::mod_neg << (2 * sources.size());
1094 srcstr = srcstr.substr(1);
1095 }
1096
1097 if (srcstr[0] == '|') {
1098 assert(srcstr[srcstr.length() - 1] == '|');
1099 src_mods |= AluInstr::mod_abs << (2 * sources.size());
1100 srcstr = srcstr.substr(1, srcstr.length() - 2);
1101 }
1102
1103 auto src = value_factory.src_from_string(srcstr);
1104 if (!src) {
1105 std::cerr << "Unable to create src[" << i << "] from " << srcstr << "\n";
1106 assert(src);
1107 }
1108 sources.push_back(src);
1109 }
1110 ++slots;
1111 } while (t != tokens.end() && *t == "+");
1112
1113 AluBankSwizzle bank_swizzle = alu_vec_unknown;
1114 ECFAluOpCode cf = cf_alu;
1115
1116 while (t != tokens.end()) {
1117
1118 switch ((*t)[0]) {
1119 case '{': {
1120 auto iflag = t->begin() + 1;
1121 while (iflag != t->end()) {
1122 if (*iflag == '}')
1123 break;
1124
1125 switch (*iflag) {
1126 case 'L':
1127 flags.insert(alu_last_instr);
1128 break;
1129 case 'W':
1130 flags.insert(alu_write);
1131 break;
1132 case 'E':
1133 flags.insert(alu_update_exec);
1134 break;
1135 case 'P':
1136 flags.insert(alu_update_pred);
1137 break;
1138 }
1139 ++iflag;
1140 }
1141 } break;
1142
1143 case 'V': {
1144 string bs = *t;
1145 if (bs == "VEC_012")
1146 bank_swizzle = alu_vec_012;
1147 else if (bs == "VEC_021")
1148 bank_swizzle = alu_vec_021;
1149 else if (bs == "VEC_102")
1150 bank_swizzle = alu_vec_102;
1151 else if (bs == "VEC_120")
1152 bank_swizzle = alu_vec_120;
1153 else if (bs == "VEC_201")
1154 bank_swizzle = alu_vec_201;
1155 else if (bs == "VEC_210")
1156 bank_swizzle = alu_vec_210;
1157 else {
1158 std::cerr << "'" << bs << "': ";
1159 unreachable("Unknowe bankswizzle given");
1160 }
1161 } break;
1162
1163 default: {
1164 string cf_str = *t;
1165 if (cf_str == "PUSH_BEFORE")
1166 cf = cf_alu_push_before;
1167 else if (cf_str == "POP_AFTER")
1168 cf = cf_alu_pop_after;
1169 else if (cf_str == "POP2_AFTER")
1170 cf = cf_alu_pop2_after;
1171 else if (cf_str == "EXTENDED")
1172 cf = cf_alu_extended;
1173 else if (cf_str == "BREAK")
1174 cf = cf_alu_break;
1175 else if (cf_str == "CONT")
1176 cf = cf_alu_continue;
1177 else if (cf_str == "ELSE_AFTER")
1178 cf = cf_alu_else_after;
1179 else {
1180 std::cerr << " '" << cf_str << "' ";
1181 unreachable("Unknown tocken in ALU instruction");
1182 }
1183 }
1184 }
1185 ++t;
1186 }
1187
1188 PRegister dest = nullptr;
1189 // construct instruction
1190 if (deststr != "(null)")
1191 dest = value_factory.dest_from_string(deststr);
1192
1193 AluInstr *retval = nullptr;
1194 if (is_lds)
1195 retval = new AluInstr(op_descr.lds_opcode, sources, flags);
1196 else
1197 retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots);
1198
1199 retval->m_source_modifiers = src_mods;
1200 retval->set_bank_swizzle(bank_swizzle);
1201 retval->set_cf_type(cf);
1202 if (group) {
1203 group->add_instruction(retval);
1204 retval = nullptr;
1205 }
1206 return retval;
1207 }
1208
1209 bool
do_ready() const1210 AluInstr::do_ready() const
1211 {
1212 /* Alu instructions are shuffled by the scheduler, so
1213 * we have to make sure that required ops are already
1214 * scheduled before marking this one ready */
1215 for (auto i : required_instr()) {
1216 if (i->is_dead())
1217 continue;
1218
1219 bool is_older_instr = i->block_id() <= block_id() &&
1220 i->index() < index();
1221 bool is_lds = i->as_alu() && i->as_alu()->has_lds_access();
1222 if (!i->is_scheduled() && (is_older_instr || is_lds))
1223 return false;
1224 }
1225
1226 for (auto s : m_src) {
1227 auto r = s->as_register();
1228 if (r) {
1229 if (!r->ready(block_id(), index()))
1230 return false;
1231 }
1232 auto u = s->as_uniform();
1233 if (u && u->buf_addr() && u->buf_addr()->as_register()) {
1234 if (!u->buf_addr()->as_register()->ready(block_id(), index()))
1235 return false;
1236 }
1237 }
1238
1239 if (m_dest && !m_dest->has_flag(Register::ssa)) {
1240 if (m_dest->pin() == pin_array) {
1241 auto av = static_cast<const LocalArrayValue *>(m_dest);
1242 auto addr = av->addr();
1243 /* For true indiect dest access we have to make sure that all
1244 * instructions that write the value before are schedukled */
1245 if (addr && (!addr->ready(block_id(), index()) ||
1246 !m_dest->ready(block_id(), index() - 1)))
1247 return false;
1248 }
1249
1250 /* If a register is updates, we have to make sure that uses before that
1251 * update are scheduled, otherwise we may use the updated value when we
1252 * shouldn't */
1253 for (auto u : m_dest->uses()) {
1254 /* TODO: This is working around some sloppy use updates, dead instrzuctions
1255 * should remove themselves from uses. */
1256 if (u->is_dead())
1257 continue;
1258 if (!u->is_scheduled() &&
1259 u->block_id() <= block_id() &&
1260 u->index() < index()) {
1261 return false;
1262 }
1263 }
1264 }
1265
1266 for (auto& r : m_extra_dependencies) {
1267 if (!r->ready(block_id(), index()))
1268 return false;
1269 }
1270
1271 return true;
1272 }
1273
1274 void
visit(AluGroup * instr)1275 AluInstrVisitor::visit(AluGroup *instr)
1276 {
1277 for (auto& i : *instr) {
1278 if (i)
1279 i->accept(*this);
1280 }
1281 }
1282
1283 void
visit(Block * instr)1284 AluInstrVisitor::visit(Block *instr)
1285 {
1286 for (auto& i : *instr)
1287 i->accept(*this);
1288 }
1289
1290 void
visit(IfInstr * instr)1291 AluInstrVisitor::visit(IfInstr *instr)
1292 {
1293 instr->predicate()->accept(*this);
1294 }
1295
is_kill() const1296 bool AluInstr::is_kill() const
1297 {
1298 if (has_alu_flag(alu_is_lds))
1299 return false;
1300
1301 switch (m_opcode) {
1302 case op2_kille:
1303 case op2_kille_int:
1304 case op2_killne:
1305 case op2_killne_int:
1306 case op2_killge:
1307 case op2_killge_int:
1308 case op2_killge_uint:
1309 case op2_killgt:
1310 case op2_killgt_int:
1311 case op2_killgt_uint:
1312 return true;
1313 default:
1314 return false;
1315 }
1316 }
1317
/* Optional modifier selector accepted by emit_alu_op1 (defaults to
 * mod_none there). AluInstr::from_nir passes the non-default values
 * together with op1_mov. */
enum AluMods {
   mod_none,       /* default: no modifier requested */
   mod_src0_abs,   /* passed for nir_op_fabs */
   mod_src0_neg,   /* passed for nir_op_fneg */
   mod_dest_clamp, /* passed for nir_op_fsat */
};
1324
/* Forward declarations of the file-local emit helpers that
 * AluInstr::from_nir dispatches to. All of them take the NIR ALU
 * instruction and the shader, and return bool. */

/* Bool-to-value conversion; the mask is an inline constant such as
 * ALU_SRC_1 or ALU_SRC_1_INT. */
static bool
emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);



/* Single-source operations. */
static bool
emit_alu_op1(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             AluMods mod = mod_none);

/* Helpers used for operations with 64 bit sources or destinations. */
static bool
emit_alu_op1_64bit(const nir_alu_instr& alu,
                   EAluOp opcode,
                   Shader& shader,
                   bool switch_chan);
static bool
emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_neg(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_op2_64bit(const nir_alu_instr& alu,
                   EAluOp opcode,
                   Shader& shader,
                   bool switch_order);
static bool
emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
                           EAluOp opcode,
                           Shader& shader,
                           bool switch_order);
static bool
emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
static bool
emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader);

/* Two- and three-source operations. */
static bool
emit_alu_op2(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             AluInstr::Op2Options opts = AluInstr::op2_opt_none);
static bool
emit_alu_op2_int(const nir_alu_instr& alu,
                 EAluOp opcode,
                 Shader& shader,
                 AluInstr::Op2Options opts = AluInstr::op2_opt_none);
static bool
emit_alu_op3(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             const std::array<int, 3>& src_shuffle = {0, 1, 2});

/* Vector "any"/"all" comparisons; nc is the component count. */
static bool
emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_any_all_fcomp(
   const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
static bool
emit_any_all_icomp(
   const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);

/* Pack/unpack helpers and the helper used for nir_op_ineg. */
static bool
emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader);
static bool
emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader);
static bool
emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader);

static bool
emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader);
static bool
emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader);

/* Dot products and vector construction. */
static bool
emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
static bool
emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader);
static bool
emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);

/* Variants selected by chip class in AluInstr::from_nir: the "_cayman"
 * ones when the chip class is ISA_CC_CAYMAN, the "_eg" ones otherwise. */
static bool
emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

static bool
emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

/* Float-to-int conversion used when the chip class is ISA_CC_EVERGREEN. */
static bool
emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

static bool
emit_alu_cube(const nir_alu_instr& alu, Shader& shader);

static bool
emit_fdph(const nir_alu_instr& alu, Shader& shader);
1440
1441 static bool
check_64_bit_op_src(nir_src * src,void * state)1442 check_64_bit_op_src(nir_src *src, void *state)
1443 {
1444 if (nir_src_bit_size(*src) == 64) {
1445 *(bool *)state = true;
1446 return false;
1447 }
1448 return true;
1449 }
1450
1451 static bool
check_64_bit_op_def(nir_def * def,void * state)1452 check_64_bit_op_def(nir_def *def, void *state)
1453 {
1454 if (def->bit_size == 64) {
1455 *(bool *)state = true;
1456 return false;
1457 }
1458 return true;
1459 }
1460
/* Translate one NIR ALU instruction by dispatching to the matching
 * emit helper.
 *
 * The dispatch runs in three stages, and the order matters because an
 * earlier stage returns before a later one is reached:
 *   1. instructions with a 64 bit source or definition,
 *   2. opcodes whose lowering depends on the chip class,
 *   3. the chip independent opcodes.
 *
 * Returns the result of the selected emit helper. Returns false for a
 * 64 bit instruction without a handler and for an unknown opcode (the
 * latter is also printed to stderr and asserts). */
bool
AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
{
   /* Check the sources first, and the definitions only if no 64 bit
    * source was found. */
   bool is_64bit_op = false;
   nir_foreach_src(&alu->instr, check_64_bit_op_src, &is_64bit_op);
   if (!is_64bit_op)
      nir_foreach_def(&alu->instr, check_64_bit_op_def, &is_64bit_op);

   if (is_64bit_op) {
      switch (alu->op) {
      /* The pack/unpack opcodes leave this switch and are handled by
       * the chip independent switch at the end of the function. */
      case nir_op_pack_64_2x32:
      case nir_op_unpack_64_2x32:
      case nir_op_pack_64_2x32_split:
      case nir_op_pack_half_2x16_split:
      case nir_op_unpack_64_2x32_split_x:
      case nir_op_unpack_64_2x32_split_y:
         break;
      case nir_op_mov:
         return emit_alu_mov_64bit(*alu, shader);
      case nir_op_fneg:
         return emit_alu_neg(*alu, shader);
      case nir_op_fsat:
         return emit_alu_fsat64(*alu, shader);
      case nir_op_ffract:
         return emit_alu_op1_64bit(*alu, op1_fract_64, shader, true);
      case nir_op_feq32:
         return emit_alu_op2_64bit_one_dst(*alu, op2_sete_64, shader, false);
      case nir_op_fge32:
         return emit_alu_op2_64bit_one_dst(*alu, op2_setge_64, shader, false);
      /* "less than" uses the "greater than" opcode with the sources
       * switched. */
      case nir_op_flt32:
         return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
      case nir_op_fneu32:
         return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
      case nir_op_ffma:
         return emit_alu_fma_64bit(*alu, op3_fma_64, shader);

      case nir_op_fadd:
         return emit_alu_op2_64bit(*alu, op2_add_64, shader, false);
      case nir_op_fmul:
         return emit_alu_op2_64bit(*alu, op2_mul_64, shader, false);
      case nir_op_fmax:
         return emit_alu_op2_64bit(*alu, op2_max_64, shader, false);
      case nir_op_fmin:
         return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
      case nir_op_b2f64:
         return emit_alu_b2f64(*alu, shader);
      case nir_op_f2f64:
         return emit_alu_f2f64(*alu, shader);
      case nir_op_i2f64:
         return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
      case nir_op_u2f64:
         return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
      case nir_op_f2f32:
         return emit_alu_f2f32(*alu, shader);
      case nir_op_fabs:
         return emit_alu_abs64(*alu, shader);
      case nir_op_fsqrt:
         return emit_alu_op1_64bit_trans(*alu, op1_sqrt_64, shader);
      case nir_op_frcp:
         return emit_alu_op1_64bit_trans(*alu, op1_recip_64, shader);
      case nir_op_frsq:
         return emit_alu_op1_64bit_trans(*alu, op1_recipsqrt_64, shader);
      case nir_op_vec2:
         return emit_alu_vec2_64(*alu, shader);
      /* Any other 64 bit instruction is reported as not handled. */
      default:
         return false;
         ;
      }
   }

   /* Chip class dependent opcodes. Opcodes that are not listed fall
    * out of these switches and reach the chip independent switch. */
   if (shader.chip_class() == ISA_CC_CAYMAN) {
      switch (alu->op) {
      case nir_op_fcos_amd:
         return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
      case nir_op_fexp2:
         return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
      case nir_op_flog2:
         return emit_alu_trans_op1_cayman(*alu, op1_log_clamped, shader);
      case nir_op_frcp:
         return emit_alu_trans_op1_cayman(*alu, op1_recip_ieee, shader);
      case nir_op_frsq:
         return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader);
      case nir_op_fsqrt:
         return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
      case nir_op_fsin_amd:
         return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
      case nir_op_i2f32:
         return emit_alu_op1(*alu, op1_int_to_flt, shader);
      case nir_op_u2f32:
         return emit_alu_op1(*alu, op1_uint_to_flt, shader);
      case nir_op_imul:
         return emit_alu_trans_op2_cayman(*alu, op2_mullo_int, shader);
      case nir_op_imul_high:
         return emit_alu_trans_op2_cayman(*alu, op2_mulhi_int, shader);
      case nir_op_umul_high:
         return emit_alu_trans_op2_cayman(*alu, op2_mulhi_uint, shader);
      case nir_op_f2u32:
         return emit_alu_op1(*alu, op1_flt_to_uint, shader);
      case nir_op_f2i32:
         return emit_alu_op1(*alu, op1_flt_to_int, shader);
      case nir_op_ishl:
         return emit_alu_op2_int(*alu, op2_lshl_int, shader);
      case nir_op_ishr:
         return emit_alu_op2_int(*alu, op2_ashr_int, shader);
      case nir_op_ushr:
         return emit_alu_op2_int(*alu, op2_lshr_int, shader);
      default:;
      }
   } else {
      /* Evergreen has its own float-to-int conversion helper; it is
       * checked before the generic "_eg" conversions below. */
      if (shader.chip_class() == ISA_CC_EVERGREEN) {
         switch (alu->op) {
         case nir_op_f2i32:
            return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_int, shader);
         case nir_op_f2u32:
            return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_uint, shader);
         default:;
         }
      }

      /* Shifts: chip classes that compare >= ISA_CC_R700 use the plain
       * integer op2 helper, the others use the "trans" op2 helper. */
      if (shader.chip_class() >= ISA_CC_R700) {
         switch (alu->op) {
         case nir_op_ishl:
            return emit_alu_op2_int(*alu, op2_lshl_int, shader);
         case nir_op_ishr:
            return emit_alu_op2_int(*alu, op2_ashr_int, shader);
         case nir_op_ushr:
            return emit_alu_op2_int(*alu, op2_lshr_int, shader);
         default:;
         }
      } else {
         switch (alu->op) {
         case nir_op_ishl:
            return emit_alu_trans_op2_eg(*alu, op2_lshl_int, shader);
         case nir_op_ishr:
            return emit_alu_trans_op2_eg(*alu, op2_ashr_int, shader);
         case nir_op_ushr:
            return emit_alu_trans_op2_eg(*alu, op2_lshr_int, shader);
         default:;
         }
      }

      /* Remaining opcodes that are emitted through the "_eg" helpers on
       * all non-Cayman chip classes. */
      switch (alu->op) {
      case nir_op_f2i32:
         return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
      case nir_op_f2u32:
         return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
      case nir_op_fcos_amd:
         return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
      case nir_op_fexp2:
         return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
      case nir_op_flog2:
         return emit_alu_trans_op1_eg(*alu, op1_log_clamped, shader);
      case nir_op_frcp:
         return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
      case nir_op_frsq:
         return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
      case nir_op_fsin_amd:
         return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
      case nir_op_fsqrt:
         return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
      case nir_op_i2f32:
         return emit_alu_trans_op1_eg(*alu, op1_int_to_flt, shader);
      case nir_op_u2f32:
         return emit_alu_trans_op1_eg(*alu, op1_uint_to_flt, shader);
      case nir_op_imul:
         return emit_alu_trans_op2_eg(*alu, op2_mullo_int, shader);
      case nir_op_imul_high:
         return emit_alu_trans_op2_eg(*alu, op2_mulhi_int, shader);
      case nir_op_umul_high:
         return emit_alu_trans_op2_eg(*alu, op2_mulhi_uint, shader);
      default:;
      }
   }

   /* Chip independent opcodes. */
   switch (alu->op) {
   case nir_op_b2b1:
      return emit_alu_op1(*alu, op1_mov, shader);
   case nir_op_b2b32:
      return emit_alu_op1(*alu, op1_mov, shader);
   case nir_op_b2f32:
      return emit_alu_b2x(*alu, ALU_SRC_1, shader);
   case nir_op_b2i32:
      return emit_alu_b2x(*alu, ALU_SRC_1_INT, shader);

   case nir_op_bfm:
      return emit_alu_op2_int(*alu, op2_bfm_int, shader, op2_opt_none);
   case nir_op_bit_count:
      return emit_alu_op1(*alu, op1_bcnt_int, shader);

   case nir_op_bitfield_reverse:
      return emit_alu_op1(*alu, op1_bfrev_int, shader);
   case nir_op_bitfield_select:
      return emit_alu_op3(*alu, op3_bfi_int, shader);

   case nir_op_b32all_fequal2:
      return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader);
   case nir_op_b32all_fequal3:
      return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader);
   case nir_op_b32all_fequal4:
      return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader);
   case nir_op_b32all_iequal2:
      return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader);
   case nir_op_b32all_iequal3:
      return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader);
   case nir_op_b32all_iequal4:
      return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader);
   case nir_op_b32any_fnequal2:
      return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader);
   case nir_op_b32any_fnequal3:
      return emit_any_all_fcomp(*alu, op2_setne, 3, false, shader);
   case nir_op_b32any_fnequal4:
      return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader);
   case nir_op_b32any_inequal2:
      return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader);
   case nir_op_b32any_inequal3:
      return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader);
   case nir_op_b32any_inequal4:
      return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader);
   /* The select opcodes based on cnde pass their sources in the order
    * {0, 2, 1}. */
   case nir_op_b32csel:
      return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});

   case nir_op_fabs:
      return emit_alu_op1(*alu, op1_mov, shader, mod_src0_abs);
   case nir_op_fadd:
      return emit_alu_op2(*alu, op2_add, shader);
   case nir_op_fceil:
      return emit_alu_op1(*alu, op1_ceil, shader);
   case nir_op_fcsel:
      return emit_alu_op3(*alu, op3_cnde, shader, {0, 2, 1});
   case nir_op_fcsel_ge:
      return emit_alu_op3(*alu, op3_cndge, shader, {0, 1, 2});
   case nir_op_fcsel_gt:
      return emit_alu_op3(*alu, op3_cndgt, shader, {0, 1, 2});

   case nir_op_fdph:
      return emit_fdph(*alu, shader);
   /* fdot2 and fdot3 use emit_dot for chip classes that compare
    * >= ISA_CC_EVERGREEN and emit_dot4 otherwise. */
   case nir_op_fdot2:
      if (shader.chip_class() >= ISA_CC_EVERGREEN)
         return emit_dot(*alu, 2, shader);
      else
         return emit_dot4(*alu, 2, shader);
   case nir_op_fdot3:
      if (shader.chip_class() >= ISA_CC_EVERGREEN)
         return emit_dot(*alu, 3, shader);
      else
         return emit_dot4(*alu, 3, shader);
   case nir_op_fdot4:
      return emit_dot4(*alu, 4, shader);

   case nir_op_feq32:
   case nir_op_feq:
      return emit_alu_op2(*alu, op2_sete_dx10, shader);
   case nir_op_ffloor:
      return emit_alu_op1(*alu, op1_floor, shader);
   case nir_op_ffract:
      return emit_alu_op1(*alu, op1_fract, shader);
   case nir_op_fge32:
      return emit_alu_op2(*alu, op2_setge_dx10, shader);
   case nir_op_fge:
      return emit_alu_op2(*alu, op2_setge_dx10, shader);
   case nir_op_find_lsb:
      return emit_alu_op1(*alu, op1_ffbl_int, shader);

   /* "less than" comparisons use the "greater than" opcode together
    * with op2_opt_reverse. */
   case nir_op_flt32:
      return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
   case nir_op_flt:
      return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
   case nir_op_fmax:
      return emit_alu_op2(*alu, op2_max_dx10, shader);
   case nir_op_fmin:
      return emit_alu_op2(*alu, op2_min_dx10, shader);

   /* fmul uses op2_mul_ieee unless the shader has the
    * sh_legacy_math_rules flag; with the flag it shares op2_mul with
    * fmulz. */
   case nir_op_fmul:
      if (!shader.has_flag(Shader::sh_legacy_math_rules))
         return emit_alu_op2(*alu, op2_mul_ieee, shader);
      FALLTHROUGH;
   case nir_op_fmulz:
      return emit_alu_op2(*alu, op2_mul, shader);

   case nir_op_fneg:
      return emit_alu_op1(*alu, op1_mov, shader, mod_src0_neg);
   case nir_op_fneu32:
      return emit_alu_op2(*alu, op2_setne_dx10, shader);
   case nir_op_fneu:
      return emit_alu_op2(*alu, op2_setne_dx10, shader);

   case nir_op_fround_even:
      return emit_alu_op1(*alu, op1_rndne, shader);
   case nir_op_fsat:
      return emit_alu_op1(*alu, op1_mov, shader, mod_dest_clamp);
   case nir_op_fsub:
      return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
   case nir_op_ftrunc:
      return emit_alu_op1(*alu, op1_trunc, shader);
   case nir_op_iadd:
      return emit_alu_op2_int(*alu, op2_add_int, shader);
   case nir_op_iand:
      return emit_alu_op2_int(*alu, op2_and_int, shader);
   case nir_op_ibfe:
      return emit_alu_op3(*alu, op3_bfe_int, shader);
   case nir_op_i32csel_ge:
      return emit_alu_op3(*alu, op3_cndge_int, shader, {0, 1, 2});
   case nir_op_i32csel_gt:
      return emit_alu_op3(*alu, op3_cndgt_int, shader, {0, 1, 2});
   case nir_op_ieq32:
      return emit_alu_op2_int(*alu, op2_sete_int, shader);
   case nir_op_ieq:
      return emit_alu_op2_int(*alu, op2_sete_int, shader);
   case nir_op_ifind_msb_rev:
      return emit_alu_op1(*alu, op1_ffbh_int, shader);
   case nir_op_ige32:
      return emit_alu_op2_int(*alu, op2_setge_int, shader);
   case nir_op_ige:
      return emit_alu_op2_int(*alu, op2_setge_int, shader);
   case nir_op_ilt32:
      return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
   case nir_op_ilt:
      return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
   case nir_op_imax:
      return emit_alu_op2_int(*alu, op2_max_int, shader);
   case nir_op_imin:
      return emit_alu_op2_int(*alu, op2_min_int, shader);
   case nir_op_ine32:
      return emit_alu_op2_int(*alu, op2_setne_int, shader);
   case nir_op_ine:
      return emit_alu_op2_int(*alu, op2_setne_int, shader);
   case nir_op_ineg:
      return emit_alu_comb_with_zero(*alu, op2_sub_int, shader);
   case nir_op_inot:
      return emit_alu_op1(*alu, op1_not_int, shader);
   case nir_op_ior:
      return emit_alu_op2_int(*alu, op2_or_int, shader);
   case nir_op_isub:
      return emit_alu_op2_int(*alu, op2_sub_int, shader);
   case nir_op_ixor:
      return emit_alu_op2_int(*alu, op2_xor_int, shader);
   case nir_op_pack_64_2x32:
      return emit_pack_64_2x32(*alu, shader);
   case nir_op_unpack_64_2x32:
      return emit_unpack_64_2x32(*alu, shader);
   case nir_op_pack_64_2x32_split:
      return emit_pack_64_2x32_split(*alu, shader);
   case nir_op_pack_half_2x16_split:
      return emit_pack_32_2x16_split(*alu, shader);
   case nir_op_slt:
      return emit_alu_op2(*alu, op2_setgt, shader, op2_opt_reverse);
   case nir_op_sge:
      return emit_alu_op2(*alu, op2_setge, shader);
   case nir_op_seq:
      return emit_alu_op2(*alu, op2_sete, shader);
   case nir_op_sne:
      return emit_alu_op2(*alu, op2_setne, shader);
   case nir_op_ubfe:
      return emit_alu_op3(*alu, op3_bfe_uint, shader);
   case nir_op_ufind_msb_rev:
      return emit_alu_op1(*alu, op1_ffbh_uint, shader);
   case nir_op_uge32:
      return emit_alu_op2_int(*alu, op2_setge_uint, shader);
   case nir_op_uge:
      return emit_alu_op2_int(*alu, op2_setge_uint, shader);
   case nir_op_ult32:
      return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
   case nir_op_ult:
      return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
   case nir_op_umad24:
      return emit_alu_op3(*alu, op3_muladd_uint24, shader, {0, 1, 2});
   case nir_op_umax:
      return emit_alu_op2_int(*alu, op2_max_uint, shader);
   case nir_op_umin:
      return emit_alu_op2_int(*alu, op2_min_uint, shader);
   case nir_op_umul24:
      return emit_alu_op2(*alu, op2_mul_uint24, shader);
   case nir_op_unpack_64_2x32_split_x:
      return emit_unpack_64_2x32_split(*alu, 0, shader);
   case nir_op_unpack_64_2x32_split_y:
      return emit_unpack_64_2x32_split(*alu, 1, shader);
   case nir_op_unpack_half_2x16_split_x:
      return emit_unpack_32_2x16_split_x(*alu, shader);
   case nir_op_unpack_half_2x16_split_y:
      return emit_unpack_32_2x16_split_y(*alu, shader);

   /* ffma uses op3_muladd_ieee unless the shader has the
    * sh_legacy_math_rules flag; with the flag it shares op3_muladd
    * with ffmaz. */
   case nir_op_ffma:
      if (!shader.has_flag(Shader::sh_legacy_math_rules))
         return emit_alu_op3(*alu, op3_muladd_ieee, shader);
      FALLTHROUGH;
   case nir_op_ffmaz:
      return emit_alu_op3(*alu, op3_muladd, shader);

   case nir_op_mov:
      return emit_alu_op1(*alu, op1_mov, shader);
   /* NOTE(review): nir_op_f2i32 is already returned for by both chip
    * class branches above whenever the instruction is not 64 bit, so
    * this case looks unreachable for such instructions - confirm. */
   case nir_op_f2i32:
      return emit_alu_op1(*alu, op1_flt_to_int, shader);
   case nir_op_vec2:
      return emit_create_vec(*alu, 2, shader);
   case nir_op_vec3:
      return emit_create_vec(*alu, 3, shader);
   case nir_op_vec4:
      return emit_create_vec(*alu, 4, shader);

   case nir_op_cube_amd:
      return emit_alu_cube(*alu, shader);
   /* Unknown opcode: print the instruction, assert, and report failure
    * when assertions are compiled out. */
   default:
      fprintf(stderr, "Unknown instruction '");
      nir_print_instr(&alu->instr, stderr);
      fprintf(stderr, "'\n");
      assert(0);
      return false;
   }
}
1870
1871 static Pin
pin_for_components(const nir_alu_instr & alu)1872 pin_for_components(const nir_alu_instr& alu)
1873 {
1874 return alu.def.num_components == 1 ? pin_free : pin_none;
1875 }
1876
1877 static bool
emit_alu_op1_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_chan)1878 emit_alu_op1_64bit(const nir_alu_instr& alu,
1879 EAluOp opcode,
1880 Shader& shader,
1881 bool switch_chan)
1882 {
1883 auto& value_factory = shader.value_factory();
1884
1885 auto group = new AluGroup();
1886
1887 AluInstr *ir = nullptr;
1888
1889 int swz[2] = {0, 1};
1890 if (switch_chan) {
1891 swz[0] = 1;
1892 swz[1] = 0;
1893 }
1894
1895 for (unsigned i = 0; i < alu.def.num_components; ++i) {
1896 ir = new AluInstr(opcode,
1897 value_factory.dest(alu.def, 2 * i, pin_chan),
1898 value_factory.src64(alu.src[0], i, swz[0]),
1899 {alu_write});
1900 group->add_instruction(ir);
1901
1902 ir = new AluInstr(opcode,
1903 value_factory.dest(alu.def, 2 * i + 1, pin_chan),
1904 value_factory.src64(alu.src[0], i, swz[1]),
1905 {alu_write});
1906 group->add_instruction(ir);
1907 }
1908 if (ir)
1909 ir->set_alu_flag(alu_last_instr);
1910 shader.emit_instruction(group);
1911 return true;
1912 }
1913
1914 static bool
emit_alu_mov_64bit(const nir_alu_instr & alu,Shader & shader)1915 emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader)
1916 {
1917 auto& value_factory = shader.value_factory();
1918
1919 AluInstr *ir = nullptr;
1920
1921 for (unsigned i = 0; i < alu.def.num_components; ++i) {
1922 for (unsigned c = 0; c < 2; ++c) {
1923 ir = new AluInstr(op1_mov,
1924 value_factory.dest(alu.def, 2 * i + c, pin_free),
1925 value_factory.src64(alu.src[0], i, c),
1926 {alu_write});
1927 shader.emit_instruction(ir);
1928 }
1929 }
1930 if (ir)
1931 ir->set_alu_flag(alu_last_instr);
1932 return true;
1933 }
1934
1935 static bool
emit_alu_neg(const nir_alu_instr & alu,Shader & shader)1936 emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
1937 {
1938 auto& value_factory = shader.value_factory();
1939
1940 AluInstr *ir = nullptr;
1941
1942 for (unsigned i = 0; i < alu.def.num_components; ++i) {
1943 for (unsigned c = 0; c < 2; ++c) {
1944 ir = new AluInstr(op1_mov,
1945 value_factory.dest(alu.def, 2 * i + c, pin_chan),
1946 value_factory.src64(alu.src[0], i, c),
1947 {alu_write});
1948 shader.emit_instruction(ir);
1949 }
1950 ir->set_source_mod(0, AluInstr::mod_neg);
1951 }
1952 if (ir)
1953 ir->set_alu_flag(alu_last_instr);
1954
1955 return true;
1956 }
1957
1958 static bool
emit_alu_abs64(const nir_alu_instr & alu,Shader & shader)1959 emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
1960 {
1961 auto& value_factory = shader.value_factory();
1962
1963 assert(alu.def.num_components == 1);
1964
1965 shader.emit_instruction(new AluInstr(op1_mov,
1966 value_factory.dest(alu.def, 0, pin_chan),
1967 value_factory.src64(alu.src[0], 0, 0),
1968 AluInstr::write));
1969
1970 auto ir = new AluInstr(op1_mov,
1971 value_factory.dest(alu.def, 1, pin_chan),
1972 value_factory.src64(alu.src[0], 0, 1),
1973 AluInstr::last_write);
1974 ir->set_source_mod(0, AluInstr::mod_abs);
1975 shader.emit_instruction(ir);
1976 return true;
1977 }
1978
1979 static bool
try_propagat_fsat64(const nir_alu_instr & alu,Shader & shader)1980 try_propagat_fsat64(const nir_alu_instr& alu, Shader& shader)
1981 {
1982 auto& value_factory = shader.value_factory();
1983 auto src0 = value_factory.src64(alu.src[0], 0, 0);
1984 auto reg0 = src0->as_register();
1985 if (!reg0)
1986 return false;
1987
1988 if (!reg0->has_flag(Register::ssa))
1989 return false;
1990
1991 if (reg0->parents().size() != 1)
1992 return false;
1993
1994 if (!reg0->uses().empty())
1995 return false;
1996
1997 auto parent = (*reg0->parents().begin())->as_alu();
1998 if (!parent)
1999 return false;
2000
2001 auto opinfo = alu_ops.at(parent->opcode());
2002 if (!opinfo.can_clamp)
2003 return false;
2004
2005 parent->set_alu_flag(alu_dst_clamp);
2006 return true;
2007 }
2008
2009
2010 static bool
emit_alu_fsat64(const nir_alu_instr & alu,Shader & shader)2011 emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader)
2012 {
2013 auto& value_factory = shader.value_factory();
2014
2015 assert(alu.def.num_components == 1);
2016
2017 if (try_propagat_fsat64(alu, shader)) {
2018 auto ir = new AluInstr(op1_mov,
2019 value_factory.dest(alu.def, 0, pin_chan),
2020 value_factory.src64(alu.src[0], 0, 0),
2021 AluInstr::write);
2022 shader.emit_instruction(ir);
2023
2024 shader.emit_instruction(new AluInstr(op1_mov,
2025 value_factory.dest(alu.def, 1, pin_chan),
2026 value_factory.src64(alu.src[0], 0, 1),
2027 AluInstr::last_write));
2028 } else {
2029
2030 /* dest clamp doesn't work on plain 64 bit move, so add a zero
2031 * to apply the modifier */
2032
2033 auto group = new AluGroup();
2034 auto ir = new AluInstr(op2_add_64,
2035 value_factory.dest(alu.def, 0, pin_chan),
2036 value_factory.src64(alu.src[0], 0, 1),
2037 value_factory.literal(0),
2038 AluInstr::write);
2039 ir->set_alu_flag(alu_dst_clamp);
2040 group->add_instruction(ir);
2041
2042 group->add_instruction(new AluInstr(op2_add_64,
2043 value_factory.dest(alu.def, 1, pin_chan),
2044 value_factory.src64(alu.src[0], 0, 0),
2045 value_factory.literal(0),
2046 AluInstr::last_write));
2047 shader.emit_instruction(group);
2048
2049 }
2050 return true;
2051 }
2052
2053
2054 static bool
emit_alu_op2_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_src)2055 emit_alu_op2_64bit(const nir_alu_instr& alu,
2056 EAluOp opcode,
2057 Shader& shader,
2058 bool switch_src)
2059 {
2060 auto& value_factory = shader.value_factory();
2061 auto group = new AluGroup();
2062 AluInstr *ir = nullptr;
2063 int order[2] = {0, 1};
2064 if (switch_src) {
2065 order[0] = 1;
2066 order[1] = 0;
2067 }
2068
2069 int num_emit0 = opcode == op2_mul_64 ? 3 : 1;
2070
2071 std::array<std::array<PRegister, 4>,2> tmp;
2072 for (unsigned k = 0; k < alu.def.num_components; ++k) {
2073 tmp[k][0] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 1), 0);
2074 tmp[k][1] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 1), 1);
2075 tmp[k][2] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 0), 2);
2076 tmp[k][3] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 0), 3);
2077 }
2078
2079 assert(num_emit0 == 1 || alu.def.num_components == 1);
2080
2081 for (unsigned k = 0; k < alu.def.num_components; ++k) {
2082 int i = 0;
2083 for (; i < num_emit0; ++i) {
2084 auto dest = i < 2 ? value_factory.dest(alu.def, i, pin_chan)
2085 : value_factory.dummy_dest(i);
2086
2087 ir = new AluInstr(opcode,
2088 dest,
2089 tmp[k][0],
2090 tmp[k][1],
2091 i < 2 ? AluInstr::write : AluInstr::empty);
2092 group->add_instruction(ir);
2093 }
2094
2095 auto dest =
2096 i == 1 ? value_factory.dest(alu.def, i, pin_chan) : value_factory.dummy_dest(i);
2097
2098 ir = new AluInstr(opcode,
2099 dest,
2100 tmp[k][2],
2101 tmp[k][3],
2102 i == 1 ? AluInstr::write : AluInstr::empty);
2103 group->add_instruction(ir);
2104 }
2105 if (ir)
2106 ir->set_alu_flag(alu_last_instr);
2107
2108 shader.emit_instruction(group);
2109 return true;
2110 }
2111
2112 static bool
emit_alu_op2_64bit_one_dst(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_order)2113 emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
2114 EAluOp opcode,
2115 Shader& shader,
2116 bool switch_order)
2117 {
2118 auto& value_factory = shader.value_factory();
2119 AluInstr *ir = nullptr;
2120 int order[2] = {0, 1};
2121 if (switch_order) {
2122 order[0] = 1;
2123 order[1] = 0;
2124 }
2125
2126 AluInstr::SrcValues src(4);
2127
2128 for (unsigned k = 0; k < alu.def.num_components; ++k) {
2129 auto dest = value_factory.dest(alu.def, 2 * k, pin_chan);
2130 src[0] = value_factory.src64(alu.src[order[0]], k, 1);
2131 src[1] = value_factory.src64(alu.src[order[1]], k, 1);
2132 src[2] = value_factory.src64(alu.src[order[0]], k, 0);
2133 src[3] = value_factory.src64(alu.src[order[1]], k, 0);
2134
2135 ir = new AluInstr(opcode, dest, src, AluInstr::write, 2);
2136 ir->set_alu_flag(alu_64bit_op);
2137
2138 shader.emit_instruction(ir);
2139 }
2140 if (ir)
2141 ir->set_alu_flag(alu_last_instr);
2142
2143 return true;
2144 }
2145
2146 static bool
emit_alu_op1_64bit_trans(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2147 emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2148 {
2149 auto& value_factory = shader.value_factory();
2150 auto group = new AluGroup();
2151 AluInstr *ir = nullptr;
2152 for (unsigned i = 0; i < 3; ++i) {
2153 ir = new AluInstr(opcode,
2154 i < 2 ? value_factory.dest(alu.def, i, pin_chan)
2155 : value_factory.dummy_dest(i),
2156 value_factory.src64(alu.src[0], 0, 1),
2157 value_factory.src64(alu.src[0], 0, 0),
2158 i < 2 ? AluInstr::write : AluInstr::empty);
2159
2160 if (opcode == op1_sqrt_64)
2161 ir->set_source_mod(0, AluInstr::mod_abs);
2162 group->add_instruction(ir);
2163 }
2164 if (ir)
2165 ir->set_alu_flag(alu_last_instr);
2166 shader.emit_instruction(group);
2167 return true;
2168 }
2169
2170 static bool
emit_alu_fma_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2171 emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2172 {
2173 auto& value_factory = shader.value_factory();
2174 auto group = new AluGroup();
2175 AluInstr *ir = nullptr;
2176 for (unsigned i = 0; i < 4; ++i) {
2177
2178 int chan = i < 3 ? 1 : 0;
2179 auto dest =
2180 i < 2 ? value_factory.dest(alu.def, i, pin_chan) : value_factory.dummy_dest(i);
2181
2182 ir = new AluInstr(opcode,
2183 dest,
2184 value_factory.src64(alu.src[0], 0, chan),
2185 value_factory.src64(alu.src[1], 0, chan),
2186 value_factory.src64(alu.src[2], 0, chan),
2187 i < 2 ? AluInstr::write : AluInstr::empty);
2188 group->add_instruction(ir);
2189 }
2190 if (ir)
2191 ir->set_alu_flag(alu_last_instr);
2192 shader.emit_instruction(group);
2193 return true;
2194 }
2195
2196 static bool
emit_alu_b2f64(const nir_alu_instr & alu,Shader & shader)2197 emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader)
2198 {
2199 auto& value_factory = shader.value_factory();
2200
2201 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2202 auto ir = new AluInstr(op2_and_int,
2203 value_factory.dest(alu.def, 2 * i, pin_group),
2204 value_factory.src(alu.src[0], i),
2205 value_factory.zero(),
2206 {alu_write});
2207 shader.emit_instruction(ir);
2208
2209 ir = new AluInstr(op2_and_int,
2210 value_factory.dest(alu.def, 2 * i + 1, pin_group),
2211 value_factory.src(alu.src[0], i),
2212 value_factory.literal(0x3ff00000),
2213 {alu_write});
2214 shader.emit_instruction(ir);
2215 }
2216 return true;
2217 }
2218
2219 static bool
emit_alu_i2f64(const nir_alu_instr & alu,EAluOp op,Shader & shader)2220 emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
2221 {
2222 /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
2223 auto& value_factory = shader.value_factory();
2224 auto group = new AluGroup();
2225 AluInstr *ir = nullptr;
2226
2227 assert(alu.def.num_components == 1);
2228
2229 auto tmpx = value_factory.temp_register();
2230 shader.emit_instruction(new AluInstr(op2_and_int,
2231 tmpx,
2232 value_factory.src(alu.src[0], 0),
2233 value_factory.literal(0xffffff00),
2234 AluInstr::write));
2235 auto tmpy = value_factory.temp_register();
2236 shader.emit_instruction(new AluInstr(op2_and_int,
2237 tmpy,
2238 value_factory.src(alu.src[0], 0),
2239 value_factory.literal(0xff),
2240 AluInstr::last_write));
2241
2242 auto tmpx2 = value_factory.temp_register();
2243 auto tmpy2 = value_factory.temp_register();
2244 shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, AluInstr::last_write));
2245 shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, AluInstr::last_write));
2246
2247 auto tmpx3 = value_factory.temp_register(0);
2248 auto tmpy3 = value_factory.temp_register(1);
2249 auto tmpz3 = value_factory.temp_register(2);
2250 auto tmpw3 = value_factory.temp_register(3);
2251
2252 ir = new AluInstr(op1_flt32_to_flt64, tmpx3, tmpx2, AluInstr::write);
2253 group->add_instruction(ir);
2254 ir = new AluInstr(op1_flt32_to_flt64, tmpy3, value_factory.zero(), AluInstr::write);
2255 group->add_instruction(ir);
2256 ir = new AluInstr(op1_flt32_to_flt64, tmpz3, tmpy2, AluInstr::write);
2257 group->add_instruction(ir);
2258 ir =
2259 new AluInstr(op1_flt32_to_flt64, tmpw3, value_factory.zero(), AluInstr::last_write);
2260 group->add_instruction(ir);
2261 shader.emit_instruction(group);
2262
2263 group = new AluGroup();
2264
2265 ir = new AluInstr(op2_add_64,
2266 value_factory.dest(alu.def, 0, pin_chan),
2267 tmpy3,
2268 tmpw3,
2269 AluInstr::write);
2270 group->add_instruction(ir);
2271 ir = new AluInstr(op2_add_64,
2272 value_factory.dest(alu.def, 1, pin_chan),
2273 tmpx3,
2274 tmpz3,
2275 AluInstr::write);
2276 group->add_instruction(ir);
2277 shader.emit_instruction(group);
2278
2279 return true;
2280 }
2281
2282 static bool
emit_alu_f2f64(const nir_alu_instr & alu,Shader & shader)2283 emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
2284 {
2285 auto& value_factory = shader.value_factory();
2286 auto group = new AluGroup();
2287 AluInstr *ir = nullptr;
2288
2289 assert(alu.def.num_components == 1);
2290
2291 ir = new AluInstr(op1_flt32_to_flt64,
2292 value_factory.dest(alu.def, 0, pin_chan),
2293 value_factory.src(alu.src[0], 0),
2294 AluInstr::write);
2295 group->add_instruction(ir);
2296 ir = new AluInstr(op1_flt32_to_flt64,
2297 value_factory.dest(alu.def, 1, pin_chan),
2298 value_factory.zero(),
2299 AluInstr::last_write);
2300 group->add_instruction(ir);
2301 shader.emit_instruction(group);
2302 return true;
2303 }
2304
2305 static bool
emit_alu_f2f32(const nir_alu_instr & alu,Shader & shader)2306 emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader)
2307 {
2308 auto& value_factory = shader.value_factory();
2309 auto group = new AluGroup();
2310 AluInstr *ir = nullptr;
2311
2312 ir = new AluInstr(op1v_flt64_to_flt32,
2313 value_factory.dest(alu.def, 0, pin_chan),
2314 value_factory.src64(alu.src[0], 0, 1),
2315 {alu_write});
2316 group->add_instruction(ir);
2317 ir = new AluInstr(op1v_flt64_to_flt32,
2318 value_factory.dummy_dest(1),
2319 value_factory.src64(alu.src[0], 0, 0),
2320 AluInstr::last);
2321 group->add_instruction(ir);
2322 shader.emit_instruction(group);
2323 return true;
2324 }
2325
2326 static bool
emit_alu_b2x(const nir_alu_instr & alu,AluInlineConstants mask,Shader & shader)2327 emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader)
2328 {
2329 auto& value_factory = shader.value_factory();
2330 AluInstr *ir = nullptr;
2331 auto pin = pin_for_components(alu);
2332
2333 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2334 auto src = value_factory.src(alu.src[0], i);
2335 ir = new AluInstr(op2_and_int,
2336 value_factory.dest(alu.def, i, pin),
2337 src,
2338 value_factory.inline_const(mask, 0),
2339 {alu_write});
2340 shader.emit_instruction(ir);
2341 }
2342 if (ir)
2343 ir->set_alu_flag(alu_last_instr);
2344 return true;
2345 }
2346
2347 static bool
emit_alu_op1(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluMods mod)2348 emit_alu_op1(const nir_alu_instr& alu,
2349 EAluOp opcode,
2350 Shader& shader,
2351 AluMods mod)
2352 {
2353 auto& value_factory = shader.value_factory();
2354
2355 AluInstr *ir = nullptr;
2356 auto pin = pin_for_components(alu);
2357
2358 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2359 ir = new AluInstr(opcode,
2360 value_factory.dest(alu.def, i, pin),
2361 value_factory.src(alu.src[0], i),
2362 {alu_write});
2363 switch (mod) {
2364 case mod_src0_abs:
2365 ir->set_source_mod(0, AluInstr::mod_abs); break;
2366 case mod_src0_neg:
2367 ir->set_source_mod(0, AluInstr::mod_neg); break;
2368 case mod_dest_clamp:
2369 ir->set_alu_flag(alu_dst_clamp);
2370 default:;
2371 }
2372 shader.emit_instruction(ir);
2373 }
2374 if (ir)
2375 ir->set_alu_flag(alu_last_instr);
2376 return true;
2377 }
2378
2379 static bool
emit_alu_op2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)2380 emit_alu_op2(const nir_alu_instr& alu,
2381 EAluOp opcode,
2382 Shader& shader,
2383 AluInstr::Op2Options opts)
2384 {
2385 auto& value_factory = shader.value_factory();
2386 const nir_alu_src *src0 = &alu.src[0];
2387 const nir_alu_src *src1 = &alu.src[1];
2388
2389 int idx0 = 0;
2390 int idx1 = 1;
2391 if (opts & AluInstr::op2_opt_reverse) {
2392 std::swap(src0, src1);
2393 std::swap(idx0, idx1);
2394 }
2395
2396 bool src1_negate = (opts & AluInstr::op2_opt_neg_src1);
2397
2398 auto pin = pin_for_components(alu);
2399 AluInstr *ir = nullptr;
2400 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2401 ir = new AluInstr(opcode,
2402 value_factory.dest(alu.def, i, pin),
2403 value_factory.src(*src0, i),
2404 value_factory.src(*src1, i),
2405 {alu_write});
2406 if (src1_negate)
2407 ir->set_source_mod(1, AluInstr::mod_neg);
2408 shader.emit_instruction(ir);
2409 }
2410 if (ir)
2411 ir->set_alu_flag(alu_last_instr);
2412 return true;
2413 }
2414
2415 static bool
emit_alu_op2_int(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)2416 emit_alu_op2_int(const nir_alu_instr& alu,
2417 EAluOp opcode,
2418 Shader& shader,
2419 AluInstr::Op2Options opts)
2420 {
2421 return emit_alu_op2(alu, opcode, shader, opts);
2422 }
2423
2424 static bool
emit_alu_op3(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,const std::array<int,3> & src_shuffle)2425 emit_alu_op3(const nir_alu_instr& alu,
2426 EAluOp opcode,
2427 Shader& shader,
2428 const std::array<int, 3>& src_shuffle)
2429 {
2430 auto& value_factory = shader.value_factory();
2431 const nir_alu_src *src[3];
2432 src[0] = &alu.src[src_shuffle[0]];
2433 src[1] = &alu.src[src_shuffle[1]];
2434 src[2] = &alu.src[src_shuffle[2]];
2435
2436 auto pin = pin_for_components(alu);
2437 AluInstr *ir = nullptr;
2438 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2439 ir = new AluInstr(opcode,
2440 value_factory.dest(alu.def, i, pin),
2441 value_factory.src(*src[0], i),
2442 value_factory.src(*src[1], i),
2443 value_factory.src(*src[2], i),
2444 {alu_write});
2445 ir->set_alu_flag(alu_write);
2446 shader.emit_instruction(ir);
2447 }
2448 if (ir)
2449 ir->set_alu_flag(alu_last_instr);
2450 return true;
2451 }
2452
2453 static bool
emit_any_all_fcomp2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2454 emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2455 {
2456 AluInstr *ir = nullptr;
2457 auto& value_factory = shader.value_factory();
2458
2459 PRegister tmp[2];
2460 tmp[0] = value_factory.temp_register();
2461 tmp[1] = value_factory.temp_register();
2462
2463 for (unsigned i = 0; i < 2; ++i) {
2464 ir = new AluInstr(opcode,
2465 tmp[i],
2466 value_factory.src(alu.src[0], i),
2467 value_factory.src(alu.src[1], i),
2468 {alu_write});
2469 shader.emit_instruction(ir);
2470 }
2471 ir->set_alu_flag(alu_last_instr);
2472
2473 opcode = (opcode == op2_setne_dx10) ? op2_or_int : op2_and_int;
2474 ir = new AluInstr(opcode,
2475 value_factory.dest(alu.def, 0, pin_free),
2476 tmp[0],
2477 tmp[1],
2478 AluInstr::last_write);
2479 shader.emit_instruction(ir);
2480 return true;
2481 }
2482
2483 static bool
emit_any_all_fcomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2484 emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2485 {
2486 /* This should probabyl be lowered in nir */
2487 auto& value_factory = shader.value_factory();
2488
2489 AluInstr *ir = nullptr;
2490 RegisterVec4 v = value_factory.temp_vec4(pin_group);
2491 AluInstr::SrcValues s;
2492
2493 for (int i = 0; i < nc; ++i) {
2494 s.push_back(v[i]);
2495 }
2496
2497 for (int i = nc; i < 4; ++i)
2498 s.push_back(value_factory.inline_const(all ? ALU_SRC_1 : ALU_SRC_0, 0));
2499
2500 for (int i = 0; i < nc; ++i) {
2501 ir = new AluInstr(op,
2502 v[i],
2503 value_factory.src(alu.src[0], i),
2504 value_factory.src(alu.src[1], i),
2505 {alu_write});
2506 shader.emit_instruction(ir);
2507 }
2508 if (ir)
2509 ir->set_alu_flag(alu_last_instr);
2510
2511 auto max_val = value_factory.temp_register();
2512
2513 ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
2514
2515 if (all) {
2516 ir->set_source_mod(0, AluInstr::mod_neg);
2517 ir->set_source_mod(1, AluInstr::mod_neg);
2518 ir->set_source_mod(2, AluInstr::mod_neg);
2519 ir->set_source_mod(3, AluInstr::mod_neg);
2520 }
2521
2522 shader.emit_instruction(ir);
2523
2524 if (all)
2525 op = (op == op2_sete) ? op2_sete_dx10 : op2_setne_dx10;
2526 else
2527 op = (op == op2_sete) ? op2_setne_dx10 : op2_sete_dx10;
2528
2529 ir = new AluInstr(op,
2530 value_factory.dest(alu.def, 0, pin_free),
2531 max_val,
2532 value_factory.inline_const(ALU_SRC_1, 0),
2533 AluInstr::last_write);
2534 if (all)
2535 ir->set_source_mod(1, AluInstr::mod_neg);
2536 shader.emit_instruction(ir);
2537
2538 return true;
2539 }
2540
2541 static bool
emit_any_all_icomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2542 emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2543 {
2544 /* This should probabyl be lowered in nir */
2545 auto& value_factory = shader.value_factory();
2546
2547 AluInstr *ir = nullptr;
2548 PRegister v[6];
2549
2550 auto dest = value_factory.dest(alu.def, 0, pin_free);
2551
2552 for (int i = 0; i < nc + nc / 2; ++i)
2553 v[i] = value_factory.temp_register();
2554
2555 EAluOp combine = all ? op2_and_int : op2_or_int;
2556
2557 for (int i = 0; i < nc; ++i) {
2558 ir = new AluInstr(op,
2559 v[i],
2560 value_factory.src(alu.src[0], i),
2561 value_factory.src(alu.src[1], i),
2562 AluInstr::write);
2563 shader.emit_instruction(ir);
2564 }
2565 if (ir)
2566 ir->set_alu_flag(alu_last_instr);
2567
2568 if (nc == 2) {
2569 ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write);
2570 shader.emit_instruction(ir);
2571 return true;
2572 }
2573
2574 if (nc == 3) {
2575 ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write);
2576 shader.emit_instruction(ir);
2577 ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write);
2578 shader.emit_instruction(ir);
2579 return true;
2580 }
2581
2582 if (nc == 4) {
2583 ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write);
2584 shader.emit_instruction(ir);
2585 ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write);
2586 shader.emit_instruction(ir);
2587 ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write);
2588 shader.emit_instruction(ir);
2589 return true;
2590 }
2591
2592 return false;
2593 }
2594
2595 static bool
emit_dot(const nir_alu_instr & alu,int n,Shader & shader)2596 emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
2597 {
2598 auto& value_factory = shader.value_factory();
2599 const nir_alu_src& src0 = alu.src[0];
2600 const nir_alu_src& src1 = alu.src[1];
2601
2602 auto dest = value_factory.dest(alu.def, 0, pin_chan);
2603
2604 AluInstr::SrcValues srcs(2 * n);
2605
2606 for (int i = 0; i < n; ++i) {
2607 srcs[2 * i] = value_factory.src(src0, i);
2608 srcs[2 * i + 1] = value_factory.src(src1, i);
2609 }
2610
2611 AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n);
2612
2613 shader.emit_instruction(ir);
2614 shader.set_flag(Shader::sh_disble_sb);
2615
2616 return true;
2617 }
2618
2619 static bool
emit_dot4(const nir_alu_instr & alu,int nelm,Shader & shader)2620 emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader)
2621 {
2622 auto& value_factory = shader.value_factory();
2623 const nir_alu_src& src0 = alu.src[0];
2624 const nir_alu_src& src1 = alu.src[1];
2625
2626 auto dest = value_factory.dest(alu.def, 0, pin_free);
2627
2628 AluInstr::SrcValues srcs(8);
2629
2630 for (int i = 0; i < nelm; ++i) {
2631 srcs[2 * i] = value_factory.src(src0, i);
2632 srcs[2 * i + 1] = value_factory.src(src1, i);
2633 }
2634
2635 for (int i = nelm; i < 4; ++i) {
2636 srcs[2 * i] = value_factory.zero();
2637 srcs[2 * i + 1] = value_factory.zero();
2638 }
2639
2640 AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2641
2642 shader.emit_instruction(ir);
2643 return true;
2644 }
2645
2646 static bool
emit_fdph(const nir_alu_instr & alu,Shader & shader)2647 emit_fdph(const nir_alu_instr& alu, Shader& shader)
2648 {
2649 auto& value_factory = shader.value_factory();
2650 const nir_alu_src& src0 = alu.src[0];
2651 const nir_alu_src& src1 = alu.src[1];
2652
2653 auto dest = value_factory.dest(alu.def, 0, pin_free);
2654
2655 AluInstr::SrcValues srcs(8);
2656
2657 for (int i = 0; i < 3; ++i) {
2658 srcs[2 * i] = value_factory.src(src0, i);
2659 srcs[2 * i + 1] = value_factory.src(src1, i);
2660 }
2661
2662 srcs[6] = value_factory.one();
2663 srcs[7] = value_factory.src(src1, 3);
2664
2665 AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2666 shader.emit_instruction(ir);
2667 return true;
2668 }
2669
2670 static bool
emit_create_vec(const nir_alu_instr & instr,unsigned nc,Shader & shader)2671 emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
2672 {
2673 auto& value_factory = shader.value_factory();
2674 AluInstr *ir = nullptr;
2675
2676 for (unsigned i = 0; i < nc; ++i) {
2677 auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
2678 auto dst = value_factory.dest(instr.def, i, pin_none);
2679 shader.emit_instruction(new AluInstr(op1_mov, dst, src, {alu_write}));
2680 }
2681
2682 if (ir)
2683 ir->set_alu_flag(alu_last_instr);
2684 return true;
2685 }
2686
2687 static bool
emit_alu_comb_with_zero(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2688 emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2689 {
2690 auto& value_factory = shader.value_factory();
2691 AluInstr *ir = nullptr;
2692 auto pin = pin_for_components(alu);
2693 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2694 ir = new AluInstr(opcode,
2695 value_factory.dest(alu.def, i, pin),
2696 value_factory.zero(),
2697 value_factory.src(alu.src[0], i),
2698 AluInstr::write);
2699 shader.emit_instruction(ir);
2700 }
2701 if (ir)
2702 ir->set_alu_flag(alu_last_instr);
2703
2704 return true;
2705 }
2706
2707 static bool
emit_pack_64_2x32_split(const nir_alu_instr & alu,Shader & shader)2708 emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader)
2709 {
2710 auto& value_factory = shader.value_factory();
2711 AluInstr *ir = nullptr;
2712 for (unsigned i = 0; i < 2; ++i) {
2713 ir = new AluInstr(op1_mov,
2714 value_factory.dest(alu.def, i, pin_none),
2715 value_factory.src(alu.src[i], 0),
2716 AluInstr::write);
2717 shader.emit_instruction(ir);
2718 }
2719 ir->set_alu_flag(alu_last_instr);
2720 return true;
2721 }
2722
2723 static bool
emit_pack_64_2x32(const nir_alu_instr & alu,Shader & shader)2724 emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2725 {
2726 auto& value_factory = shader.value_factory();
2727 AluInstr *ir = nullptr;
2728 for (unsigned i = 0; i < 2; ++i) {
2729 ir = new AluInstr(op1_mov,
2730 value_factory.dest(alu.def, i, pin_none),
2731 value_factory.src(alu.src[0], i),
2732 AluInstr::write);
2733 shader.emit_instruction(ir);
2734 }
2735 ir->set_alu_flag(alu_last_instr);
2736 return true;
2737 }
2738
2739 static bool
emit_unpack_64_2x32(const nir_alu_instr & alu,Shader & shader)2740 emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2741 {
2742 auto& value_factory = shader.value_factory();
2743 AluInstr *ir = nullptr;
2744 for (unsigned i = 0; i < 2; ++i) {
2745 ir = new AluInstr(op1_mov,
2746 value_factory.dest(alu.def, i, pin_none),
2747 value_factory.src64(alu.src[0], 0, i),
2748 AluInstr::write);
2749 shader.emit_instruction(ir);
2750 }
2751 ir->set_alu_flag(alu_last_instr);
2752 return true;
2753 }
2754
2755 bool
emit_alu_vec2_64(const nir_alu_instr & alu,Shader & shader)2756 emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader)
2757 {
2758 auto& value_factory = shader.value_factory();
2759 AluInstr *ir = nullptr;
2760 for (unsigned i = 0; i < 2; ++i) {
2761 ir = new AluInstr(op1_mov,
2762 value_factory.dest(alu.def, i, pin_chan),
2763 value_factory.src64(alu.src[0], 0, i),
2764 AluInstr::write);
2765 shader.emit_instruction(ir);
2766 }
2767 for (unsigned i = 0; i < 2; ++i) {
2768 ir = new AluInstr(op1_mov,
2769 value_factory.dest(alu.def, i + 2, pin_chan),
2770 value_factory.src64(alu.src[1], 1, i),
2771 AluInstr::write);
2772 shader.emit_instruction(ir);
2773 }
2774 ir->set_alu_flag(alu_last_instr);
2775 return true;
2776 }
2777
2778 static bool
emit_pack_32_2x16_split(const nir_alu_instr & alu,Shader & shader)2779 emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader)
2780 {
2781 auto& value_factory = shader.value_factory();
2782
2783 auto x = value_factory.temp_register();
2784 auto y = value_factory.temp_register();
2785 auto yy = value_factory.temp_register();
2786
2787 shader.emit_instruction(new AluInstr(
2788 op1_flt32_to_flt16, x, value_factory.src(alu.src[0], 0), AluInstr::last_write));
2789
2790 shader.emit_instruction(new AluInstr(
2791 op1_flt32_to_flt16, y, value_factory.src(alu.src[1], 0), AluInstr::last_write));
2792
2793 shader.emit_instruction(
2794 new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write));
2795
2796 shader.emit_instruction(new AluInstr(op2_or_int,
2797 value_factory.dest(alu.def, 0, pin_free),
2798 x,
2799 yy,
2800 AluInstr::last_write));
2801 return true;
2802 }
2803
2804 static bool
emit_unpack_64_2x32_split(const nir_alu_instr & alu,int comp,Shader & shader)2805 emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader)
2806 {
2807 auto& value_factory = shader.value_factory();
2808 shader.emit_instruction(new AluInstr(op1_mov,
2809 value_factory.dest(alu.def, 0, pin_free),
2810 value_factory.src64(alu.src[0], 0, comp),
2811 AluInstr::last_write));
2812 return true;
2813 }
2814
2815 static bool
emit_unpack_32_2x16_split_x(const nir_alu_instr & alu,Shader & shader)2816 emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader)
2817 {
2818 auto& value_factory = shader.value_factory();
2819 shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2820 value_factory.dest(alu.def, 0, pin_free),
2821 value_factory.src(alu.src[0], 0),
2822 AluInstr::last_write));
2823 return true;
2824 }
2825 static bool
emit_unpack_32_2x16_split_y(const nir_alu_instr & alu,Shader & shader)2826 emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader)
2827 {
2828 auto& value_factory = shader.value_factory();
2829 auto tmp = value_factory.temp_register();
2830 shader.emit_instruction(new AluInstr(op2_lshr_int,
2831 tmp,
2832 value_factory.src(alu.src[0], 0),
2833 value_factory.literal(16),
2834 AluInstr::last_write));
2835
2836 shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2837 value_factory.dest(alu.def, 0, pin_free),
2838 tmp,
2839 AluInstr::last_write));
2840 return true;
2841 }
2842
2843 static bool
emit_alu_trans_op1_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2844 emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2845 {
2846 auto& value_factory = shader.value_factory();
2847 const nir_alu_src& src0 = alu.src[0];
2848
2849 AluInstr *ir = nullptr;
2850 auto pin = pin_for_components(alu);
2851
2852 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2853 ir = new AluInstr(opcode,
2854 value_factory.dest(alu.def, i, pin),
2855 value_factory.src(src0, i),
2856 AluInstr::last_write);
2857 ir->set_alu_flag(alu_is_trans);
2858 shader.emit_instruction(ir);
2859 }
2860
2861 return true;
2862 }
2863
2864 static bool
emit_alu_f2i32_or_u32_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2865 emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2866 {
2867 auto& value_factory = shader.value_factory();
2868 AluInstr *ir = nullptr;
2869
2870 PRegister reg[4];
2871
2872 int num_comp = alu.def.num_components;
2873
2874 for (int i = 0; i < num_comp; ++i) {
2875 reg[i] = value_factory.temp_register();
2876 ir = new AluInstr(op1_trunc,
2877 reg[i],
2878 value_factory.src(alu.src[0], i),
2879 AluInstr::last_write);
2880 shader.emit_instruction(ir);
2881 }
2882
2883 auto pin = pin_for_components(alu);
2884 for (int i = 0; i < num_comp; ++i) {
2885 ir = new AluInstr(opcode,
2886 value_factory.dest(alu.def, i, pin),
2887 reg[i],
2888 AluInstr::write);
2889 if (opcode == op1_flt_to_uint) {
2890 ir->set_alu_flag(alu_is_trans);
2891 ir->set_alu_flag(alu_last_instr);
2892 }
2893 shader.emit_instruction(ir);
2894 }
2895 ir->set_alu_flag(alu_last_instr);
2896 return true;
2897 }
2898
2899 static bool
emit_alu_trans_op1_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2900 emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2901 {
2902 auto& value_factory = shader.value_factory();
2903 const nir_alu_src& src0 = alu.src[0];
2904
2905 auto pin = pin_for_components(alu);
2906
2907 const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
2908
2909 for (unsigned j = 0; j < alu.def.num_components; ++j) {
2910 unsigned ncomp = j == 3 ? 4 : 3;
2911
2912 AluInstr::SrcValues srcs(ncomp);
2913 PRegister dest = value_factory.dest(alu.def, j, pin, (1 << ncomp) - 1);
2914
2915 for (unsigned i = 0; i < ncomp; ++i)
2916 srcs[i] = value_factory.src(src0, j);
2917
2918 auto ir = new AluInstr(opcode, dest, srcs, flags, ncomp);
2919 shader.emit_instruction(ir);
2920 }
2921 return true;
2922 }
2923
2924 static bool
emit_alu_trans_op2_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2925 emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2926 {
2927 auto& value_factory = shader.value_factory();
2928
2929 const nir_alu_src& src0 = alu.src[0];
2930 const nir_alu_src& src1 = alu.src[1];
2931
2932 AluInstr *ir = nullptr;
2933
2934 auto pin = pin_for_components(alu);
2935 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2936 ir = new AluInstr(opcode,
2937 value_factory.dest(alu.def, i, pin),
2938 value_factory.src(src0, i),
2939 value_factory.src(src1, i),
2940 AluInstr::last_write);
2941 ir->set_alu_flag(alu_is_trans);
2942 shader.emit_instruction(ir);
2943 }
2944 return true;
2945 }
2946
2947 static bool
emit_alu_trans_op2_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2948 emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2949 {
2950 auto& value_factory = shader.value_factory();
2951
2952 const nir_alu_src& src0 = alu.src[0];
2953 const nir_alu_src& src1 = alu.src[1];
2954
2955 unsigned last_slot = 4;
2956
2957 const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
2958
2959 for (unsigned k = 0; k < alu.def.num_components; ++k) {
2960 AluInstr::SrcValues srcs(2 * last_slot);
2961 PRegister dest = value_factory.dest(alu.def, k, pin_free);
2962
2963 for (unsigned i = 0; i < last_slot; ++i) {
2964 srcs[2 * i] = value_factory.src(src0, k);
2965 srcs[2 * i + 1] = value_factory.src(src1, k);
2966 }
2967
2968 auto ir = new AluInstr(opcode, dest, srcs, flags, last_slot);
2969 ir->set_alu_flag(alu_is_cayman_trans);
2970 shader.emit_instruction(ir);
2971 }
2972 return true;
2973 }
2974
2975 static bool
emit_alu_cube(const nir_alu_instr & alu,Shader & shader)2976 emit_alu_cube(const nir_alu_instr& alu, Shader& shader)
2977 {
2978 auto& value_factory = shader.value_factory();
2979 AluInstr *ir = nullptr;
2980
2981 const uint16_t src0_chan[4] = {2, 2, 0, 1};
2982 const uint16_t src1_chan[4] = {1, 0, 2, 2};
2983
2984 auto group = new AluGroup();
2985
2986 for (int i = 0; i < 4; ++i) {
2987
2988 ir = new AluInstr(op2_cube,
2989 value_factory.dest(alu.def, i, pin_chan),
2990 value_factory.src(alu.src[0], src0_chan[i]),
2991 value_factory.src(alu.src[0], src1_chan[i]),
2992 AluInstr::write);
2993 group->add_instruction(ir);
2994 }
2995 ir->set_alu_flag(alu_last_instr);
2996 shader.emit_instruction(group);
2997 return true;
2998 }
2999
3000 const std::set<AluModifiers> AluInstr::empty;
3001 const std::set<AluModifiers> AluInstr::write({alu_write});
3002 const std::set<AluModifiers> AluInstr::last({alu_last_instr});
3003 const std::set<AluModifiers> AluInstr::last_write({alu_write, alu_last_instr});
3004
3005 } // namespace r600
3006